aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/hippi.c1
-rw-r--r--net/Kconfig32
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/atm/Makefile3
-rw-r--r--net/atm/atm_sysfs.c15
-rw-r--r--net/atm/br2684.c30
-rw-r--r--net/atm/clip.c39
-rw-r--r--net/atm/ipcommon.c63
-rw-r--r--net/atm/ipcommon.h22
-rw-r--r--net/atm/lec.c25
-rw-r--r--net/atm/lec.h8
-rw-r--r--net/atm/mpc.c29
-rw-r--r--net/atm/mpc.h6
-rw-r--r--net/atm/mpoa_caches.c20
-rw-r--r--net/atm/mpoa_caches.h16
-rw-r--r--net/atm/mpoa_proc.c6
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/ax25/ax25_addr.c36
-rw-r--r--net/ax25/ax25_out.c4
-rw-r--r--net/ax25/ax25_route.c5
-rw-r--r--net/ax25/sysctl_net_ax25.c5
-rw-r--r--net/bluetooth/bnep/bnep.h4
-rw-r--r--net/bluetooth/bnep/core.c28
-rw-r--r--net/bluetooth/bnep/netdev.c11
-rw-r--r--net/bluetooth/hci_event.c19
-rw-r--r--net/bluetooth/hci_sock.c11
-rw-r--r--net/bluetooth/hci_sysfs.c16
-rw-r--r--net/bluetooth/l2cap.c15
-rw-r--r--net/bluetooth/rfcomm/core.c18
-rw-r--r--net/bluetooth/rfcomm/tty.c6
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_if.c10
-rw-r--r--net/bridge/br_ioctl.c9
-rw-r--r--net/bridge/br_netfilter.c227
-rw-r--r--net/bridge/br_netlink.c113
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/netfilter/ebt_802_3.c2
-rw-r--r--net/bridge/netfilter/ebt_among.c22
-rw-r--r--net/bridge/netfilter/ebt_arp.c6
-rw-r--r--net/bridge/netfilter/ebt_ip.c4
-rw-r--r--net/bridge/netfilter/ebt_log.c6
-rw-r--r--net/bridge/netfilter/ebt_mark.c14
-rw-r--r--net/bridge/netfilter/ebt_mark_m.c4
-rw-r--r--net/bridge/netfilter/ebt_snat.c27
-rw-r--r--net/bridge/netfilter/ebt_ulog.c2
-rw-r--r--net/bridge/netfilter/ebt_vlan.c2
-rw-r--r--net/bridge/netfilter/ebtable_broute.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/bridge/netfilter/ebtables.c224
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c16
-rw-r--r--net/core/dev.c41
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/dv.c546
-rw-r--r--net/core/fib_rules.c71
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/flow.c6
-rw-r--r--net/core/iovec.c4
-rw-r--r--net/core/kmap_skb.h19
-rw-r--r--net/core/link_watch.c13
-rw-r--r--net/core/neighbour.c37
-rw-r--r--net/core/netpoll.c352
-rw-r--r--net/core/pktgen.c98
-rw-r--r--net/core/request_sock.c35
-rw-r--r--net/core/rtnetlink.c60
-rw-r--r--net/core/skbuff.c59
-rw-r--r--net/core/sock.c41
-rw-r--r--net/core/sysctl_net_core.c14
-rw-r--r--net/core/utils.c10
-rw-r--r--net/core/wireless.c37
-rw-r--r--net/dccp/Kconfig11
-rw-r--r--net/dccp/Makefile8
-rw-r--r--net/dccp/ackvec.c134
-rw-r--r--net/dccp/ackvec.h23
-rw-r--r--net/dccp/ccid.c6
-rw-r--r--net/dccp/ccid.h26
-rw-r--r--net/dccp/ccids/Kconfig75
-rw-r--r--net/dccp/ccids/ccid2.c73
-rw-r--r--net/dccp/ccids/ccid2.h3
-rw-r--r--net/dccp/ccids/ccid3.c930
-rw-r--r--net/dccp/ccids/ccid3.h129
-rw-r--r--net/dccp/ccids/lib/loss_interval.c8
-rw-r--r--net/dccp/ccids/lib/loss_interval.h2
-rw-r--r--net/dccp/ccids/lib/packet_history.c219
-rw-r--r--net/dccp/ccids/lib/packet_history.h130
-rw-r--r--net/dccp/ccids/lib/tfrc.h23
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c241
-rw-r--r--net/dccp/dccp.h111
-rw-r--r--net/dccp/feat.c131
-rw-r--r--net/dccp/feat.h48
-rw-r--r--net/dccp/input.c114
-rw-r--r--net/dccp/ipv4.c553
-rw-r--r--net/dccp/ipv6.c639
-rw-r--r--net/dccp/minisocks.c53
-rw-r--r--net/dccp/options.c68
-rw-r--r--net/dccp/output.c111
-rw-r--r--net/dccp/probe.c8
-rw-r--r--net/dccp/proto.c70
-rw-r--r--net/dccp/sysctl.c60
-rw-r--r--net/dccp/timer.c140
-rw-r--r--net/decnet/Kconfig8
-rw-r--r--net/decnet/af_decnet.c25
-rw-r--r--net/decnet/dn_dev.c179
-rw-r--r--net/decnet/dn_neigh.c1
-rw-r--r--net/decnet/dn_nsp_in.c10
-rw-r--r--net/decnet/dn_nsp_out.c2
-rw-r--r--net/decnet/dn_route.c46
-rw-r--r--net/decnet/dn_rules.c47
-rw-r--r--net/decnet/dn_table.c46
-rw-r--r--net/decnet/sysctl_net_decnet.c6
-rw-r--r--net/ethernet/eth.c1
-rw-r--r--net/ieee80211/Kconfig2
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c1
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c1
-rw-r--r--net/ieee80211/ieee80211_module.c25
-rw-r--r--net/ieee80211/ieee80211_rx.c80
-rw-r--r--net/ieee80211/ieee80211_tx.c4
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_assoc.c86
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_auth.c49
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_event.c12
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_io.c11
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_module.c5
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_priv.h15
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_scan.c20
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_wx.c80
-rw-r--r--net/ipv4/Kconfig19
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c16
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/cipso_ipv4.c820
-rw-r--r--net/ipv4/devinet.c35
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/fib_hash.c8
-rw-r--r--net/ipv4/fib_rules.c53
-rw-r--r--net/ipv4/fib_semantics.c36
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/icmp.c6
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c8
-rw-r--r--net/ipv4/inetpeer.c2
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c56
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c34
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ipconfig.c105
-rw-r--r--net/ipv4/ipip.c16
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c16
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c17
-rw-r--r--net/ipv4/netfilter.c10
-rw-r--r--net/ipv4/netfilter/Kconfig138
-rw-r--r--net/ipv4/netfilter/Makefile30
-rw-r--r--net/ipv4/netfilter/arp_tables.c42
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c11
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c18
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c8
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c172
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c33
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c12
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c66
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c118
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c32
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_h323.c58
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c29
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c10
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_sip.c199
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c85
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c9
-rw-r--r--net/ipv4/netfilter/ip_queue.c9
-rw-r--r--net/ipv4/netfilter/ip_tables.c236
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c35
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c11
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c20
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c29
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c4
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c18
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c12
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c24
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c5
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c5
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c2
-rw-r--r--net/ipv4/netfilter/ipt_recent.c2
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c8
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c137
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c412
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c57
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c78
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c647
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c179
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c596
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c433
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c101
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c315
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c179
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c86
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c148
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c138
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c54
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c343
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c283
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1332
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c406
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c52
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/raw.c21
-rw-r--r--net/ipv4/route.c71
-rw-r--r--net/ipv4/syncookies.c18
-rw-r--r--net/ipv4/sysctl_net_ipv4.c83
-rw-r--r--net/ipv4/tcp.c168
-rw-r--r--net/ipv4/tcp_cong.c99
-rw-r--r--net/ipv4/tcp_cubic.c6
-rw-r--r--net/ipv4/tcp_htcp.c6
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_ipv4.c713
-rw-r--r--net/ipv4/tcp_minisocks.c72
-rw-r--r--net/ipv4/tcp_output.c122
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/tcp_vegas.c4
-rw-r--r--net/ipv4/udp.c558
-rw-r--r--net/ipv4/udp_impl.h38
-rw-r--r--net/ipv4/udplite.c119
-rw-r--r--net/ipv4/xfrm4_policy.c9
-rw-r--r--net/ipv6/Kconfig7
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c193
-rw-r--r--net/ipv6/af_inet6.c33
-rw-r--r--net/ipv6/ah6.c5
-rw-r--r--net/ipv6/datagram.c16
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/exthdrs.c59
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/fib6_rules.c60
-rw-r--r--net/ipv6/icmp.c23
-rw-r--r--net/ipv6/inet6_connection_sock.c17
-rw-r--r--net/ipv6/inet6_hashtables.c6
-rw-r--r--net/ipv6/ip6_fib.c21
-rw-r--r--net/ipv6/ip6_flowlabel.c32
-rw-r--r--net/ipv6/ip6_input.c42
-rw-r--r--net/ipv6/ip6_output.c103
-rw-r--r--net/ipv6/ip6_tunnel.c315
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c34
-rw-r--r--net/ipv6/mcast.c36
-rw-r--r--net/ipv6/mip6.c4
-rw-r--r--net/ipv6/ndisc.c39
-rw-r--r--net/ipv6/netfilter.c11
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/ip6_queue.c11
-rw-r--r--net/ipv6/netfilter/ip6_tables.c84
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c23
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c7
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c7
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c7
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c7
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c9
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c93
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c40
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c26
-rw-r--r--net/ipv6/proc.c18
-rw-r--r--net/ipv6/raw.c45
-rw-r--r--net/ipv6/reassembly.c87
-rw-r--r--net/ipv6/route.c114
-rw-r--r--net/ipv6/sit.c23
-rw-r--r--net/ipv6/tcp_ipv6.c607
-rw-r--r--net/ipv6/tunnel6.c2
-rw-r--r--net/ipv6/udp.c429
-rw-r--r--net/ipv6/udp_impl.h34
-rw-r--r--net/ipv6/udplite.c105
-rw-r--r--net/ipv6/xfrm6_policy.c3
-rw-r--r--net/ipv6/xfrm6_tunnel.c16
-rw-r--r--net/ipx/af_ipx.c85
-rw-r--r--net/ipx/ipx_proc.c12
-rw-r--r--net/ipx/ipx_route.c14
-rw-r--r--net/irda/discovery.c1
-rw-r--r--net/irda/ircomm/ircomm_tty.c11
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c2
-rw-r--r--net/irda/iriap.c12
-rw-r--r--net/irda/irias_object.c4
-rw-r--r--net/irda/irlan/irlan_common.c2
-rw-r--r--net/irda/irlmp.c7
-rw-r--r--net/irda/irqueue.c3
-rw-r--r--net/irda/irttp.c9
-rw-r--r--net/key/af_key.c69
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/llc/llc_input.c4
-rw-r--r--net/netfilter/Kconfig195
-rw-r--r--net/netfilter/Makefile17
-rw-r--r--net/netfilter/core.c31
-rw-r--r--net/netfilter/nf_conntrack_amanda.c238
-rw-r--r--net/netfilter/nf_conntrack_core.c711
-rw-r--r--net/netfilter/nf_conntrack_ecache.c93
-rw-r--r--net/netfilter/nf_conntrack_expect.c445
-rw-r--r--net/netfilter/nf_conntrack_ftp.c42
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c (renamed from net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c)4
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c1856
-rw-r--r--net/netfilter/nf_conntrack_h323_types.c (renamed from net/ipv4/netfilter/ip_conntrack_helper_h323_types.c)7
-rw-r--r--net/netfilter/nf_conntrack_helper.c155
-rw-r--r--net/netfilter/nf_conntrack_irc.c281
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c126
-rw-r--r--net/netfilter/nf_conntrack_netlink.c214
-rw-r--r--net/netfilter/nf_conntrack_pptp.c607
-rw-r--r--net/netfilter/nf_conntrack_proto.c410
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c47
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c305
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c182
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c275
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c83
-rw-r--r--net/netfilter/nf_conntrack_sip.c531
-rw-r--r--net/netfilter/nf_conntrack_standalone.c444
-rw-r--r--net/netfilter/nf_conntrack_tftp.c160
-rw-r--r--net/netfilter/nf_sysctl.c134
-rw-r--r--net/netfilter/nfnetlink_log.c55
-rw-r--r--net/netfilter/nfnetlink_queue.c32
-rw-r--r--net/netfilter/x_tables.c1
-rw-r--r--net/netfilter/xt_CONNMARK.c27
-rw-r--r--net/netfilter/xt_CONNSECMARK.c14
-rw-r--r--net/netfilter/xt_MARK.c12
-rw-r--r--net/netfilter/xt_NFLOG.c86
-rw-r--r--net/netfilter/xt_connbytes.c14
-rw-r--r--net/netfilter/xt_connmark.c7
-rw-r--r--net/netfilter/xt_conntrack.c8
-rw-r--r--net/netfilter/xt_hashlimit.c (renamed from net/ipv4/netfilter/ipt_hashlimit.c)514
-rw-r--r--net/netfilter/xt_helper.c8
-rw-r--r--net/netfilter/xt_mark.c2
-rw-r--r--net/netfilter/xt_multiport.c9
-rw-r--r--net/netfilter/xt_physdev.c12
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/netfilter/xt_state.c7
-rw-r--r--net/netfilter/xt_tcpudp.c20
-rw-r--r--net/netlabel/Kconfig2
-rw-r--r--net/netlabel/netlabel_cipso_v4.c47
-rw-r--r--net/netlabel/netlabel_domainhash.c48
-rw-r--r--net/netlabel/netlabel_kapi.c212
-rw-r--r--net/netlabel/netlabel_mgmt.c42
-rw-r--r--net/netlabel/netlabel_unlabeled.c48
-rw-r--r--net/netlabel/netlabel_user.c7
-rw-r--r--net/netlabel/netlabel_user.h31
-rw-r--r--net/netlink/af_netlink.c21
-rw-r--r--net/netlink/genetlink.c68
-rw-r--r--net/netrom/nr_route.c12
-rw-r--r--net/packet/af_packet.c21
-rw-r--r--net/rose/rose_route.c4
-rw-r--r--net/rxrpc/krxiod.c1
-rw-r--r--net/rxrpc/krxsecd.c1
-rw-r--r--net/rxrpc/krxtimod.c1
-rw-r--r--net/rxrpc/transport.c1
-rw-r--r--net/sched/Kconfig6
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_gact.c4
-rw-r--r--net/sched/act_ipt.c6
-rw-r--r--net/sched/act_police.c26
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_fw.c7
-rw-r--r--net/sched/cls_rsvp.h16
-rw-r--r--net/sched/cls_u32.c2
-rw-r--r--net/sched/em_meta.c13
-rw-r--r--net/sched/em_nbyte.c4
-rw-r--r--net/sched/ematch.c3
-rw-r--r--net/sched/sch_api.c41
-rw-r--r--net/sched/sch_atm.c5
-rw-r--r--net/sched/sch_cbq.c31
-rw-r--r--net/sched/sch_dsmark.c9
-rw-r--r--net/sched/sch_generic.c10
-rw-r--r--net/sched/sch_hfsc.c27
-rw-r--r--net/sched/sch_htb.c93
-rw-r--r--net/sched/sch_netem.c14
-rw-r--r--net/sched/sch_prio.c14
-rw-r--r--net/sched/sch_red.c14
-rw-r--r--net/sched/sch_sfq.c3
-rw-r--r--net/sched/sch_tbf.c16
-rw-r--r--net/sctp/associola.c49
-rw-r--r--net/sctp/bind_addr.c4
-rw-r--r--net/sctp/endpointola.c28
-rw-r--r--net/sctp/input.c15
-rw-r--r--net/sctp/inqueue.c9
-rw-r--r--net/sctp/ipv6.c62
-rw-r--r--net/sctp/outqueue.c4
-rw-r--r--net/sctp/proc.c6
-rw-r--r--net/sctp/protocol.c100
-rw-r--r--net/sctp/sm_make_chunk.c69
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/sm_statefuns.c55
-rw-r--r--net/sctp/sm_statetable.c696
-rw-r--r--net/sctp/socket.c208
-rw-r--r--net/sctp/tsnmap.c9
-rw-r--r--net/sctp/ulpevent.c6
-rw-r--r--net/socket.c43
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c47
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c101
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c21
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c55
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c87
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c153
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c134
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c101
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c6
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c92
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c18
-rw-r--r--net/sunrpc/cache.c49
-rw-r--r--net/sunrpc/clnt.c73
-rw-r--r--net/sunrpc/pmap_clnt.c13
-rw-r--r--net/sunrpc/rpc_pipe.c22
-rw-r--r--net/sunrpc/sched.c149
-rw-r--r--net/sunrpc/socklib.c25
-rw-r--r--net/sunrpc/sunrpc_syms.c5
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcauth.c8
-rw-r--r--net/sunrpc/svcauth_unix.c13
-rw-r--r--net/sunrpc/svcsock.c63
-rw-r--r--net/sunrpc/sysctl.c50
-rw-r--r--net/sunrpc/xdr.c255
-rw-r--r--net/sunrpc/xprt.c40
-rw-r--r--net/sunrpc/xprtsock.c753
-rw-r--r--net/tipc/bcast.c6
-rw-r--r--net/tipc/config.c34
-rw-r--r--net/tipc/dbg.c3
-rw-r--r--net/tipc/handler.c2
-rw-r--r--net/tipc/name_distr.c10
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tipc/port.c5
-rw-r--r--net/tipc/subscr.c3
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wanrouter/af_wanpipe.c4
-rw-r--r--net/wanrouter/wanmain.c59
-rw-r--r--net/xfrm/xfrm_algo.c17
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_policy.c225
-rw-r--r--net/xfrm/xfrm_state.c70
-rw-r--r--net/xfrm/xfrm_user.c156
462 files changed, 24768 insertions, 10631 deletions
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 6d7fed3dd99..579e2ddf5eb 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -36,7 +36,6 @@
#include <net/arp.h>
#include <net/sock.h>
#include <asm/uaccess.h>
-#include <asm/checksum.h>
#include <asm/system.h>
/*
diff --git a/net/Kconfig b/net/Kconfig
index a81aca43932..7dfc9492069 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -63,6 +63,7 @@ config INET
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
+source "net/netlabel/Kconfig"
endif # if INET
@@ -74,7 +75,7 @@ config NETWORK_SECMARK
If you are unsure how to answer this question, answer N.
menuconfig NETFILTER
- bool "Network packet filtering (replaces ipchains)"
+ bool "Network packet filtering framework (Netfilter)"
---help---
Netfilter is a framework for filtering and mangling network packets
that pass through your Linux box.
@@ -174,33 +175,6 @@ source "net/ipx/Kconfig"
source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
source "net/lapb/Kconfig"
-
-config NET_DIVERT
- bool "Frame Diverter (EXPERIMENTAL)"
- depends on EXPERIMENTAL && BROKEN
- ---help---
- The Frame Diverter allows you to divert packets from the
- network, that are not aimed at the interface receiving it (in
- promisc. mode). Typically, a Linux box setup as an Ethernet bridge
- with the Frames Diverter on, can do some *really* transparent www
- caching using a Squid proxy for example.
-
- This is very useful when you don't want to change your router's
- config (or if you simply don't have access to it).
-
- The other possible usages of diverting Ethernet Frames are
- numberous:
- - reroute smtp traffic to another interface
- - traffic-shape certain network streams
- - transparently proxy smtp connections
- - etc...
-
- For more informations, please refer to:
- <http://diverter.sourceforge.net/>
- <http://perso.wanadoo.fr/magpie/EtherDivert.html>
-
- If unsure, say N.
-
source "net/econet/Kconfig"
source "net/wanrouter/Kconfig"
source "net/sched/Kconfig"
@@ -249,8 +223,6 @@ source "net/ieee80211/Kconfig"
config WIRELESS_EXT
bool
-source "net/netlabel/Kconfig"
-
config FIB_RULES
bool
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 708e2e0371a..3a705220770 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -61,6 +61,7 @@
#include <net/tcp_states.h>
#include <net/route.h>
#include <linux/atalk.h>
+#include "../core/kmap_skb.h"
struct datalink_proto *ddp_dl, *aarp_dl;
static const struct proto_ops atalk_dgram_ops;
@@ -1584,7 +1585,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
if (usat->sat_addr.s_net || usat->sat_addr.s_node == ATADDR_ANYNODE) {
rt = atrtr_find(&usat->sat_addr);
- dev = rt->dev;
} else {
struct atalk_addr at_hint;
@@ -1592,7 +1592,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
at_hint.s_net = at->src_net;
rt = atrtr_find(&at_hint);
- dev = rt->dev;
}
if (!rt)
return -ENETUNREACH;
diff --git a/net/atm/Makefile b/net/atm/Makefile
index 89656d6c0b9..cc50bd1ff1d 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -7,10 +7,7 @@ mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o
obj-$(CONFIG_ATM) += atm.o
obj-$(CONFIG_ATM_CLIP) += clip.o
-atm-$(subst m,y,$(CONFIG_ATM_CLIP)) += ipcommon.o
obj-$(CONFIG_ATM_BR2684) += br2684.o
-atm-$(subst m,y,$(CONFIG_ATM_BR2684)) += ipcommon.o
-atm-$(subst m,y,$(CONFIG_NET_SCH_ATM)) += ipcommon.o
atm-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_ATM_LANE) += lec.o
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index c0a4ae28fcf..62f6ed1f2f9 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -141,7 +141,7 @@ static struct class atm_class = {
int atm_register_sysfs(struct atm_dev *adev)
{
struct class_device *cdev = &adev->class_dev;
- int i, err;
+ int i, j, err;
cdev->class = &atm_class;
class_set_devdata(cdev, adev);
@@ -151,10 +151,19 @@ int atm_register_sysfs(struct atm_dev *adev)
if (err < 0)
return err;
- for (i = 0; atm_attrs[i]; i++)
- class_device_create_file(cdev, atm_attrs[i]);
+ for (i = 0; atm_attrs[i]; i++) {
+ err = class_device_create_file(cdev, atm_attrs[i]);
+ if (err)
+ goto err_out;
+ }
return 0;
+
+err_out:
+ for (j = 0; j < i; j++)
+ class_device_remove_file(cdev, atm_attrs[j]);
+ class_device_del(cdev);
+ return err;
}
void atm_unregister_sysfs(struct atm_dev *adev)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index d00cca97eb3..83a1c1b1d6c 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -23,7 +23,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
#include <linux/atmbr2684.h>
#include "common.h"
-#include "ipcommon.h"
/*
* Define this to use a version of the code which interacts with the higher
@@ -372,7 +371,7 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
/* Returns 1 if packet should be dropped */
static inline int
-packet_fails_filter(u16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
+packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
{
if (brvcc->filter.netmask == 0)
return 0; /* no filter in place */
@@ -500,11 +499,12 @@ Note: we do not have explicit unassign, but look at _push()
*/
int err;
struct br2684_vcc *brvcc;
- struct sk_buff_head copy;
struct sk_buff *skb;
+ struct sk_buff_head *rq;
struct br2684_dev *brdev;
struct net_device *net_dev;
struct atm_backend_br2684 be;
+ unsigned long flags;
if (copy_from_user(&be, arg, sizeof be))
return -EFAULT;
@@ -554,12 +554,30 @@ Note: we do not have explicit unassign, but look at _push()
brvcc->old_push = atmvcc->push;
barrier();
atmvcc->push = br2684_push;
- skb_queue_head_init(&copy);
- skb_migrate(&sk_atm(atmvcc)->sk_receive_queue, &copy);
- while ((skb = skb_dequeue(&copy)) != NULL) {
+
+ rq = &sk_atm(atmvcc)->sk_receive_queue;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (skb_queue_empty(rq)) {
+ skb = NULL;
+ } else {
+ /* NULL terminate the list. */
+ rq->prev->next = NULL;
+ skb = rq->next;
+ }
+ rq->prev = rq->next = (struct sk_buff *)rq;
+ rq->qlen = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ while (skb) {
+ struct sk_buff *next = skb->next;
+
+ skb->next = skb->prev = NULL;
BRPRIV(skb->dev)->stats.rx_bytes -= skb->len;
BRPRIV(skb->dev)->stats.rx_packets--;
br2684_push(atmvcc, skb);
+
+ skb = next;
}
__module_get(THIS_MODULE);
return 0;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 7af2c411da8..5f8a1d22272 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -38,7 +38,6 @@
#include "common.h"
#include "resources.h"
-#include "ipcommon.h"
#include <net/atmclip.h>
@@ -54,7 +53,7 @@ static struct atm_vcc *atmarpd;
static struct neigh_table clip_tbl;
static struct timer_list idle_timer;
-static int to_atmarpd(enum atmarp_ctrl_type type, int itf, unsigned long ip)
+static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
{
struct sock *sk;
struct atmarp_ctrl *ctrl;
@@ -220,7 +219,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
|| memcmp(skb->data, llc_oui, sizeof (llc_oui)))
skb->protocol = htons(ETH_P_IP);
else {
- skb->protocol = ((u16 *) skb->data)[3];
+ skb->protocol = ((__be16 *) skb->data)[3];
skb_pull(skb, RFC1483LLC_LEN);
if (skb->protocol == htons(ETH_P_ARP)) {
PRIV(skb->dev)->stats.rx_packets++;
@@ -430,7 +429,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
here = skb_push(skb, RFC1483LLC_LEN);
memcpy(here, llc_oui, sizeof(llc_oui));
- ((u16 *) here)[3] = skb->protocol;
+ ((__be16 *) here)[3] = skb->protocol;
}
atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = vcc->atm_options;
@@ -469,8 +468,9 @@ static struct net_device_stats *clip_get_stats(struct net_device *dev)
static int clip_mkip(struct atm_vcc *vcc, int timeout)
{
struct clip_vcc *clip_vcc;
- struct sk_buff_head copy;
struct sk_buff *skb;
+ struct sk_buff_head *rq;
+ unsigned long flags;
if (!vcc->push)
return -EBADFD;
@@ -490,10 +490,26 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
clip_vcc->old_pop = vcc->pop;
vcc->push = clip_push;
vcc->pop = clip_pop;
- skb_queue_head_init(&copy);
- skb_migrate(&sk_atm(vcc)->sk_receive_queue, &copy);
+
+ rq = &sk_atm(vcc)->sk_receive_queue;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (skb_queue_empty(rq)) {
+ skb = NULL;
+ } else {
+ /* NULL terminate the list. */
+ rq->prev->next = NULL;
+ skb = rq->next;
+ }
+ rq->prev = rq->next = (struct sk_buff *)rq;
+ rq->qlen = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+
/* re-process everything received between connection setup and MKIP */
- while ((skb = skb_dequeue(&copy)) != NULL)
+ while (skb) {
+ struct sk_buff *next = skb->next;
+
+ skb->next = skb->prev = NULL;
if (!clip_devs) {
atm_return(vcc, skb->truesize);
kfree_skb(skb);
@@ -506,10 +522,13 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
PRIV(skb->dev)->stats.rx_bytes -= len;
kfree_skb(skb);
}
+
+ skb = next;
+ }
return 0;
}
-static int clip_setentry(struct atm_vcc *vcc, u32 ip)
+static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
{
struct neighbour *neigh;
struct atmarp_entry *entry;
@@ -752,7 +771,7 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = clip_mkip(vcc, arg);
break;
case ATMARP_SETENTRY:
- err = clip_setentry(vcc, arg);
+ err = clip_setentry(vcc, (__force __be32)arg);
break;
case ATMARP_ENCAP:
err = clip_encap(vcc, arg);
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c
deleted file mode 100644
index 1d3de42fada..00000000000
--- a/net/atm/ipcommon.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* net/atm/ipcommon.c - Common items for all ways of doing IP over ATM */
-
-/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */
-
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/atmdev.h>
-#include <linux/atmclip.h>
-
-#include "common.h"
-#include "ipcommon.h"
-
-
-#if 0
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-
-/*
- * skb_migrate appends the list at "from" to "to", emptying "from" in the
- * process. skb_migrate is atomic with respect to all other skb operations on
- * "from" and "to". Note that it locks both lists at the same time, so to deal
- * with the lock ordering, the locks are taken in address order.
- *
- * This function should live in skbuff.c or skbuff.h.
- */
-
-
-void skb_migrate(struct sk_buff_head *from, struct sk_buff_head *to)
-{
- unsigned long flags;
- struct sk_buff *skb_from = (struct sk_buff *) from;
- struct sk_buff *skb_to = (struct sk_buff *) to;
- struct sk_buff *prev;
-
- if ((unsigned long) from < (unsigned long) to) {
- spin_lock_irqsave(&from->lock, flags);
- spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING);
- } else {
- spin_lock_irqsave(&to->lock, flags);
- spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING);
- }
- prev = from->prev;
- from->next->prev = to->prev;
- prev->next = skb_to;
- to->prev->next = from->next;
- to->prev = from->prev;
- to->qlen += from->qlen;
- spin_unlock(&to->lock);
- from->prev = skb_from;
- from->next = skb_from;
- from->qlen = 0;
- spin_unlock_irqrestore(&from->lock, flags);
-}
-
-
-EXPORT_SYMBOL(skb_migrate);
diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h
deleted file mode 100644
index d72165f6093..00000000000
--- a/net/atm/ipcommon.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* net/atm/ipcommon.h - Common items for all ways of doing IP over ATM */
-
-/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */
-
-
-#ifndef NET_ATM_IPCOMMON_H
-#define NET_ATM_IPCOMMON_H
-
-
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/atmdev.h>
-
-/*
- * Appends all skbs from "from" to "to". The operation is atomic with respect
- * to all other skb operations on "from" or "to".
- */
-
-void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to);
-
-#endif
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 66c57c1091a..3fc0abeeaf3 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -204,9 +204,9 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
memset(rdesc, 0, ETH_ALEN);
/* offset 4 comes from LAN destination field in LE control frames */
if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT))
- memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(uint16_t));
+ memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(__be16));
else {
- memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t));
+ memcpy(&rdesc[4], &trh->rseg[1], sizeof(__be16));
rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0));
}
@@ -775,7 +775,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
unsigned char *src, *dst;
atm_return(vcc, skb->truesize);
- if (*(uint16_t *) skb->data == htons(priv->lecid) ||
+ if (*(__be16 *) skb->data == htons(priv->lecid) ||
!priv->lecd || !(dev->flags & IFF_UP)) {
/*
* Probably looping back, or if lecd is missing,
@@ -1321,11 +1321,10 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
if (table == NULL)
return -1;
- *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC);
+ *tlvs = kmemdup(table->tlvs, table->sizeoftlvs, GFP_ATOMIC);
if (*tlvs == NULL)
return -1;
- memcpy(*tlvs, table->tlvs, table->sizeoftlvs);
*sizeoftlvs = table->sizeoftlvs;
return 0;
@@ -1364,11 +1363,10 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
kfree(priv->tlvs); /* NULL if there was no previous association */
- priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+ priv->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL);
if (priv->tlvs == NULL)
return (0);
priv->sizeoftlvs = sizeoftlvs;
- memcpy(priv->tlvs, tlvs, sizeoftlvs);
skb = alloc_skb(sizeoftlvs, GFP_ATOMIC);
if (skb == NULL)
@@ -1409,12 +1407,10 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
kfree(entry->tlvs);
- entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+ entry->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL);
if (entry->tlvs == NULL)
return;
-
entry->sizeoftlvs = sizeoftlvs;
- memcpy(entry->tlvs, tlvs, sizeoftlvs);
#endif
#if 0
printk("lec.c: lane2_associate_ind()\n");
@@ -1458,7 +1454,7 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
-static void lec_arp_check_expire(void *data);
+static void lec_arp_check_expire(struct work_struct *work);
static void lec_arp_expire_arp(unsigned long data);
/*
@@ -1481,7 +1477,7 @@ static void lec_arp_init(struct lec_priv *priv)
INIT_HLIST_HEAD(&priv->lec_no_forward);
INIT_HLIST_HEAD(&priv->mcast_fwds);
spin_lock_init(&priv->lec_arp_lock);
- INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv);
+ INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire);
schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL);
}
@@ -1879,10 +1875,11 @@ static void lec_arp_expire_vcc(unsigned long data)
* to ESI_FORWARD_DIRECT. This causes the flush period to end
* regardless of the progress of the flush protocol.
*/
-static void lec_arp_check_expire(void *data)
+static void lec_arp_check_expire(struct work_struct *work)
{
unsigned long flags;
- struct lec_priv *priv = data;
+ struct lec_priv *priv =
+ container_of(work, struct lec_priv, lec_arp_work.work);
struct hlist_node *node, *next;
struct lec_arp_table *entry;
unsigned long now;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 877f5093969..99136babd53 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -14,14 +14,14 @@
#define LEC_HEADER_LEN 16
struct lecdatahdr_8023 {
- unsigned short le_header;
+ __be16 le_header;
unsigned char h_dest[ETH_ALEN];
unsigned char h_source[ETH_ALEN];
- unsigned short h_type;
+ __be16 h_type;
};
struct lecdatahdr_8025 {
- unsigned short le_header;
+ __be16 le_header;
unsigned char ac_pad;
unsigned char fc;
unsigned char h_dest[ETH_ALEN];
@@ -92,7 +92,7 @@ struct lec_priv {
spinlock_t lec_arp_lock;
struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
struct atm_vcc *lecd;
- struct work_struct lec_arp_work; /* C10 */
+ struct delayed_work lec_arp_work; /* C10 */
unsigned int maximum_unknown_frame_count;
/*
* Within the period of time defined by this variable, the client will send
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 0d2b994af51..c18f73715ef 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -152,7 +152,7 @@ static struct mpoa_client *find_mpc_by_lec(struct net_device *dev)
/*
* Overwrites the old entry or makes a new one.
*/
-struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos)
+struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos)
{
struct atm_mpoa_qos *entry;
@@ -177,7 +177,7 @@ struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos)
return entry;
}
-struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip)
+struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip)
{
struct atm_mpoa_qos *qos;
@@ -460,11 +460,11 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
in_cache_entry *entry;
struct iphdr *iph;
char *buff;
- uint32_t ipaddr = 0;
+ __be32 ipaddr = 0;
static struct {
struct llc_snap_hdr hdr;
- uint32_t tag;
+ __be32 tag;
} tagged_llc_snap_hdr = {
{0xaa, 0xaa, 0x03, {0x00, 0x00, 0x00}, {0x88, 0x4c}},
0
@@ -559,7 +559,7 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
struct mpoa_client *mpc;
struct atmmpc_ioc ioc_data;
in_cache_entry *in_entry;
- uint32_t ipaddr;
+ __be32 ipaddr;
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc));
if (bytes_left != 0) {
@@ -638,7 +638,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
struct sk_buff *new_skb;
eg_cache_entry *eg;
struct mpoa_client *mpc;
- uint32_t tag;
+ __be32 tag;
char *tmp;
ddprintk("mpoa: (%s) mpc_push:\n", dev->name);
@@ -683,7 +683,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
}
tmp = skb->data + sizeof(struct llc_snap_hdr);
- tag = *(uint32_t *)tmp;
+ tag = *(__be32 *)tmp;
eg = mpc->eg_ops->get_by_tag(tag, mpc);
if (eg == NULL) {
@@ -1029,7 +1029,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo
static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
- uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+ __be32 dst_ip = msg->content.in_info.in_dst_ip;
in_cache_entry *entry;
entry = mpc->in_ops->get(dst_ip, mpc);
@@ -1066,7 +1066,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
*/
static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry)
{
- uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+ __be32 dst_ip = msg->content.in_info.in_dst_ip;
struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip);
eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client);
@@ -1102,7 +1102,7 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien
static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
- uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+ __be32 dst_ip = msg->content.in_info.in_dst_ip;
in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc);
dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip));
@@ -1148,8 +1148,8 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
- uint32_t dst_ip = msg->content.in_info.in_dst_ip;
- uint32_t mask = msg->ip_mask;
+ __be32 dst_ip = msg->content.in_info.in_dst_ip;
+ __be32 mask = msg->ip_mask;
in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
if(entry == NULL){
@@ -1173,7 +1173,7 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
{
- uint32_t cache_id = msg->content.eg_info.cache_id;
+ __be32 cache_id = msg->content.eg_info.cache_id;
eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc);
if (entry == NULL) {
@@ -1322,13 +1322,12 @@ static void set_mps_mac_addr_rcvd(struct k_message *msg, struct mpoa_client *cli
if(client->number_of_mps_macs)
kfree(client->mps_macs);
client->number_of_mps_macs = 0;
- client->mps_macs = kmalloc(ETH_ALEN,GFP_KERNEL);
+ client->mps_macs = kmemdup(msg->MPS_ctrl, ETH_ALEN, GFP_KERNEL);
if (client->mps_macs == NULL) {
printk("mpoa: set_mps_mac_addr_rcvd: out of memory\n");
return;
}
client->number_of_mps_macs = 1;
- memcpy(client->mps_macs, msg->MPS_ctrl, ETH_ALEN);
return;
}
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 3c7981a229e..51f460d005c 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -36,14 +36,14 @@ struct mpoa_client {
struct atm_mpoa_qos {
struct atm_mpoa_qos *next;
- uint32_t ipaddr;
+ __be32 ipaddr;
struct atm_qos qos;
};
/* MPOA QoS operations */
-struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos);
-struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip);
+struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos);
+struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip);
int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos);
/* Display QoS entries. This is for the procfs */
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index fbf13cdcf46..697a081533b 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -22,7 +22,7 @@
#define ddprintk(format,args...)
#endif
-static in_cache_entry *in_cache_get(uint32_t dst_ip,
+static in_cache_entry *in_cache_get(__be32 dst_ip,
struct mpoa_client *client)
{
in_cache_entry *entry;
@@ -42,9 +42,9 @@ static in_cache_entry *in_cache_get(uint32_t dst_ip,
return NULL;
}
-static in_cache_entry *in_cache_get_with_mask(uint32_t dst_ip,
+static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip,
struct mpoa_client *client,
- uint32_t mask)
+ __be32 mask)
{
in_cache_entry *entry;
@@ -84,10 +84,10 @@ static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc,
return NULL;
}
-static in_cache_entry *in_cache_add_entry(uint32_t dst_ip,
+static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
struct mpoa_client *client)
{
- in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL);
+ in_cache_entry *entry = kzalloc(sizeof(in_cache_entry), GFP_KERNEL);
if (entry == NULL) {
printk("mpoa: mpoa_caches.c: new_in_cache_entry: out of memory\n");
@@ -95,7 +95,6 @@ static in_cache_entry *in_cache_add_entry(uint32_t dst_ip,
}
dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip));
- memset(entry,0,sizeof(in_cache_entry));
atomic_set(&entry->use, 1);
dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n");
@@ -319,7 +318,7 @@ static void in_destroy_cache(struct mpoa_client *mpc)
return;
}
-static eg_cache_entry *eg_cache_get_by_cache_id(uint32_t cache_id, struct mpoa_client *mpc)
+static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, struct mpoa_client *mpc)
{
eg_cache_entry *entry;
@@ -339,7 +338,7 @@ static eg_cache_entry *eg_cache_get_by_cache_id(uint32_t cache_id, struct mpoa_c
}
/* This can be called from any context since it saves CPU flags */
-static eg_cache_entry *eg_cache_get_by_tag(uint32_t tag, struct mpoa_client *mpc)
+static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc)
{
unsigned long flags;
eg_cache_entry *entry;
@@ -380,7 +379,7 @@ static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, struct mpoa_clie
return NULL;
}
-static eg_cache_entry *eg_cache_get_by_src_ip(uint32_t ipaddr, struct mpoa_client *mpc)
+static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, struct mpoa_client *mpc)
{
eg_cache_entry *entry;
@@ -447,7 +446,7 @@ static void eg_cache_remove_entry(eg_cache_entry *entry,
static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client)
{
- eg_cache_entry *entry = kmalloc(sizeof(eg_cache_entry), GFP_KERNEL);
+ eg_cache_entry *entry = kzalloc(sizeof(eg_cache_entry), GFP_KERNEL);
if (entry == NULL) {
printk("mpoa: mpoa_caches.c: new_eg_cache_entry: out of memory\n");
@@ -455,7 +454,6 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli
}
dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(msg->content.eg_info.eg_dst_ip));
- memset(entry, 0, sizeof(eg_cache_entry));
atomic_set(&entry->use, 1);
dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: about to lock\n");
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 6c9886a03d0..84de977def2 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -29,12 +29,12 @@ typedef struct in_cache_entry {
} in_cache_entry;
struct in_cache_ops{
- in_cache_entry *(*add_entry)(uint32_t dst_ip,
+ in_cache_entry *(*add_entry)(__be32 dst_ip,
struct mpoa_client *client);
- in_cache_entry *(*get)(uint32_t dst_ip, struct mpoa_client *client);
- in_cache_entry *(*get_with_mask)(uint32_t dst_ip,
+ in_cache_entry *(*get)(__be32 dst_ip, struct mpoa_client *client);
+ in_cache_entry *(*get_with_mask)(__be32 dst_ip,
struct mpoa_client *client,
- uint32_t mask);
+ __be32 mask);
in_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc,
struct mpoa_client *client);
void (*put)(in_cache_entry *entry);
@@ -56,17 +56,17 @@ typedef struct eg_cache_entry{
struct atm_vcc *shortcut;
uint32_t packets_rcvd;
uint16_t entry_state;
- uint32_t latest_ip_addr; /* The src IP address of the last packet */
+ __be32 latest_ip_addr; /* The src IP address of the last packet */
struct eg_ctrl_info ctrl_info;
atomic_t use;
} eg_cache_entry;
struct eg_cache_ops{
eg_cache_entry *(*add_entry)(struct k_message *msg, struct mpoa_client *client);
- eg_cache_entry *(*get_by_cache_id)(uint32_t cache_id, struct mpoa_client *client);
- eg_cache_entry *(*get_by_tag)(uint32_t cache_id, struct mpoa_client *client);
+ eg_cache_entry *(*get_by_cache_id)(__be32 cache_id, struct mpoa_client *client);
+ eg_cache_entry *(*get_by_tag)(__be32 cache_id, struct mpoa_client *client);
eg_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, struct mpoa_client *client);
- eg_cache_entry *(*get_by_src_ip)(uint32_t ipaddr, struct mpoa_client *client);
+ eg_cache_entry *(*get_by_src_ip)(__be32 ipaddr, struct mpoa_client *client);
void (*put)(eg_cache_entry *entry);
void (*remove_entry)(eg_cache_entry *entry, struct mpoa_client *client);
void (*update)(eg_cache_entry *entry, uint16_t holding_time);
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index d37b8911b3a..3844c85d602 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -231,14 +231,14 @@ static int parse_qos(const char *buff)
*/
unsigned char ip[4];
int tx_pcr, tx_sdu, rx_pcr, rx_sdu;
- uint32_t ipaddr;
+ __be32 ipaddr;
struct atm_qos qos;
memset(&qos, 0, sizeof(struct atm_qos));
if (sscanf(buff, "del %hhu.%hhu.%hhu.%hhu",
ip, ip+1, ip+2, ip+3) == 4) {
- ipaddr = *(uint32_t *)ip;
+ ipaddr = *(__be32 *)ip;
return atm_mpoa_delete_qos(atm_mpoa_search_qos(ipaddr));
}
@@ -250,7 +250,7 @@ static int parse_qos(const char *buff)
ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu, &rx_pcr, &rx_sdu) != 8)
return 0;
- ipaddr = *(uint32_t *)ip;
+ ipaddr = *(__be32 *)ip;
qos.txtp.traffic_class = ATM_CBR;
qos.txtp.max_pcr = tx_pcr;
qos.txtp.max_sdu = tx_sdu;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 91fe5f53ff1..739866bfe9e 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -393,7 +393,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
if (count == 0) return 0;
page = get_zeroed_page(GFP_KERNEL);
if (!page) return -ENOMEM;
- dev = PDE(file->f_dentry->d_inode)->data;
+ dev = PDE(file->f_path.dentry->d_inode)->data;
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 000695c4858..6cabf6d8a75 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -906,13 +906,13 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
ax25->source_addr = oax25->source_addr;
if (oax25->digipeat != NULL) {
- if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+ ax25->digipeat = kmemdup(oax25->digipeat, sizeof(ax25_digi),
+ GFP_ATOMIC);
+ if (ax25->digipeat == NULL) {
sk_free(sk);
ax25_cb_put(ax25);
return NULL;
}
-
- memcpy(ax25->digipeat, oax25->digipeat, sizeof(ax25_digi));
}
sk->sk_protinfo = ax25;
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index 5f0896ad004..97a49c79c60 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -29,17 +29,26 @@
#include <linux/interrupt.h>
/*
- * The null address is defined as a callsign of all spaces with an
- * SSID of zero.
+ * The default broadcast address of an interface is QST-0; the default address
+ * is LINUX-1. The null address is defined as a callsign of all spaces with
+ * an SSID of zero.
*/
-ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}};
+const ax25_address ax25_bcast =
+ {{'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}};
+const ax25_address ax25_defaddr =
+ {{'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, 1 << 1}};
+const ax25_address null_ax25_address =
+ {{' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}};
+
+EXPORT_SYMBOL_GPL(ax25_bcast);
+EXPORT_SYMBOL_GPL(ax25_defaddr);
EXPORT_SYMBOL(null_ax25_address);
/*
* ax25 -> ascii conversion
*/
-char *ax2asc(char *buf, ax25_address *a)
+char *ax2asc(char *buf, const ax25_address *a)
{
char c, *s;
int n;
@@ -72,9 +81,9 @@ EXPORT_SYMBOL(ax2asc);
/*
* ascii -> ax25 conversion
*/
-void asc2ax(ax25_address *addr, char *callsign)
+void asc2ax(ax25_address *addr, const char *callsign)
{
- char *s;
+ const char *s;
int n;
for (s = callsign, n = 0; n < 6; n++) {
@@ -107,7 +116,7 @@ EXPORT_SYMBOL(asc2ax);
/*
* Compare two ax.25 addresses
*/
-int ax25cmp(ax25_address *a, ax25_address *b)
+int ax25cmp(const ax25_address *a, const ax25_address *b)
{
int ct = 0;
@@ -128,7 +137,7 @@ EXPORT_SYMBOL(ax25cmp);
/*
* Compare two AX.25 digipeater paths.
*/
-int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2)
+int ax25digicmp(const ax25_digi *digi1, const ax25_digi *digi2)
{
int i;
@@ -149,7 +158,9 @@ int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2)
* Given an AX.25 address pull of to, from, digi list, command/response and the start of data
*
*/
-unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, int *dama)
+const unsigned char *ax25_addr_parse(const unsigned char *buf, int len,
+ ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags,
+ int *dama)
{
int d = 0;
@@ -204,7 +215,8 @@ unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, a
/*
* Assemble an AX.25 header from the bits
*/
-int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, ax25_digi *d, int flag, int modulus)
+int ax25_addr_build(unsigned char *buf, const ax25_address *src,
+ const ax25_address *dest, const ax25_digi *d, int flag, int modulus)
{
int len = 0;
int ct = 0;
@@ -261,7 +273,7 @@ int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, a
return len;
}
-int ax25_addr_size(ax25_digi *dp)
+int ax25_addr_size(const ax25_digi *dp)
{
if (dp == NULL)
return 2 * AX25_ADDR_LEN;
@@ -272,7 +284,7 @@ int ax25_addr_size(ax25_digi *dp)
/*
* Reverse Digipeat List. May not pass both parameters as same struct
*/
-void ax25_digi_invert(ax25_digi *in, ax25_digi *out)
+void ax25_digi_invert(const ax25_digi *in, ax25_digi *out)
{
int ct;
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index d7736e58533..f84047d1e8c 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -70,11 +70,11 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2
ax25->dest_addr = *dest;
if (digi != NULL) {
- if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+ ax25->digipeat = kmemdup(digi, sizeof(*digi), GFP_ATOMIC);
+ if (ax25->digipeat == NULL) {
ax25_cb_put(ax25);
return NULL;
}
- memcpy(ax25->digipeat, digi, sizeof(ax25_digi));
}
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 51b7bdaf27e..8580356ace5 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -432,11 +432,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
}
if (ax25_rt->digipeat != NULL) {
- if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+ ax25->digipeat = kmemdup(ax25_rt->digipeat, sizeof(ax25_digi),
+ GFP_ATOMIC);
+ if (ax25->digipeat == NULL) {
err = -ENOMEM;
goto put;
}
- memcpy(ax25->digipeat, ax25_rt->digipeat, sizeof(ax25_digi));
ax25_adjust_path(addr, ax25->digipeat);
}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 867d4253797..d23a27f25d2 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -209,7 +209,9 @@ void ax25_register_sysctl(void)
}
for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) {
- ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC);
+ struct ctl_table *child = kmemdup(ax25_param_table,
+ sizeof(ax25_param_table),
+ GFP_ATOMIC);
if (!child) {
while (n--)
kfree(ax25_table[n].child);
@@ -217,7 +219,6 @@ void ax25_register_sysctl(void)
spin_unlock_bh(&ax25_dev_lock);
return;
}
- memcpy(child, ax25_param_table, sizeof(ax25_param_table));
ax25_table[n].child = ax25_dev->systable = child;
ax25_table[n].ctl_name = n + 1;
ax25_table[n].procname = ax25_dev->dev->name;
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index bbb1ed7097a..0b6cd0e2528 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -95,14 +95,14 @@ struct bnep_setup_conn_req {
struct bnep_set_filter_req {
__u8 type;
__u8 ctrl;
- __u16 len;
+ __be16 len;
__u8 list[0];
} __attribute__((packed));
struct bnep_control_rsp {
__u8 type;
__u8 ctrl;
- __u16 resp;
+ __be16 resp;
} __attribute__((packed));
struct bnep_ext_hdr {
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 4d3424c2421..7ba6470dc50 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -117,18 +117,18 @@ static int bnep_send_rsp(struct bnep_session *s, u8 ctrl, u16 resp)
static inline void bnep_set_default_proto_filter(struct bnep_session *s)
{
/* (IPv4, ARP) */
- s->proto_filter[0].start = htons(0x0800);
- s->proto_filter[0].end = htons(0x0806);
+ s->proto_filter[0].start = ETH_P_IP;
+ s->proto_filter[0].end = ETH_P_ARP;
/* (RARP, AppleTalk) */
- s->proto_filter[1].start = htons(0x8035);
- s->proto_filter[1].end = htons(0x80F3);
+ s->proto_filter[1].start = ETH_P_RARP;
+ s->proto_filter[1].end = ETH_P_AARP;
/* (IPX, IPv6) */
- s->proto_filter[2].start = htons(0x8137);
- s->proto_filter[2].end = htons(0x86DD);
+ s->proto_filter[2].start = ETH_P_IPX;
+ s->proto_filter[2].end = ETH_P_IPV6;
}
#endif
-static int bnep_ctrl_set_netfilter(struct bnep_session *s, u16 *data, int len)
+static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len)
{
int n;
@@ -150,8 +150,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, u16 *data, int len)
int i;
for (i = 0; i < n; i++) {
- f[i].start = get_unaligned(data++);
- f[i].end = get_unaligned(data++);
+ f[i].start = ntohs(get_unaligned(data++));
+ f[i].end = ntohs(get_unaligned(data++));
BT_DBG("proto filter start %d end %d",
f[i].start, f[i].end);
@@ -180,7 +180,7 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
if (len < 2)
return -EILSEQ;
- n = ntohs(get_unaligned((u16 *) data));
+ n = ntohs(get_unaligned((__be16 *) data));
data += 2; len -= 2;
if (len < n)
@@ -332,7 +332,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
goto badframe;
- s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2));
+ s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2));
if (type & BNEP_EXT_HEADER) {
if (bnep_rx_extension(s, skb) < 0)
@@ -343,7 +343,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
if (ntohs(s->eh.h_proto) == 0x8100) {
if (!skb_pull(skb, 4))
goto badframe;
- s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2));
+ s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2));
}
/* We have to alloc new skb and copy data here :(. Because original skb
@@ -365,7 +365,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_COMPRESSED_SRC_ONLY:
memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
- put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2));
+ put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
case BNEP_COMPRESSED_DST_ONLY:
@@ -375,7 +375,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_GENERAL:
memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
- put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2));
+ put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
}
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 7f7b27db6a8..67a002a9751 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -158,14 +158,15 @@ static inline int bnep_net_mc_filter(struct sk_buff *skb, struct bnep_session *s
static inline u16 bnep_net_eth_proto(struct sk_buff *skb)
{
struct ethhdr *eh = (void *) skb->data;
+ u16 proto = ntohs(eh->h_proto);
- if (ntohs(eh->h_proto) >= 1536)
- return eh->h_proto;
+ if (proto >= 1536)
+ return proto;
- if (get_unaligned((u16 *) skb->data) == 0xFFFF)
- return htons(ETH_P_802_3);
+ if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF))
+ return ETH_P_802_3;
- return htons(ETH_P_802_2);
+ return ETH_P_802_2;
}
static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session *s)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 65f09484571..bb94e6da223 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -57,6 +57,7 @@
static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
{
__u8 status;
+ struct hci_conn *pend;
BT_DBG("%s ocf 0x%x", hdev->name, ocf);
@@ -71,6 +72,15 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb
clear_bit(HCI_INQUIRY, &hdev->flags);
hci_req_complete(hdev, status);
}
+
+ hci_dev_lock(hdev);
+
+ pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2);
+ if (pend)
+ hci_acl_connect(pend);
+
+ hci_dev_unlock(hdev);
+
break;
default:
@@ -565,11 +575,20 @@ static void hci_cs_info_param(struct hci_dev *hdev, __u16 ocf, __u8 status)
static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
+ struct hci_conn *pend;
BT_DBG("%s status %d", hdev->name, status);
clear_bit(HCI_INQUIRY, &hdev->flags);
hci_req_complete(hdev, status);
+
+ hci_dev_lock(hdev);
+
+ pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2);
+ if (pend)
+ hci_acl_connect(pend);
+
+ hci_dev_unlock(hdev);
}
/* Inquiry Result */
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f26a9eb4994..dbf98c49dba 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -120,10 +120,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
if (!hci_test_bit(evt, &flt->event_mask))
continue;
- if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE &&
- flt->opcode != *(__u16 *)(skb->data + 3)) ||
- (evt == HCI_EV_CMD_STATUS &&
- flt->opcode != *(__u16 *)(skb->data + 4))))
+ if (flt->opcode &&
+ ((evt == HCI_EV_CMD_COMPLETE &&
+ flt->opcode !=
+ get_unaligned((__le16 *)(skb->data + 3))) ||
+ (evt == HCI_EV_CMD_STATUS &&
+ flt->opcode !=
+ get_unaligned((__le16 *)(skb->data + 4)))))
continue;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 954eb74eb37..d4c935692cc 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -237,9 +237,9 @@ static void bt_release(struct device *dev)
kfree(data);
}
-static void add_conn(void *data)
+static void add_conn(struct work_struct *work)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn = container_of(work, struct hci_conn, work);
int i;
if (device_register(&conn->dev) < 0) {
@@ -259,7 +259,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
BT_DBG("conn %p", conn);
- conn->dev.parent = &hdev->dev;
+ conn->dev.bus = &bt_bus;
+ conn->dev.parent = &hdev->dev;
+
conn->dev.release = bt_release;
snprintf(conn->dev.bus_id, BUS_ID_SIZE,
@@ -270,14 +272,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
dev_set_drvdata(&conn->dev, conn);
- INIT_WORK(&conn->work, add_conn, (void *) conn);
+ INIT_WORK(&conn->work, add_conn);
schedule_work(&conn->work);
}
-static void del_conn(void *data)
+static void del_conn(struct work_struct *work)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn = container_of(work, struct hci_conn, work);
device_del(&conn->dev);
}
@@ -285,7 +287,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
{
BT_DBG("conn %p", conn);
- INIT_WORK(&conn->work, del_conn, (void *) conn);
+ INIT_WORK(&conn->work, del_conn);
schedule_work(&conn->work);
}
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2b3dcb8f90f..29a8fa4d372 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -770,7 +770,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
long timeo;
int err = 0;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != BT_LISTEN) {
err = -EBADFD;
@@ -792,7 +792,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
release_sock(sk);
timeo = schedule_timeout(timeo);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != BT_LISTEN) {
err = -EBADFD;
@@ -1353,12 +1353,12 @@ static inline int l2cap_conf_output(struct sock *sk, void **ptr)
/* Configure output options and let the other side know
* which ones we don't like. */
- if (pi->conf_mtu < pi->omtu) {
- l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu);
+ if (pi->conf_mtu < pi->omtu)
result = L2CAP_CONF_UNACCEPT;
- } else {
+ else
pi->omtu = pi->conf_mtu;
- }
+
+ l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu);
BT_DBG("sk %p result %d", sk, result);
return result;
@@ -1533,6 +1533,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
return -ENOENT;
+ if (sk->sk_state == BT_DISCONN)
+ goto unlock;
+
l2cap_parse_conf_req(sk, req->data, cmd->len - sizeof(*req));
if (flags & 0x0001) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ddc4e9d5963..278c8676906 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -854,7 +854,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
rpn->flow_ctrl = flow_ctrl_settings;
rpn->xon_char = xon_char;
rpn->xoff_char = xoff_char;
- rpn->param_mask = param_mask;
+ rpn->param_mask = cpu_to_le16(param_mask);
*ptr = __fcs(buf); ptr++;
@@ -1018,7 +1018,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
if (len > 127) {
hdr = (void *) skb_push(skb, 4);
- put_unaligned(htobs(__len16(len)), (u16 *) &hdr->len);
+ put_unaligned(htobs(__len16(len)), (__le16 *) &hdr->len);
} else {
hdr = (void *) skb_push(skb, 3);
hdr->len = __len8(len);
@@ -1343,7 +1343,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
/* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit,
* no parity, no flow control lines, normal XON/XOFF chars */
- if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) {
bit_rate = rpn->bit_rate;
if (bit_rate != RFCOMM_RPN_BR_115200) {
BT_DBG("RPN bit rate mismatch 0x%x", bit_rate);
@@ -1352,7 +1352,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_DATA) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_DATA)) {
data_bits = __get_rpn_data_bits(rpn->line_settings);
if (data_bits != RFCOMM_RPN_DATA_8) {
BT_DBG("RPN data bits mismatch 0x%x", data_bits);
@@ -1361,7 +1361,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_STOP) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_STOP)) {
stop_bits = __get_rpn_stop_bits(rpn->line_settings);
if (stop_bits != RFCOMM_RPN_STOP_1) {
BT_DBG("RPN stop bits mismatch 0x%x", stop_bits);
@@ -1370,7 +1370,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_PARITY)) {
parity = __get_rpn_parity(rpn->line_settings);
if (parity != RFCOMM_RPN_PARITY_NONE) {
BT_DBG("RPN parity mismatch 0x%x", parity);
@@ -1379,7 +1379,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_FLOW)) {
flow_ctrl = rpn->flow_ctrl;
if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) {
BT_DBG("RPN flow ctrl mismatch 0x%x", flow_ctrl);
@@ -1388,7 +1388,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_XON) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_XON)) {
xon_char = rpn->xon_char;
if (xon_char != RFCOMM_RPN_XON_CHAR) {
BT_DBG("RPN XON char mismatch 0x%x", xon_char);
@@ -1397,7 +1397,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
}
}
- if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) {
+ if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_XOFF)) {
xoff_char = rpn->xoff_char;
if (xoff_char != RFCOMM_RPN_XOFF_CHAR) {
BT_DBG("RPN XOFF char mismatch 0x%x", xoff_char);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b8e3a5f1c8a..e0e0d09023b 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -752,9 +752,9 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned
return -ENOIOCTLCMD;
}
-static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old)
+static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
{
- struct termios *new = (struct termios *) tty->termios;
+ struct ktermios *new = tty->termios;
int old_baud_rate = tty_termios_baud_rate(old);
int new_baud_rate = tty_termios_baud_rate(new);
@@ -765,7 +765,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old)
BT_DBG("tty %p termios %p", tty, old);
- if (!dev)
+ if (!dev || !dev->dlc || !dev->dlc->session)
return;
/* Handle turning off CRTSCTS */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d9f04864d15..8ca448db7a0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,7 +23,7 @@
#include <asm/atomic.h>
#include "br_private.h"
-static kmem_cache_t *br_fdb_cache __read_mostly;
+static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f753c40c11d..55bb2634c08 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -77,12 +77,16 @@ static int port_cost(struct net_device *dev)
* Called from work queue to allow for calling functions that
* might sleep (such as speed check), and to debounce.
*/
-static void port_carrier_check(void *arg)
+static void port_carrier_check(struct work_struct *work)
{
- struct net_device *dev = arg;
struct net_bridge_port *p;
+ struct net_device *dev;
struct net_bridge *br;
+ dev = container_of(work, struct net_bridge_port,
+ carrier_check.work)->dev;
+ work_release(work);
+
rtnl_lock();
p = dev->br_port;
if (!p)
@@ -276,7 +280,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
p->port_no = index;
br_init_port(p);
p->state = BR_STATE_DISABLED;
- INIT_WORK(&p->carrier_check, port_carrier_check, dev);
+ INIT_DELAYED_WORK_NAR(&p->carrier_check, port_carrier_check);
br_stp_port_timer_init(p);
kobject_init(&p->kobj);
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 4e4119a1213..4c61a7e0a86 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -58,12 +58,13 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf,
{
int num;
void *buf;
- size_t size = maxnum * sizeof(struct __fdb_entry);
+ size_t size;
- if (size > PAGE_SIZE) {
- size = PAGE_SIZE;
+ /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
+ if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
- }
+
+ size = maxnum * sizeof(struct __fdb_entry);
buf = kmalloc(size, GFP_USER);
if (!buf)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ac181be13d8..ea3337ad0ed 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -34,13 +34,13 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
+#include <linux/inetdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/route.h>
#include <asm/uaccess.h>
-#include <asm/checksum.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -61,9 +61,6 @@ static int brnf_filter_vlan_tagged __read_mostly = 1;
#define brnf_filter_vlan_tagged 1
#endif
-int brnf_deferred_hooks;
-EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
-
static __be16 inline vlan_proto(const struct sk_buff *skb)
{
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -222,10 +219,14 @@ static void __br_dnat_complain(void)
*
* Otherwise, the packet is considered to be routed and we just
* change the destination MAC address so that the packet will
- * later be passed up to the IP stack to be routed.
+ * later be passed up to the IP stack to be routed. For a redirected
+ * packet, ip_route_input() will give back the localhost as output device,
+ * which differs from the bridge device.
*
* Let us now consider the case that ip_route_input() fails:
*
+ * This can be because the destination address is martian, in which case
+ * the packet will be dropped.
* After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
* will fail, while __ip_route_output_key() will return success. The source
* address for __ip_route_output_key() is set to zero, so __ip_route_output_key
@@ -238,7 +239,8 @@ static void __br_dnat_complain(void)
*
* --Lennert, 20020411
* --Bart, 20020416 (updated)
- * --Bart, 20021007 (updated) */
+ * --Bart, 20021007 (updated)
+ * --Bart, 20062711 (updated) */
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -265,15 +267,15 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct iphdr *iph = skb->nh.iph;
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ int err;
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
if (dnat_took_place(skb)) {
- if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) {
+ if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct rtable *rt;
struct flowi fl = {
.nl_u = {
@@ -284,19 +286,33 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
},
.proto = 0,
};
+ struct in_device *in_dev = in_dev_get(dev);
+
+ /* If err equals -EHOSTUNREACH the error is due to a
+ * martian destination or due to the fact that
+ * forwarding is disabled. For most martian packets,
+ * ip_route_output_key() will fail. It won't fail for 2 types of
+ * martian destinations: loopback destinations and destination
+ * 0.0.0.0. In both cases the packet will be dropped because the
+ * destination is the loopback device and not the bridge. */
+ if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
+ goto free_skb;
if (!ip_route_output_key(&rt, &fl)) {
/* - Bridged-and-DNAT'ed traffic doesn't
- * require ip_forwarding.
- * - Deal with redirected traffic. */
- if (((struct dst_entry *)rt)->dev == dev ||
- rt->rt_type == RTN_LOCAL) {
+ * require ip_forwarding. */
+ if (((struct dst_entry *)rt)->dev == dev) {
skb->dst = (struct dst_entry *)rt;
goto bridged_dnat;
}
+ /* we are sure that forwarding is disabled, so printing
+ * this message is no problem. Note that the packet could
+ * still have a martian destination address, in which case
+ * the packet could be dropped even if forwarding were enabled */
__br_dnat_complain();
dst_release((struct dst_entry *)rt);
}
+free_skb:
kfree_skb(skb);
return 0;
} else {
@@ -381,7 +397,7 @@ static int check_hbh_len(struct sk_buff *skb)
case IPV6_TLV_JUMBO:
if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
goto bad;
- pkt_len = ntohl(*(u32 *) (skb->nh.raw + off + 2));
+ pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
if (pkt_len <= IPV6_MAXPLEN ||
skb->nh.ipv6h->payload_len)
goto bad;
@@ -666,110 +682,50 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
return NF_STOLEN;
}
-/* PF_BRIDGE/LOCAL_OUT ***********************************************/
-static int br_nf_local_out_finish(struct sk_buff *skb)
-{
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
-
- NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
- br_forward_finish, NF_BR_PRI_FIRST + 1);
-
- return 0;
-}
-
-/* This function sees both locally originated IP packets and forwarded
+/* PF_BRIDGE/LOCAL_OUT ***********************************************
+ *
+ * This function sees both locally originated IP packets and forwarded
* IP packets (in both cases the destination device is a bridge
* device). It also sees bridged-and-DNAT'ed packets.
- * To be able to filter on the physical bridge devices (with the physdev
- * module), we steal packets destined to a bridge device away from the
- * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
- * when we have determined the real output device. This is done in here.
*
* If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
* and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
* will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
* NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
* will be executed.
- * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
- * this packet before, and so the packet was locally originated. We fake
- * the PF_INET/LOCAL_OUT hook.
- * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
- * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
- * even routed packets that didn't arrive on a bridge interface have their
- * nf_bridge->physindev set. */
+ */
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct net_device *realindev, *realoutdev;
+ struct net_device *realindev;
struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
- int pf;
if (!skb->nf_bridge)
return NF_ACCEPT;
- if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
- pf = PF_INET;
- else
- pf = PF_INET6;
-
nf_bridge = skb->nf_bridge;
- nf_bridge->physoutdev = skb->dev;
- realindev = nf_bridge->physindev;
+ if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
+ return NF_ACCEPT;
/* Bridged, take PF_BRIDGE/FORWARD.
* (see big note in front of br_nf_pre_routing_finish) */
- if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
- }
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge->physoutdev = skb->dev;
+ realindev = nf_bridge->physindev;
- NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
- skb->dev, br_forward_finish);
- goto out;
+ if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->mask ^= BRNF_PKT_TYPE;
}
- realoutdev = bridge_parent(skb->dev);
- if (!realoutdev)
- return NF_DROP;
-
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
- /* iptables should match -o br0.x */
- if (nf_bridge->netoutdev)
- realoutdev = nf_bridge->netoutdev;
-#endif
if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(skb, VLAN_HLEN);
- (*pskb)->nh.raw += VLAN_HLEN;
- }
- /* IP forwarded traffic has a physindev, locally
- * generated traffic hasn't. */
- if (realindev != NULL) {
- if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
- struct net_device *parent = bridge_parent(realindev);
- if (parent)
- realindev = parent;
- }
-
- NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
- realoutdev, br_nf_local_out_finish,
- NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
- } else {
- NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
- realoutdev, br_nf_local_out_finish,
- NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+ skb_push(skb, VLAN_HLEN);
+ skb->nh.raw -= VLAN_HLEN;
}
-out:
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
+ br_forward_finish);
return NF_STOLEN;
}
@@ -875,69 +831,6 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
return NF_ACCEPT;
}
-/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
- * and PF_INET(6)/POST_ROUTING until we have done the forwarding
- * decision in the bridge code and have determined nf_bridge->physoutdev. */
-static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct sk_buff *skb = *pskb;
-
- if ((out->hard_start_xmit == br_dev_xmit &&
- okfn != br_nf_forward_finish &&
- okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
- || ((out->priv_flags & IFF_802_1Q_VLAN) &&
- VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
-#endif
- ) {
- struct nf_bridge_info *nf_bridge;
-
- if (!skb->nf_bridge) {
-#ifdef CONFIG_SYSCTL
- /* This code is executed while in the IP(v6) stack,
- the version should be 4 or 6. We can't use
- skb->protocol because that isn't set on
- PF_INET(6)/LOCAL_OUT. */
- struct iphdr *ip = skb->nh.iph;
-
- if (ip->version == 4 && !brnf_call_iptables)
- return NF_ACCEPT;
- else if (ip->version == 6 && !brnf_call_ip6tables)
- return NF_ACCEPT;
- else if (!brnf_deferred_hooks)
- return NF_ACCEPT;
-#endif
- if (hook == NF_IP_POST_ROUTING)
- return NF_ACCEPT;
- if (!nf_bridge_alloc(skb))
- return NF_DROP;
- }
-
- nf_bridge = skb->nf_bridge;
-
- /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
- * will need the indev then. For a brouter, the real indev
- * can be a bridge port, so we make sure br_nf_local_out()
- * doesn't use the bridge parent of the indev by using
- * the BRNF_DONT_TAKE_PARENT mask. */
- if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
- nf_bridge->mask |= BRNF_DONT_TAKE_PARENT;
- nf_bridge->physindev = (struct net_device *)in;
- }
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
- /* the iptables outdev is br0.x, not br0 */
- if (out->priv_flags & IFF_802_1Q_VLAN)
- nf_bridge->netoutdev = (struct net_device *)out;
-#endif
- return NF_STOP;
- }
-
- return NF_ACCEPT;
-}
-
/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
* PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
@@ -983,36 +876,6 @@ static struct nf_hook_ops br_nf_ops[] = {
.pf = PF_INET6,
.hooknum = NF_IP6_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_FORWARD,
- .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_LOCAL_OUT,
- .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_FIRST, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_POST_ROUTING,
- .priority = NF_IP6_PRI_FIRST, },
};
#ifdef CONFIG_SYSCTL
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8f661195d09..a9139682c49 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -15,6 +15,18 @@
#include <net/netlink.h>
#include "br_private.h"
+static inline size_t br_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ + nla_total_size(1); /* IFLA_PROTINFO */
+}
+
/*
* Create one netlink message for one interface
* Contains port and master info as well as carrier and bridge state.
@@ -24,51 +36,43 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
{
const struct net_bridge *br = port->br;
const struct net_device *dev = port->dev;
- struct ifinfomsg *r;
+ struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- u32 mtu = dev->mtu;
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
- u8 portstate = port->state;
pr_debug("br_fill_info event %d port %s master %s\n",
event, dev->name, br->dev->name);
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
- r = NLMSG_DATA(nlh);
- r->ifi_family = AF_BRIDGE;
- r->__ifi_pad = 0;
- r->ifi_type = dev->type;
- r->ifi_index = dev->ifindex;
- r->ifi_flags = dev_get_flags(dev);
- r->ifi_change = 0;
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
- RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+ hdr = nlmsg_data(nlh);
+ hdr->ifi_family = AF_BRIDGE;
+ hdr->__ifi_pad = 0;
+ hdr->ifi_type = dev->type;
+ hdr->ifi_index = dev->ifindex;
+ hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_change = 0;
- RTA_PUT(skb, IFLA_MASTER, sizeof(int), &br->dev->ifindex);
+ NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+ NLA_PUT_U32(skb, IFLA_MASTER, br->dev->ifindex);
+ NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_U8(skb, IFLA_OPERSTATE, operstate);
if (dev->addr_len)
- RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
- RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
if (dev->ifindex != dev->iflink)
- RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
-
-
- RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
+ NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
if (event == RTM_NEWLINK)
- RTA_PUT(skb, IFLA_PROTINFO, sizeof(portstate), &portstate);
-
- nlh->nlmsg_len = skb->tail - b;
-
- return skb->len;
+ NLA_PUT_U8(skb, IFLA_PROTINFO, port->state);
-nlmsg_failure:
-rtattr_failure:
+ return nlmsg_end(skb, nlh);
- skb_trim(skb, b - skb->data);
- return -EINVAL;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
/*
@@ -77,19 +81,16 @@ rtattr_failure:
void br_ifinfo_notify(int event, struct net_bridge_port *port)
{
struct sk_buff *skb;
- int payload = sizeof(struct ifinfomsg) + 128;
int err = -ENOBUFS;
pr_debug("bridge notify event=%d\n", event);
- skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in br_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
errout:
@@ -104,25 +105,18 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
int idx;
- int s_idx = cb->args[0];
- int err = 0;
read_lock(&dev_base_lock);
for (dev = dev_base, idx = 0; dev; dev = dev->next) {
- struct net_bridge_port *p = dev->br_port;
-
/* not a bridge port */
- if (!p)
- continue;
-
- if (idx < s_idx)
- goto cont;
+ if (dev->br_port == NULL || idx < cb->args[0])
+ goto skip;
- err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
- if (err <= 0)
+ if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWLINK,
+ NLM_F_MULTI) < 0)
break;
-cont:
+skip:
++idx;
}
read_unlock(&dev_base_lock);
@@ -138,26 +132,27 @@ cont:
*/
static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
- struct ifinfomsg *ifm = NLMSG_DATA(nlh);
+ struct ifinfomsg *ifm;
+ struct nlattr *protinfo;
struct net_device *dev;
struct net_bridge_port *p;
u8 new_state;
+ if (nlmsg_len(nlh) < sizeof(*ifm))
+ return -EINVAL;
+
+ ifm = nlmsg_data(nlh);
if (ifm->ifi_family != AF_BRIDGE)
return -EPFNOSUPPORT;
- /* Must pass valid state as PROTINFO */
- if (rta[IFLA_PROTINFO-1]) {
- u8 *pstate = RTA_DATA(rta[IFLA_PROTINFO-1]);
- new_state = *pstate;
- } else
+ protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
+ if (!protinfo || nla_len(protinfo) < sizeof(u8))
return -EINVAL;
+ new_state = nla_get_u8(protinfo);
if (new_state > BR_STATE_BLOCKING)
return -EINVAL;
- /* Find bridge port */
dev = __dev_get_by_index(ifm->ifi_index);
if (!dev)
return -ENODEV;
@@ -170,10 +165,8 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (p->br->stp_enabled)
return -EBUSY;
- if (!netif_running(dev))
- return -ENETDOWN;
-
- if (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED)
+ if (!netif_running(dev) ||
+ (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED))
return -ENETDOWN;
p->state = new_state;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 74258d86f25..3a534e94c7f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -82,7 +82,7 @@ struct net_bridge_port
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
- struct work_struct carrier_check;
+ struct delayed_work carrier_check;
struct rcu_head rcu;
};
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 96bcb2ff59a..de9d1a9473f 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -376,7 +376,7 @@ int br_sysfs_addbr(struct net_device *dev)
err = sysfs_create_bin_file(brobj, &bridge_forward);
if (err) {
- pr_info("%s: can't create attribue file %s/%s\n",
+ pr_info("%s: can't create attribute file %s/%s\n",
__FUNCTION__, dev->name, bridge_forward.attr.name);
goto out2;
}
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index d42f63f5e9f..9abbc09ccdc 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -17,7 +17,7 @@ static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *
{
struct ebt_802_3_info *info = (struct ebt_802_3_info *)data;
struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
- uint16_t type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
+ __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
if (info->bitmask & EBT_802_3_SAP) {
if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP))
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index a614485828a..ce97c4285f9 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
- const char *mac, uint32_t ip)
+ const char *mac, __be32 ip)
{
/* You may be puzzled as to how this code works.
* Some tricks were used, refer to
@@ -70,7 +70,7 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash
return 0;
}
-static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
+static int get_ip_dst(const struct sk_buff *skb, __be32 *addr)
{
if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
struct iphdr _iph, *ih;
@@ -81,16 +81,16 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
*addr = ih->daddr;
} else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
struct arphdr _arph, *ah;
- uint32_t buf, *bp;
+ __be32 buf, *bp;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL ||
- ah->ar_pln != sizeof(uint32_t) ||
+ ah->ar_pln != sizeof(__be32) ||
ah->ar_hln != ETH_ALEN)
return -1;
bp = skb_header_pointer(skb, sizeof(struct arphdr) +
- 2 * ETH_ALEN + sizeof(uint32_t),
- sizeof(uint32_t), &buf);
+ 2 * ETH_ALEN + sizeof(__be32),
+ sizeof(__be32), &buf);
if (bp == NULL)
return -1;
*addr = *bp;
@@ -98,7 +98,7 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
return 0;
}
-static int get_ip_src(const struct sk_buff *skb, uint32_t *addr)
+static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
{
if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
struct iphdr _iph, *ih;
@@ -109,15 +109,15 @@ static int get_ip_src(const struct sk_buff *skb, uint32_t *addr)
*addr = ih->saddr;
} else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
struct arphdr _arph, *ah;
- uint32_t buf, *bp;
+ __be32 buf, *bp;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL ||
- ah->ar_pln != sizeof(uint32_t) ||
+ ah->ar_pln != sizeof(__be32) ||
ah->ar_hln != ETH_ALEN)
return -1;
bp = skb_header_pointer(skb, sizeof(struct arphdr) +
- ETH_ALEN, sizeof(uint32_t), &buf);
+ ETH_ALEN, sizeof(__be32), &buf);
if (bp == NULL)
return -1;
*addr = *bp;
@@ -133,7 +133,7 @@ static int ebt_filter_among(const struct sk_buff *skb,
struct ebt_among_info *info = (struct ebt_among_info *) data;
const char *dmac, *smac;
const struct ebt_mac_wormhash *wh_dst, *wh_src;
- uint32_t dip = 0, sip = 0;
+ __be32 dip = 0, sip = 0;
wh_dst = ebt_among_wh_dst(info);
wh_src = ebt_among_wh_src(info);
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index a6c81d9f73b..9c599800a90 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,10 +35,10 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
return EBT_NOMATCH;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
- uint32_t _addr, *ap;
+ __be32 _addr, *ap;
/* IPv4 addresses are always 4 bytes */
- if (ah->ar_pln != sizeof(uint32_t))
+ if (ah->ar_pln != sizeof(__be32))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_SRC_IP) {
ap = skb_header_pointer(skb, sizeof(struct arphdr) +
@@ -53,7 +53,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
if (info->bitmask & EBT_ARP_DST_IP) {
ap = skb_header_pointer(skb, sizeof(struct arphdr) +
- 2*ah->ar_hln+sizeof(uint32_t),
+ 2*ah->ar_hln+sizeof(__be32),
sizeof(_addr), &_addr);
if (ap == NULL)
return EBT_NOMATCH;
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 65b665ce57b..e4c642448e1 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -20,8 +20,8 @@
#include <linux/module.h>
struct tcpudphdr {
- uint16_t src;
- uint16_t dst;
+ __be16 src;
+ __be16 dst;
};
static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 466ed3440b7..a184f879f25 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -38,8 +38,8 @@ static int ebt_log_check(const char *tablename, unsigned int hookmask,
struct tcpudphdr
{
- uint16_t src;
- uint16_t dst;
+ __be16 src;
+ __be16 dst;
};
struct arppayload
@@ -130,7 +130,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
* then log the ARP payload */
if (ah->ar_hrd == htons(1) &&
ah->ar_hln == ETH_ALEN &&
- ah->ar_pln == sizeof(uint32_t)) {
+ ah->ar_pln == sizeof(__be32)) {
struct arppayload _arpp, *ap;
ap = skb_header_pointer(skb, sizeof(_arph),
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index b54306a934e..62d23c7b25e 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -25,15 +25,15 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
int action = info->target & -16;
if (action == MARK_SET_VALUE)
- (*pskb)->nfmark = info->mark;
+ (*pskb)->mark = info->mark;
else if (action == MARK_OR_VALUE)
- (*pskb)->nfmark |= info->mark;
+ (*pskb)->mark |= info->mark;
else if (action == MARK_AND_VALUE)
- (*pskb)->nfmark &= info->mark;
+ (*pskb)->mark &= info->mark;
else
- (*pskb)->nfmark ^= info->mark;
+ (*pskb)->mark ^= info->mark;
- return info->target | -16;
+ return info->target | ~EBT_VERDICT_BITS;
}
static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
@@ -44,13 +44,13 @@ static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
return -EINVAL;
- tmp = info->target | -16;
+ tmp = info->target | ~EBT_VERDICT_BITS;
if (BASE_CHAIN && tmp == EBT_RETURN)
return -EINVAL;
CLEAR_BASE_CHAIN_BIT;
if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
return -EINVAL;
- tmp = info->target & -16;
+ tmp = info->target & ~EBT_VERDICT_BITS;
if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
return -EINVAL;
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index a6413e4b498..025869ee0b6 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -19,8 +19,8 @@ static int ebt_filter_mark(const struct sk_buff *skb,
struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data;
if (info->bitmask & EBT_MARK_OR)
- return !(!!(skb->nfmark & info->mask) ^ info->invert);
- return !(((skb->nfmark & info->mask) == info->mark) ^ info->invert);
+ return !(!!(skb->mark & info->mask) ^ info->invert);
+ return !(((skb->mark & info->mask) == info->mark) ^ info->invert);
}
static int ebt_mark_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index cbb33e24ca8..a50722182bf 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -12,6 +12,8 @@
#include <linux/netfilter_bridge/ebt_nat.h>
#include <linux/module.h>
#include <net/sock.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
@@ -31,24 +33,43 @@ static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
*pskb = nskb;
}
memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
- return info->target;
+ if (!(info->target & NAT_ARP_BIT) &&
+ eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) {
+ struct arphdr _ah, *ap;
+
+ ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah);
+ if (ap == NULL)
+ return EBT_DROP;
+ if (ap->ar_hln != ETH_ALEN)
+ goto out;
+ if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN))
+ return EBT_DROP;
+ }
+out:
+ return info->target | ~EBT_VERDICT_BITS;
}
static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
const struct ebt_entry *e, void *data, unsigned int datalen)
{
struct ebt_nat_info *info = (struct ebt_nat_info *) data;
+ int tmp;
if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
return -EINVAL;
- if (BASE_CHAIN && info->target == EBT_RETURN)
+ tmp = info->target | ~EBT_VERDICT_BITS;
+ if (BASE_CHAIN && tmp == EBT_RETURN)
return -EINVAL;
CLEAR_BASE_CHAIN_BIT;
if (strcmp(tablename, "nat"))
return -EINVAL;
if (hookmask & ~(1 << NF_BR_POST_ROUTING))
return -EINVAL;
- if (INVALID_TARGET)
+
+ if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+ return -EINVAL;
+ tmp = info->target | EBT_VERDICT_BITS;
+ if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
return -EINVAL;
return 0;
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 9f950db3b76..c1af68b5a29 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -168,7 +168,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
if (ub->qlen == 1)
skb_set_timestamp(ub->skb, &pm->stamp);
pm->data_len = copy_len;
- pm->mark = skb->nfmark;
+ pm->mark = skb->mark;
pm->hook = hooknr;
if (uloginfo->prefix != NULL)
strcpy(pm->prefix, uloginfo->prefix);
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index a2b452862b7..7ee37762296 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -55,7 +55,7 @@ ebt_filter_vlan(const struct sk_buff *skb,
unsigned short id; /* VLAN ID, given from frame TCI */
unsigned char prio; /* user_priority, given from frame TCI */
/* VLAN encapsulated Type/Length field, given from orig frame */
- unsigned short encap;
+ __be16 encap;
fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
if (fp == NULL)
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 9a6e548e148..d37ce047893 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -23,7 +23,7 @@ static struct ebt_entries initial_chain = {
.policy = EBT_ACCEPT,
};
-static struct ebt_replace initial_table =
+static struct ebt_replace_kernel initial_table =
{
.name = "broute",
.valid_hooks = 1 << NF_BR_BROUTING,
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 3d5bd44f239..127135ead2d 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -30,7 +30,7 @@ static struct ebt_entries initial_chains[] =
},
};
-static struct ebt_replace initial_table =
+static struct ebt_replace_kernel initial_table =
{
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 04dd42efda1..9c50488b62e 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -30,7 +30,7 @@ static struct ebt_entries initial_chains[] =
}
};
-static struct ebt_replace initial_table =
+static struct ebt_replace_kernel initial_table =
{
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3df55b2bd91..bee558a4180 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -86,7 +86,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m,
static inline int ebt_dev_check(char *entry, const struct net_device *device)
{
int i = 0;
- char *devname = device->name;
+ const char *devname = device->name;
if (*entry == '\0')
return 0;
@@ -338,10 +338,11 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
const char *name, unsigned int hookmask, unsigned int *cnt)
{
struct ebt_match *match;
+ size_t left = ((char *)e + e->watchers_offset) - (char *)m;
int ret;
- if (((char *)m) + m->match_size + sizeof(struct ebt_entry_match) >
- ((char *)e) + e->watchers_offset)
+ if (left < sizeof(struct ebt_entry_match) ||
+ left - sizeof(struct ebt_entry_match) < m->match_size)
return -EINVAL;
match = find_match_lock(m->u.name, &ret, &ebt_mutex);
if (!match)
@@ -367,10 +368,11 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
const char *name, unsigned int hookmask, unsigned int *cnt)
{
struct ebt_watcher *watcher;
+ size_t left = ((char *)e + e->target_offset) - (char *)w;
int ret;
- if (((char *)w) + w->watcher_size + sizeof(struct ebt_entry_watcher) >
- ((char *)e) + e->target_offset)
+ if (left < sizeof(struct ebt_entry_watcher) ||
+ left - sizeof(struct ebt_entry_watcher) < w->watcher_size)
return -EINVAL;
watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex);
if (!watcher)
@@ -391,35 +393,91 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
return 0;
}
+static int ebt_verify_pointers(struct ebt_replace *repl,
+ struct ebt_table_info *newinfo)
+{
+ unsigned int limit = repl->entries_size;
+ unsigned int valid_hooks = repl->valid_hooks;
+ unsigned int offset = 0;
+ int i;
+
+ for (i = 0; i < NF_BR_NUMHOOKS; i++)
+ newinfo->hook_entry[i] = NULL;
+
+ newinfo->entries_size = repl->entries_size;
+ newinfo->nentries = repl->nentries;
+
+ while (offset < limit) {
+ size_t left = limit - offset;
+ struct ebt_entry *e = (void *)newinfo->entries + offset;
+
+ if (left < sizeof(unsigned int))
+ break;
+
+ for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+ if ((valid_hooks & (1 << i)) == 0)
+ continue;
+ if ((char __user *)repl->hook_entry[i] ==
+ repl->entries + offset)
+ break;
+ }
+
+ if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
+ if (e->bitmask != 0) {
+ /* we make userspace set this right,
+ so there is no misunderstanding */
+ BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
+ "in distinguisher\n");
+ return -EINVAL;
+ }
+ if (i != NF_BR_NUMHOOKS)
+ newinfo->hook_entry[i] = (struct ebt_entries *)e;
+ if (left < sizeof(struct ebt_entries))
+ break;
+ offset += sizeof(struct ebt_entries);
+ } else {
+ if (left < sizeof(struct ebt_entry))
+ break;
+ if (left < e->next_offset)
+ break;
+ offset += e->next_offset;
+ }
+ }
+ if (offset != limit) {
+ BUGPRINT("entries_size too small\n");
+ return -EINVAL;
+ }
+
+ /* check if all valid hooks have a chain */
+ for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+ if (!newinfo->hook_entry[i] &&
+ (valid_hooks & (1 << i))) {
+ BUGPRINT("Valid hook without chain\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
/*
* this one is very careful, as it is the first function
* to parse the userspace data
*/
static inline int
ebt_check_entry_size_and_hooks(struct ebt_entry *e,
- struct ebt_table_info *newinfo, char *base, char *limit,
- struct ebt_entries **hook_entries, unsigned int *n, unsigned int *cnt,
- unsigned int *totalcnt, unsigned int *udc_cnt, unsigned int valid_hooks)
+ struct ebt_table_info *newinfo,
+ unsigned int *n, unsigned int *cnt,
+ unsigned int *totalcnt, unsigned int *udc_cnt)
{
int i;
for (i = 0; i < NF_BR_NUMHOOKS; i++) {
- if ((valid_hooks & (1 << i)) == 0)
- continue;
- if ( (char *)hook_entries[i] - base ==
- (char *)e - newinfo->entries)
+ if ((void *)e == (void *)newinfo->hook_entry[i])
break;
}
/* beginning of a new chain
if i == NF_BR_NUMHOOKS it must be a user defined chain */
- if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
- if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) != 0) {
- /* we make userspace set this right,
- so there is no misunderstanding */
- BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
- "in distinguisher\n");
- return -EINVAL;
- }
+ if (i != NF_BR_NUMHOOKS || !e->bitmask) {
/* this checks if the previous chain has as many entries
as it said it has */
if (*n != *cnt) {
@@ -427,12 +485,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e,
"in the chain\n");
return -EINVAL;
}
- /* before we look at the struct, be sure it is not too big */
- if ((char *)hook_entries[i] + sizeof(struct ebt_entries)
- > limit) {
- BUGPRINT("entries_size too small\n");
- return -EINVAL;
- }
if (((struct ebt_entries *)e)->policy != EBT_DROP &&
((struct ebt_entries *)e)->policy != EBT_ACCEPT) {
/* only RETURN from udc */
@@ -444,8 +496,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e,
}
if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */
(*udc_cnt)++;
- else
- newinfo->hook_entry[i] = (struct ebt_entries *)e;
if (((struct ebt_entries *)e)->counter_offset != *totalcnt) {
BUGPRINT("counter_offset != totalcnt");
return -EINVAL;
@@ -466,7 +516,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e,
BUGPRINT("target size too small\n");
return -EINVAL;
}
-
(*cnt)++;
(*totalcnt)++;
return 0;
@@ -485,17 +534,14 @@ struct ebt_cl_stack
*/
static inline int
ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
- struct ebt_entries **hook_entries, unsigned int *n, unsigned int valid_hooks,
- struct ebt_cl_stack *udc)
+ unsigned int *n, struct ebt_cl_stack *udc)
{
int i;
/* we're only interested in chain starts */
- if (e->bitmask & EBT_ENTRY_OR_ENTRIES)
+ if (e->bitmask)
return 0;
for (i = 0; i < NF_BR_NUMHOOKS; i++) {
- if ((valid_hooks & (1 << i)) == 0)
- continue;
if (newinfo->hook_entry[i] == (struct ebt_entries *)e)
break;
}
@@ -541,7 +587,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
{
struct ebt_entry_target *t;
- if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+ if (e->bitmask == 0)
return 0;
/* we're done */
if (cnt && (*cnt)-- == 0)
@@ -558,16 +604,17 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
static inline int
ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
- const char *name, unsigned int *cnt, unsigned int valid_hooks,
+ const char *name, unsigned int *cnt,
struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
{
struct ebt_entry_target *t;
struct ebt_target *target;
unsigned int i, j, hook = 0, hookmask = 0;
+ size_t gap = e->next_offset - e->target_offset;
int ret;
/* don't mess with the struct ebt_entries */
- if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+ if (e->bitmask == 0)
return 0;
if (e->bitmask & ~EBT_F_MASK) {
@@ -584,7 +631,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
}
/* what hook do we belong to? */
for (i = 0; i < NF_BR_NUMHOOKS; i++) {
- if ((valid_hooks & (1 << i)) == 0)
+ if (!newinfo->hook_entry[i])
continue;
if ((char *)newinfo->hook_entry[i] < (char *)e)
hook = i;
@@ -625,8 +672,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
t->u.target = target;
if (t->u.target == &ebt_standard_target) {
- if (e->target_offset + sizeof(struct ebt_standard_target) >
- e->next_offset) {
+ if (gap < sizeof(struct ebt_standard_target)) {
BUGPRINT("Standard target size too big\n");
ret = -EFAULT;
goto cleanup_watchers;
@@ -637,8 +683,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
ret = -EFAULT;
goto cleanup_watchers;
}
- } else if ((e->target_offset + t->target_size +
- sizeof(struct ebt_entry_target) > e->next_offset) ||
+ } else if (t->target_size > gap - sizeof(struct ebt_entry_target) ||
(t->u.target->check &&
t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){
module_put(t->u.target->me);
@@ -708,7 +753,9 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
BUGPRINT("loop\n");
return -1;
}
- /* this can't be 0, so the above test is correct */
+ if (cl_s[i].hookmask & (1 << hooknr))
+ goto letscontinue;
+ /* this can't be 0, so the loop test is correct */
cl_s[i].cs.n = pos + 1;
pos = 0;
cl_s[i].cs.e = ((void *)e + e->next_offset);
@@ -728,42 +775,35 @@ letscontinue:
}
/* do the parsing of the table/chains/entries/matches/watchers/targets, heh */
-static int translate_table(struct ebt_replace *repl,
- struct ebt_table_info *newinfo)
+static int translate_table(char *name, struct ebt_table_info *newinfo)
{
unsigned int i, j, k, udc_cnt;
int ret;
struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */
i = 0;
- while (i < NF_BR_NUMHOOKS && !(repl->valid_hooks & (1 << i)))
+ while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i])
i++;
if (i == NF_BR_NUMHOOKS) {
BUGPRINT("No valid hooks specified\n");
return -EINVAL;
}
- if (repl->hook_entry[i] != (struct ebt_entries *)repl->entries) {
+ if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) {
BUGPRINT("Chains don't start at beginning\n");
return -EINVAL;
}
/* make sure chains are ordered after each other in same order
as their corresponding hooks */
for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
- if (!(repl->valid_hooks & (1 << j)))
+ if (!newinfo->hook_entry[j])
continue;
- if ( repl->hook_entry[j] <= repl->hook_entry[i] ) {
+ if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) {
BUGPRINT("Hook order must be followed\n");
return -EINVAL;
}
i = j;
}
- for (i = 0; i < NF_BR_NUMHOOKS; i++)
- newinfo->hook_entry[i] = NULL;
-
- newinfo->entries_size = repl->entries_size;
- newinfo->nentries = repl->nentries;
-
/* do some early checkings and initialize some things */
i = 0; /* holds the expected nr. of entries for the chain */
j = 0; /* holds the up to now counted entries for the chain */
@@ -771,9 +811,8 @@ static int translate_table(struct ebt_replace *repl,
newinfo->nentries afterwards */
udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */
ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
- ebt_check_entry_size_and_hooks, newinfo, repl->entries,
- repl->entries + repl->entries_size, repl->hook_entry, &i, &j, &k,
- &udc_cnt, repl->valid_hooks);
+ ebt_check_entry_size_and_hooks, newinfo,
+ &i, &j, &k, &udc_cnt);
if (ret != 0)
return ret;
@@ -788,15 +827,6 @@ static int translate_table(struct ebt_replace *repl,
return -EINVAL;
}
- /* check if all valid hooks have a chain */
- for (i = 0; i < NF_BR_NUMHOOKS; i++) {
- if (newinfo->hook_entry[i] == NULL &&
- (repl->valid_hooks & (1 << i))) {
- BUGPRINT("Valid hook without chain\n");
- return -EINVAL;
- }
- }
-
/* get the location of the udc, put them in an array
while we're at it, allocate the chainstack */
if (udc_cnt) {
@@ -824,8 +854,7 @@ static int translate_table(struct ebt_replace *repl,
return -ENOMEM;
i = 0; /* the i'th udc */
EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
- ebt_get_udc_positions, newinfo, repl->hook_entry, &i,
- repl->valid_hooks, cl_s);
+ ebt_get_udc_positions, newinfo, &i, cl_s);
/* sanity check */
if (i != udc_cnt) {
BUGPRINT("i != udc_cnt\n");
@@ -836,7 +865,7 @@ static int translate_table(struct ebt_replace *repl,
/* Check for loops */
for (i = 0; i < NF_BR_NUMHOOKS; i++)
- if (repl->valid_hooks & (1 << i))
+ if (newinfo->hook_entry[i])
if (check_chainloops(newinfo->hook_entry[i],
cl_s, udc_cnt, i, newinfo->entries)) {
vfree(cl_s);
@@ -856,8 +885,7 @@ static int translate_table(struct ebt_replace *repl,
/* used to know what we need to clean up if something goes wrong */
i = 0;
ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
- ebt_check_entry, newinfo, repl->name, &i, repl->valid_hooks,
- cl_s, udc_cnt);
+ ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt);
if (ret != 0) {
EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
ebt_cleanup_entry, &i);
@@ -954,7 +982,11 @@ static int do_replace(void __user *user, unsigned int len)
/* this can get initialized by translate_table() */
newinfo->chainstack = NULL;
- ret = translate_table(&tmp, newinfo);
+ ret = ebt_verify_pointers(&tmp, newinfo);
+ if (ret != 0)
+ goto free_counterstmp;
+
+ ret = translate_table(tmp.name, newinfo);
if (ret != 0)
goto free_counterstmp;
@@ -1125,35 +1157,47 @@ int ebt_register_table(struct ebt_table *table)
{
struct ebt_table_info *newinfo;
struct ebt_table *t;
+ struct ebt_replace_kernel *repl;
int ret, i, countersize;
+ void *p;
- if (!table || !table->table ||!table->table->entries ||
- table->table->entries_size == 0 ||
- table->table->counters || table->private) {
+ if (!table || !(repl = table->table) || !repl->entries ||
+ repl->entries_size == 0 ||
+ repl->counters || table->private) {
BUGPRINT("Bad table data for ebt_register_table!!!\n");
return -EINVAL;
}
- countersize = COUNTER_OFFSET(table->table->nentries) *
+ countersize = COUNTER_OFFSET(repl->nentries) *
(highest_possible_processor_id()+1);
newinfo = vmalloc(sizeof(*newinfo) + countersize);
ret = -ENOMEM;
if (!newinfo)
return -ENOMEM;
- newinfo->entries = vmalloc(table->table->entries_size);
- if (!(newinfo->entries))
+ p = vmalloc(repl->entries_size);
+ if (!p)
goto free_newinfo;
- memcpy(newinfo->entries, table->table->entries,
- table->table->entries_size);
+ memcpy(p, repl->entries, repl->entries_size);
+ newinfo->entries = p;
+
+ newinfo->entries_size = repl->entries_size;
+ newinfo->nentries = repl->nentries;
if (countersize)
memset(newinfo->counters, 0, countersize);
/* fill in newinfo and parse the entries */
newinfo->chainstack = NULL;
- ret = translate_table(table->table, newinfo);
+ for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+ if ((repl->valid_hooks & (1 << i)) == 0)
+ newinfo->hook_entry[i] = NULL;
+ else
+ newinfo->hook_entry[i] = p +
+ ((char *)repl->hook_entry[i] - repl->entries);
+ }
+ ret = translate_table(repl->name, newinfo);
if (ret != 0) {
BUGPRINT("Translate_table failed\n");
goto free_chainstack;
@@ -1277,33 +1321,33 @@ free_tmp:
}
static inline int ebt_make_matchname(struct ebt_entry_match *m,
- char *base, char *ubase)
+ char *base, char __user *ubase)
{
- char *hlp = ubase - base + (char *)m;
+ char __user *hlp = ubase + ((char *)m - base);
if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
}
static inline int ebt_make_watchername(struct ebt_entry_watcher *w,
- char *base, char *ubase)
+ char *base, char __user *ubase)
{
- char *hlp = ubase - base + (char *)w;
+ char __user *hlp = ubase + ((char *)w - base);
if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
}
-static inline int ebt_make_names(struct ebt_entry *e, char *base, char *ubase)
+static inline int ebt_make_names(struct ebt_entry *e, char *base, char __user *ubase)
{
int ret;
- char *hlp;
+ char __user *hlp;
struct ebt_entry_target *t;
- if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+ if (e->bitmask == 0)
return 0;
- hlp = ubase - base + (char *)e + e->target_offset;
+ hlp = ubase + (((char *)e + e->target_offset) - base);
t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase);
diff --git a/net/core/Makefile b/net/core/Makefile
index 119568077da..73272d506e9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
-obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f558c61aecc..797fdd4352c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -321,7 +321,7 @@ fault:
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len,
- unsigned int *csump)
+ __wsum *csump)
{
int start = skb_headlen(skb);
int pos = 0;
@@ -350,7 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
int err = 0;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -386,7 +386,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
end = start + list->len;
if ((copy = end - offset) > 0) {
- unsigned int csum2 = 0;
+ __wsum csum2 = 0;
if (copy > len)
copy = len;
if (skb_copy_and_csum_datagram(list,
@@ -411,11 +411,11 @@ fault:
return -EFAULT;
}
-unsigned int __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
- unsigned int sum;
+ __sum16 sum;
- sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+ sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
@@ -441,7 +441,7 @@ EXPORT_SYMBOL(__skb_checksum_complete);
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
int hlen, struct iovec *iov)
{
- unsigned int csum;
+ __wsum csum;
int chunk = skb->len - hlen;
/* Skip filled elements.
@@ -460,7 +460,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
chunk, &csum))
goto fault;
- if ((unsigned short)csum_fold(csum))
+ if (csum_fold(csum))
goto csum_error;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 81c426adcd1..e660cb57e42 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -98,7 +98,6 @@
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
-#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
@@ -1170,7 +1169,7 @@ EXPORT_SYMBOL(netif_device_attach);
*/
int skb_checksum_help(struct sk_buff *skb)
{
- unsigned int csum;
+ __wsum csum;
int ret = 0, offset = skb->h.raw - skb->data;
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -1192,9 +1191,9 @@ int skb_checksum_help(struct sk_buff *skb)
offset = skb->tail - skb->h.raw;
BUG_ON(offset <= 0);
- BUG_ON(skb->csum + 2 > offset);
+ BUG_ON(skb->csum_offset + 2 > offset);
- *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
+ *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
@@ -1216,7 +1215,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
- int type = skb->protocol;
+ __be16 type = skb->protocol;
int err;
BUG_ON(skb_shinfo(skb)->frag_list);
@@ -1767,7 +1766,7 @@ int netif_receive_skb(struct sk_buff *skb)
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
- unsigned short type;
+ __be16 type;
/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
@@ -1827,8 +1826,6 @@ int netif_receive_skb(struct sk_buff *skb)
ncls:
#endif
- handle_diverter(skb);
-
if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
@@ -2898,10 +2895,6 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->ingress_lock);
#endif
- ret = alloc_divert_blk(dev);
- if (ret)
- goto out;
-
dev->iflink = -1;
/* Init, if this function is available */
@@ -2910,13 +2903,13 @@ int register_netdevice(struct net_device *dev)
if (ret) {
if (ret > 0)
ret = -EIO;
- goto out_err;
+ goto out;
}
}
if (!dev_valid_name(dev->name)) {
ret = -EINVAL;
- goto out_err;
+ goto out;
}
dev->ifindex = dev_new_index();
@@ -2930,7 +2923,7 @@ int register_netdevice(struct net_device *dev)
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
ret = -EEXIST;
- goto out_err;
+ goto out;
}
}
@@ -2974,7 +2967,7 @@ int register_netdevice(struct net_device *dev)
ret = netdev_register_sysfs(dev);
if (ret)
- goto out_err;
+ goto out;
dev->reg_state = NETREG_REGISTERED;
/*
@@ -3001,9 +2994,6 @@ int register_netdevice(struct net_device *dev)
out:
return ret;
-out_err:
- free_divert_blk(dev);
- goto out;
}
/**
@@ -3035,15 +3025,6 @@ int register_netdev(struct net_device *dev)
goto out;
}
- /*
- * Back compatibility hook. Kill this one in 2.5
- */
- if (dev->name[0] == 0 || dev->name[0] == ' ') {
- err = dev_alloc_name(dev, "eth%d");
- if (err < 0)
- goto out;
- }
-
err = register_netdevice(dev);
out:
rtnl_unlock();
@@ -3329,8 +3310,6 @@ int unregister_netdevice(struct net_device *dev)
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(!dev->master);
- free_divert_blk(dev);
-
/* Finish processing unregister after unlock */
net_set_todo(dev);
@@ -3361,7 +3340,6 @@ void unregister_netdev(struct net_device *dev)
EXPORT_SYMBOL(unregister_netdev);
-#ifdef CONFIG_HOTPLUG_CPU
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
@@ -3405,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_NET_DMA
/**
diff --git a/net/core/dst.c b/net/core/dst.c
index 1a5e49da0e7..836ec660692 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -125,7 +125,7 @@ void * dst_alloc(struct dst_ops * ops)
if (ops->gc())
return NULL;
}
- dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
+ dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
return NULL;
memset(dst, 0, ops->entry_size);
diff --git a/net/core/dv.c b/net/core/dv.c
deleted file mode 100644
index 29ee77f1593..00000000000
--- a/net/core/dv.c
+++ /dev/null
@@ -1,546 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * Generic frame diversion
- *
- * Authors:
- * Benoit LOCHER: initial integration within the kernel with support for ethernet
- * Dave Miller: improvement on the code (correctness, performance and source files)
- *
- */
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <net/dst.h>
-#include <net/arp.h>
-#include <net/sock.h>
-#include <net/ipv6.h>
-#include <net/ip.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/checksum.h>
-#include <linux/divert.h>
-#include <linux/sockios.h>
-
-const char sysctl_divert_version[32]="0.46"; /* Current version */
-
-static int __init dv_init(void)
-{
- return 0;
-}
-module_init(dv_init);
-
-/*
- * Allocate a divert_blk for a device. This must be an ethernet nic.
- */
-int alloc_divert_blk(struct net_device *dev)
-{
- int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;
-
- dev->divert = NULL;
- if (dev->type == ARPHRD_ETHER) {
- dev->divert = kzalloc(alloc_size, GFP_KERNEL);
- if (dev->divert == NULL) {
- printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n",
- dev->name);
- return -ENOMEM;
- }
- dev_hold(dev);
- }
-
- return 0;
-}
-
-/*
- * Free a divert_blk allocated by the above function, if it was
- * allocated on that device.
- */
-void free_divert_blk(struct net_device *dev)
-{
- if (dev->divert) {
- kfree(dev->divert);
- dev->divert=NULL;
- dev_put(dev);
- }
-}
-
-/*
- * Adds a tcp/udp (source or dest) port to an array
- */
-static int add_port(u16 ports[], u16 port)
-{
- int i;
-
- if (port == 0)
- return -EINVAL;
-
- /* Storing directly in network format for performance,
- * thanks Dave :)
- */
- port = htons(port);
-
- for (i = 0; i < MAX_DIVERT_PORTS; i++) {
- if (ports[i] == port)
- return -EALREADY;
- }
-
- for (i = 0; i < MAX_DIVERT_PORTS; i++) {
- if (ports[i] == 0) {
- ports[i] = port;
- return 0;
- }
- }
-
- return -ENOBUFS;
-}
-
-/*
- * Removes a port from an array tcp/udp (source or dest)
- */
-static int remove_port(u16 ports[], u16 port)
-{
- int i;
-
- if (port == 0)
- return -EINVAL;
-
- /* Storing directly in network format for performance,
- * thanks Dave !
- */
- port = htons(port);
-
- for (i = 0; i < MAX_DIVERT_PORTS; i++) {
- if (ports[i] == port) {
- ports[i] = 0;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
-/* Some basic sanity checks on the arguments passed to divert_ioctl() */
-static int check_args(struct divert_cf *div_cf, struct net_device **dev)
-{
- char devname[32];
- int ret;
-
- if (dev == NULL)
- return -EFAULT;
-
- /* GETVERSION: all other args are unused */
- if (div_cf->cmd == DIVCMD_GETVERSION)
- return 0;
-
- /* Network device index should reasonably be between 0 and 1000 :) */
- if (div_cf->dev_index < 0 || div_cf->dev_index > 1000)
- return -EINVAL;
-
- /* Let's try to find the ifname */
- sprintf(devname, "eth%d", div_cf->dev_index);
- *dev = dev_get_by_name(devname);
-
- /* dev should NOT be null */
- if (*dev == NULL)
- return -EINVAL;
-
- ret = 0;
-
- /* user issuing the ioctl must be a super one :) */
- if (!capable(CAP_SYS_ADMIN)) {
- ret = -EPERM;
- goto out;
- }
-
- /* Device must have a divert_blk member NOT null */
- if ((*dev)->divert == NULL)
- ret = -EINVAL;
-out:
- dev_put(*dev);
- return ret;
-}
-
-/*
- * control function of the diverter
- */
-#if 0
-#define DVDBG(a) \
- printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a))
-#else
-#define DVDBG(a)
-#endif
-
-int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg)
-{
- struct divert_cf div_cf;
- struct divert_blk *div_blk;
- struct net_device *dev;
- int ret;
-
- switch (cmd) {
- case SIOCGIFDIVERT:
- DVDBG("SIOCGIFDIVERT, copy_from_user");
- if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
- return -EFAULT;
- DVDBG("before check_args");
- ret = check_args(&div_cf, &dev);
- if (ret)
- return ret;
- DVDBG("after checkargs");
- div_blk = dev->divert;
-
- DVDBG("befre switch()");
- switch (div_cf.cmd) {
- case DIVCMD_GETSTATUS:
- /* Now, just give the user the raw divert block
- * for him to play with :)
- */
- if (copy_to_user(div_cf.arg1.ptr, dev->divert,
- sizeof(struct divert_blk)))
- return -EFAULT;
- break;
-
- case DIVCMD_GETVERSION:
- DVDBG("GETVERSION: checking ptr");
- if (div_cf.arg1.ptr == NULL)
- return -EINVAL;
- DVDBG("GETVERSION: copying data to userland");
- if (copy_to_user(div_cf.arg1.ptr,
- sysctl_divert_version, 32))
- return -EFAULT;
- DVDBG("GETVERSION: data copied");
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case SIOCSIFDIVERT:
- if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
- return -EFAULT;
-
- ret = check_args(&div_cf, &dev);
- if (ret)
- return ret;
-
- div_blk = dev->divert;
-
- switch(div_cf.cmd) {
- case DIVCMD_RESET:
- div_blk->divert = 0;
- div_blk->protos = DIVERT_PROTO_NONE;
- memset(div_blk->tcp_dst, 0,
- MAX_DIVERT_PORTS * sizeof(u16));
- memset(div_blk->tcp_src, 0,
- MAX_DIVERT_PORTS * sizeof(u16));
- memset(div_blk->udp_dst, 0,
- MAX_DIVERT_PORTS * sizeof(u16));
- memset(div_blk->udp_src, 0,
- MAX_DIVERT_PORTS * sizeof(u16));
- return 0;
-
- case DIVCMD_DIVERT:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ENABLE:
- if (div_blk->divert)
- return -EALREADY;
- div_blk->divert = 1;
- break;
-
- case DIVARG1_DISABLE:
- if (!div_blk->divert)
- return -EALREADY;
- div_blk->divert = 0;
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_IP:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ENABLE:
- if (div_blk->protos & DIVERT_PROTO_IP)
- return -EALREADY;
- div_blk->protos |= DIVERT_PROTO_IP;
- break;
-
- case DIVARG1_DISABLE:
- if (!(div_blk->protos & DIVERT_PROTO_IP))
- return -EALREADY;
- div_blk->protos &= ~DIVERT_PROTO_IP;
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_TCP:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ENABLE:
- if (div_blk->protos & DIVERT_PROTO_TCP)
- return -EALREADY;
- div_blk->protos |= DIVERT_PROTO_TCP;
- break;
-
- case DIVARG1_DISABLE:
- if (!(div_blk->protos & DIVERT_PROTO_TCP))
- return -EALREADY;
- div_blk->protos &= ~DIVERT_PROTO_TCP;
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_TCPDST:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ADD:
- return add_port(div_blk->tcp_dst,
- div_cf.arg2.uint16);
-
- case DIVARG1_REMOVE:
- return remove_port(div_blk->tcp_dst,
- div_cf.arg2.uint16);
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_TCPSRC:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ADD:
- return add_port(div_blk->tcp_src,
- div_cf.arg2.uint16);
-
- case DIVARG1_REMOVE:
- return remove_port(div_blk->tcp_src,
- div_cf.arg2.uint16);
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_UDP:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ENABLE:
- if (div_blk->protos & DIVERT_PROTO_UDP)
- return -EALREADY;
- div_blk->protos |= DIVERT_PROTO_UDP;
- break;
-
- case DIVARG1_DISABLE:
- if (!(div_blk->protos & DIVERT_PROTO_UDP))
- return -EALREADY;
- div_blk->protos &= ~DIVERT_PROTO_UDP;
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_UDPDST:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ADD:
- return add_port(div_blk->udp_dst,
- div_cf.arg2.uint16);
-
- case DIVARG1_REMOVE:
- return remove_port(div_blk->udp_dst,
- div_cf.arg2.uint16);
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_UDPSRC:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ADD:
- return add_port(div_blk->udp_src,
- div_cf.arg2.uint16);
-
- case DIVARG1_REMOVE:
- return remove_port(div_blk->udp_src,
- div_cf.arg2.uint16);
-
- default:
- return -EINVAL;
- }
-
- break;
-
- case DIVCMD_ICMP:
- switch(div_cf.arg1.int32) {
- case DIVARG1_ENABLE:
- if (div_blk->protos & DIVERT_PROTO_ICMP)
- return -EALREADY;
- div_blk->protos |= DIVERT_PROTO_ICMP;
- break;
-
- case DIVARG1_DISABLE:
- if (!(div_blk->protos & DIVERT_PROTO_ICMP))
- return -EALREADY;
- div_blk->protos &= ~DIVERT_PROTO_ICMP;
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- default:
- return -EINVAL;
- }
-
- break;
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-
-/*
- * Check if packet should have its dest mac address set to the box itself
- * for diversion
- */
-
-#define ETH_DIVERT_FRAME(skb) \
- memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \
- skb->pkt_type=PACKET_HOST
-
-void divert_frame(struct sk_buff *skb)
-{
- struct ethhdr *eth = eth_hdr(skb);
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct udphdr *udph;
- struct divert_blk *divert = skb->dev->divert;
- int i, src, dst;
- unsigned char *skb_data_end = skb->data + skb->len;
-
- /* Packet is already aimed at us, return */
- if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr))
- return;
-
- /* proto is not IP, do nothing */
- if (eth->h_proto != htons(ETH_P_IP))
- return;
-
- /* Divert all IP frames ? */
- if (divert->protos & DIVERT_PROTO_IP) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
-
- /* Check for possible (maliciously) malformed IP frame (thanks Dave) */
- iph = (struct iphdr *) skb->data;
- if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) {
- printk(KERN_INFO "divert: malformed IP packet !\n");
- return;
- }
-
- switch (iph->protocol) {
- /* Divert all ICMP frames ? */
- case IPPROTO_ICMP:
- if (divert->protos & DIVERT_PROTO_ICMP) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
- break;
-
- /* Divert all TCP frames ? */
- case IPPROTO_TCP:
- if (divert->protos & DIVERT_PROTO_TCP) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
-
- /* Check for possible (maliciously) malformed IP
- * frame (thanx Dave)
- */
- tcph = (struct tcphdr *)
- (((unsigned char *)iph) + (iph->ihl<<2));
- if (((unsigned char *)(tcph+1)) >= skb_data_end) {
- printk(KERN_INFO "divert: malformed TCP packet !\n");
- return;
- }
-
- /* Divert some tcp dst/src ports only ?*/
- for (i = 0; i < MAX_DIVERT_PORTS; i++) {
- dst = divert->tcp_dst[i];
- src = divert->tcp_src[i];
- if ((dst && dst == tcph->dest) ||
- (src && src == tcph->source)) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
- }
- break;
-
- /* Divert all UDP frames ? */
- case IPPROTO_UDP:
- if (divert->protos & DIVERT_PROTO_UDP) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
-
- /* Check for possible (maliciously) malformed IP
- * packet (thanks Dave)
- */
- udph = (struct udphdr *)
- (((unsigned char *)iph) + (iph->ihl<<2));
- if (((unsigned char *)(udph+1)) >= skb_data_end) {
- printk(KERN_INFO
- "divert: malformed UDP packet !\n");
- return;
- }
-
- /* Divert some udp dst/src ports only ? */
- for (i = 0; i < MAX_DIVERT_PORTS; i++) {
- dst = divert->udp_dst[i];
- src = divert->udp_src[i];
- if ((dst && dst == udph->dest) ||
- (src && src == udph->source)) {
- ETH_DIVERT_FRAME(skb);
- return;
- }
- }
- break;
- }
-}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6b0e63cacd9..1df6cd4568d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -107,6 +107,22 @@ out:
EXPORT_SYMBOL_GPL(fib_rules_unregister);
+static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
+ struct flowi *fl, int flags)
+{
+ int ret = 0;
+
+ if (rule->ifindex && (rule->ifindex != fl->iif))
+ goto out;
+
+ if ((rule->mark ^ fl->mark) & rule->mark_mask)
+ goto out;
+
+ ret = ops->match(rule, fl, flags);
+out:
+ return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
+}
+
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
int flags, struct fib_lookup_arg *arg)
{
@@ -116,10 +132,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
rcu_read_lock();
list_for_each_entry_rcu(rule, ops->rules_list, list) {
- if (rule->ifindex && (rule->ifindex != fl->iif))
- continue;
-
- if (!ops->match(rule, fl, flags))
+ if (!fib_rule_match(rule, ops, fl, flags))
continue;
err = ops->action(rule, fl, flags, arg);
@@ -179,6 +192,18 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
rule->ifindex = dev->ifindex;
}
+ if (tb[FRA_FWMARK]) {
+ rule->mark = nla_get_u32(tb[FRA_FWMARK]);
+ if (rule->mark)
+ /* compatibility: if the mark value is non-zero all bits
+ * are compared unless a mask is explicitly specified.
+ */
+ rule->mark_mask = 0xFFFFFFFF;
+ }
+
+ if (tb[FRA_FWMASK])
+ rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
@@ -250,6 +275,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
nla_strcmp(tb[FRA_IFNAME], rule->ifname))
continue;
+ if (tb[FRA_FWMARK] &&
+ (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
+ continue;
+
+ if (tb[FRA_FWMASK] &&
+ (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -273,6 +306,22 @@ errout:
return err;
}
+static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ struct fib_rule *rule)
+{
+ size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
+ + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+ + nla_total_size(4) /* FRA_PRIORITY */
+ + nla_total_size(4) /* FRA_TABLE */
+ + nla_total_size(4) /* FRA_FWMARK */
+ + nla_total_size(4); /* FRA_FWMASK */
+
+ if (ops->nlmsg_payload)
+ payload += ops->nlmsg_payload(rule);
+
+ return payload;
+}
+
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
u32 pid, u32 seq, int type, int flags,
struct fib_rules_ops *ops)
@@ -298,6 +347,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (rule->pref)
NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+ if (rule->mark)
+ NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);
+
+ if (rule->mark_mask || rule->mark)
+ NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
+
if (ops->fill(rule, skb, nlh, frh) < 0)
goto nla_put_failure;
@@ -345,15 +400,13 @@ static void notify_rule_change(int event, struct fib_rule *rule,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in fib_rule_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
diff --git a/net/core/filter.c b/net/core/filter.c
index 6732782a5a4..0df843b667f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -178,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
- A = ntohl(get_unaligned((u32 *)ptr));
+ A = ntohl(get_unaligned((__be32 *)ptr));
continue;
}
break;
@@ -187,7 +187,7 @@ load_w:
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
- A = ntohs(get_unaligned((u16 *)ptr));
+ A = ntohs(get_unaligned((__be16 *)ptr));
continue;
}
break;
@@ -261,7 +261,7 @@ load_b:
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
- A = htons(skb->protocol);
+ A = ntohs(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
diff --git a/net/core/flow.c b/net/core/flow.c
index b16d31ae5e5..d137f971f97 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-static kmem_cache_t *flow_cachep __read_mostly;
+static struct kmem_cache *flow_cachep __read_mostly;
static int flow_lwm, flow_hwm;
@@ -211,7 +211,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
if (flow_count(cpu) > flow_hwm)
flow_cache_shrink(cpu);
- fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+ fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
fle->next = *head;
*head = fle;
@@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu)
tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
}
-#ifdef CONFIG_HOTPLUG_CPU
static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb,
__flow_cache_shrink((unsigned long)hcpu, 0);
return NOTIFY_OK;
}
-#endif /* CONFIG_HOTPLUG_CPU */
static int __init flow_cache_init(void)
{
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 65e4b56fbc7..04b249c40b5 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -158,9 +158,9 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
* call to this function will be unaligned also.
*/
int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
- int offset, unsigned int len, int *csump)
+ int offset, unsigned int len, __wsum *csump)
{
- int csum = *csump;
+ __wsum csum = *csump;
int partial_cnt = 0, err = 0;
/* Skip over the finished iovecs */
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 00000000000..283c2b993fb
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
+#include <linux/highmem.h>
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+ BUG_ON(in_irq());
+
+ local_bh_disable();
+#endif
+ return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+ local_bh_enable();
+#endif
+}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 4b36114744c..549a2ce951b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -34,8 +34,8 @@ enum lw_bits {
static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;
-static void linkwatch_event(void *dummy);
-static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
+static void linkwatch_event(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
@@ -127,7 +127,7 @@ void linkwatch_run_queue(void)
}
-static void linkwatch_event(void *dummy)
+static void linkwatch_event(struct work_struct *dummy)
{
/* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
@@ -171,10 +171,9 @@ void linkwatch_fire_event(struct net_device *dev)
unsigned long delay = linkwatch_nextevent - jiffies;
/* If we wrap around we'll delay it by at most HZ. */
- if (!delay || delay > HZ)
- schedule_work(&linkwatch_work);
- else
- schedule_delayed_work(&linkwatch_work, delay);
+ if (delay > HZ)
+ delay = 0;
+ schedule_delayed_work(&linkwatch_work, delay);
}
}
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b4b478353b2..e7300b6b407 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -251,7 +251,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
goto out_entries;
}
- n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
+ n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC);
if (!n)
goto out_entries;
@@ -577,9 +577,10 @@ void neigh_destroy(struct neighbour *neigh)
while ((hh = neigh->hh) != NULL) {
neigh->hh = hh->hh_next;
hh->hh_next = NULL;
- write_lock_bh(&hh->hh_lock);
+
+ write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
- write_unlock_bh(&hh->hh_lock);
+ write_sequnlock_bh(&hh->hh_lock);
if (atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
}
@@ -897,9 +898,9 @@ static void neigh_update_hhs(struct neighbour *neigh)
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
- write_lock_bh(&hh->hh_lock);
+ write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
- write_unlock_bh(&hh->hh_lock);
+ write_sequnlock_bh(&hh->hh_lock);
}
}
}
@@ -1089,7 +1090,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
break;
if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
- rwlock_init(&hh->hh_lock);
+ seqlock_init(&hh->hh_lock);
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 0);
hh->hh_next = NULL;
@@ -1266,10 +1267,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_table *tbl)
{
- struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
if (p) {
- memcpy(p, &tbl->parms, sizeof(*p));
p->tbl = tbl;
atomic_set(&p->refcnt, 1);
INIT_RCU_HEAD(&p->rcu_head);
@@ -2410,20 +2410,27 @@ static struct file_operations neigh_stat_seq_fops = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_ARPD
+static inline size_t neigh_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(4); /* NDA_PROBES */
+}
+
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
err = neigh_fill_info(skb, n, 0, 0, type, flags);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in neigh_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
errout:
@@ -2618,14 +2625,14 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
int p_id, int pdev_id, char *p_name,
proc_handler *handler, ctl_handler *strategy)
{
- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+ struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
+ sizeof(*t), GFP_KERNEL);
const char *dev_name_source = NULL;
char *dev_name = NULL;
int err = 0;
if (!t)
return -ENOBUFS;
- memcpy(t, &neigh_sysctl_template, sizeof(*t));
t->neigh_vars[0].data = &p->mcast_probes;
t->neigh_vars[1].data = &p->ucast_probes;
t->neigh_vars[2].data = &p->app_probes;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9308af060b4..823215d8e90 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -34,18 +34,12 @@
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
-#define MAX_RETRIES 20000
-static DEFINE_SPINLOCK(skb_list_lock);
-static int nr_skbs;
-static struct sk_buff *skbs;
-
-static DEFINE_SPINLOCK(queue_lock);
-static int queue_depth;
-static struct sk_buff *queue_head, *queue_tail;
+static struct sk_buff_head skb_pool;
static atomic_t trapped;
+#define USEC_PER_POLL 50
#define NETPOLL_RX_ENABLED 1
#define NETPOLL_RX_DROP 2
@@ -56,54 +50,41 @@ static atomic_t trapped;
static void zap_completion_queue(void);
static void arp_reply(struct sk_buff *skb);
-static void queue_process(void *p)
+static void queue_process(struct work_struct *work)
{
- unsigned long flags;
+ struct netpoll_info *npinfo =
+ container_of(work, struct netpoll_info, tx_work.work);
struct sk_buff *skb;
-
- while (queue_head) {
- spin_lock_irqsave(&queue_lock, flags);
-
- skb = queue_head;
- queue_head = skb->next;
- if (skb == queue_tail)
- queue_head = NULL;
-
- queue_depth--;
-
- spin_unlock_irqrestore(&queue_lock, flags);
-
- dev_queue_xmit(skb);
- }
-}
-
-static DECLARE_WORK(send_queue, queue_process, NULL);
-
-void netpoll_queue(struct sk_buff *skb)
-{
unsigned long flags;
- if (queue_depth == MAX_QUEUE_DEPTH) {
- __kfree_skb(skb);
- return;
- }
+ while ((skb = skb_dequeue(&npinfo->txq))) {
+ struct net_device *dev = skb->dev;
- spin_lock_irqsave(&queue_lock, flags);
- if (!queue_head)
- queue_head = skb;
- else
- queue_tail->next = skb;
- queue_tail = skb;
- queue_depth++;
- spin_unlock_irqrestore(&queue_lock, flags);
+ if (!netif_device_present(dev) || !netif_running(dev)) {
+ __kfree_skb(skb);
+ continue;
+ }
- schedule_work(&send_queue);
+ local_irq_save(flags);
+ netif_tx_lock(dev);
+ if (netif_queue_stopped(dev) ||
+ dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ skb_queue_head(&npinfo->txq, skb);
+ netif_tx_unlock(dev);
+ local_irq_restore(flags);
+
+ schedule_delayed_work(&npinfo->tx_work, HZ/10);
+ return;
+ }
+ netif_tx_unlock(dev);
+ local_irq_restore(flags);
+ }
}
-static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, u32 saddr, u32 daddr)
+static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+ unsigned short ulen, __be32 saddr, __be32 daddr)
{
- unsigned int psum;
+ __wsum psum;
if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
return 0;
@@ -111,7 +92,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !(u16)csum_fold(csum_add(psum, skb->csum)))
+ !csum_fold(csum_add(psum, skb->csum)))
return 0;
skb->csum = psum;
@@ -167,12 +148,11 @@ static void service_arp_queue(struct netpoll_info *npi)
arp_reply(skb);
skb = skb_dequeue(&npi->arp_tx);
}
- return;
}
void netpoll_poll(struct netpoll *np)
{
- if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+ if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
return;
/* Process pending work on NIC */
@@ -190,17 +170,15 @@ static void refill_skbs(void)
struct sk_buff *skb;
unsigned long flags;
- spin_lock_irqsave(&skb_list_lock, flags);
- while (nr_skbs < MAX_SKBS) {
+ spin_lock_irqsave(&skb_pool.lock, flags);
+ while (skb_pool.qlen < MAX_SKBS) {
skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
if (!skb)
break;
- skb->next = skbs;
- skbs = skb;
- nr_skbs++;
+ __skb_queue_tail(&skb_pool, skb);
}
- spin_unlock_irqrestore(&skb_list_lock, flags);
+ spin_unlock_irqrestore(&skb_pool.lock, flags);
}
static void zap_completion_queue(void)
@@ -219,7 +197,7 @@ static void zap_completion_queue(void)
while (clist != NULL) {
struct sk_buff *skb = clist;
clist = clist->next;
- if(skb->destructor)
+ if (skb->destructor)
dev_kfree_skb_any(skb); /* put this one back */
else
__kfree_skb(skb);
@@ -229,38 +207,25 @@ static void zap_completion_queue(void)
put_cpu_var(softnet_data);
}
-static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
+static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
- int once = 1, count = 0;
- unsigned long flags;
- struct sk_buff *skb = NULL;
+ int count = 0;
+ struct sk_buff *skb;
zap_completion_queue();
+ refill_skbs();
repeat:
- if (nr_skbs < MAX_SKBS)
- refill_skbs();
skb = alloc_skb(len, GFP_ATOMIC);
+ if (!skb)
+ skb = skb_dequeue(&skb_pool);
if (!skb) {
- spin_lock_irqsave(&skb_list_lock, flags);
- skb = skbs;
- if (skb) {
- skbs = skb->next;
- skb->next = NULL;
- nr_skbs--;
- }
- spin_unlock_irqrestore(&skb_list_lock, flags);
- }
-
- if(!skb) {
- count++;
- if (once && (count == 1000000)) {
- printk("out of netpoll skbs!\n");
- once = 0;
+ if (++count < 10) {
+ netpoll_poll(np);
+ goto repeat;
}
- netpoll_poll(np);
- goto repeat;
+ return NULL;
}
atomic_set(&skb->users, 1);
@@ -270,50 +235,46 @@ repeat:
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
- int status;
- struct netpoll_info *npinfo;
+ int status = NETDEV_TX_BUSY;
+ unsigned long tries;
+ struct net_device *dev = np->dev;
+ struct netpoll_info *npinfo = np->dev->npinfo;
- if (!np || !np->dev || !netif_running(np->dev)) {
- __kfree_skb(skb);
- return;
- }
+ if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
+ __kfree_skb(skb);
+ return;
+ }
- npinfo = np->dev->npinfo;
+ /* don't get messages out of order, and no recursion */
+ if (skb_queue_len(&npinfo->txq) == 0 &&
+ npinfo->poll_owner != smp_processor_id()) {
+ unsigned long flags;
- /* avoid recursion */
- if (npinfo->poll_owner == smp_processor_id() ||
- np->dev->xmit_lock_owner == smp_processor_id()) {
- if (np->drop)
- np->drop(skb);
- else
- __kfree_skb(skb);
- return;
- }
-
- do {
- npinfo->tries--;
- netif_tx_lock(np->dev);
+ local_irq_save(flags);
+ if (netif_tx_trylock(dev)) {
+ /* try until next clock tick */
+ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
+ tries > 0; --tries) {
+ if (!netif_queue_stopped(dev))
+ status = dev->hard_start_xmit(skb, dev);
- /*
- * network drivers do not expect to be called if the queue is
- * stopped.
- */
- status = NETDEV_TX_BUSY;
- if (!netif_queue_stopped(np->dev))
- status = np->dev->hard_start_xmit(skb, np->dev);
+ if (status == NETDEV_TX_OK)
+ break;
- netif_tx_unlock(np->dev);
+ /* tickle device maybe there is some cleanup */
+ netpoll_poll(np);
- /* success */
- if(!status) {
- npinfo->tries = MAX_RETRIES; /* reset */
- return;
+ udelay(USEC_PER_POLL);
+ }
+ netif_tx_unlock(dev);
}
+ local_irq_restore(flags);
+ }
- /* transmit busy */
- netpoll_poll(np);
- udelay(50);
- } while (npinfo->tries > 0);
+ if (status != NETDEV_TX_OK) {
+ skb_queue_tail(&npinfo->txq, skb);
+ schedule_delayed_work(&npinfo->tx_work,0);
+ }
}
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -340,6 +301,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
udph->check = 0;
+ udph->check = csum_tcpudp_magic(htonl(np->local_ip),
+ htonl(np->remote_ip),
+ udp_len, IPPROTO_UDP,
+ csum_partial((unsigned char *)udph, udp_len, 0));
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
@@ -373,7 +340,8 @@ static void arp_reply(struct sk_buff *skb)
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
- u32 sip, tip;
+ __be32 sip, tip;
+ unsigned char *sha;
struct sk_buff *send_skb;
struct netpoll *np = NULL;
@@ -400,9 +368,14 @@ static void arp_reply(struct sk_buff *skb)
arp->ar_op != htons(ARPOP_REQUEST))
return;
- arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
+ arp_ptr = (unsigned char *)(arp+1);
+ /* save the location of the src hw addr */
+ sha = arp_ptr;
+ arp_ptr += skb->dev->addr_len;
memcpy(&sip, arp_ptr, 4);
- arp_ptr += 4 + skb->dev->addr_len;
+ arp_ptr += 4;
+ /* if we actually cared about dst hw addr, it would get copied here */
+ arp_ptr += skb->dev->addr_len;
memcpy(&tip, arp_ptr, 4);
/* Should we ignore arp? */
@@ -425,8 +398,8 @@ static void arp_reply(struct sk_buff *skb)
if (np->dev->hard_header &&
np->dev->hard_header(send_skb, skb->dev, ptype,
- np->remote_mac, np->local_mac,
- send_skb->len) < 0) {
+ sha, np->local_mac,
+ send_skb->len) < 0) {
kfree_skb(send_skb);
return;
}
@@ -449,7 +422,7 @@ static void arp_reply(struct sk_buff *skb)
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &tip, 4);
arp_ptr += 4;
- memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
+ memcpy(arp_ptr, sha, np->dev->addr_len);
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &sip, 4);
@@ -464,7 +437,6 @@ int __netpoll_rx(struct sk_buff *skb)
struct netpoll_info *npi = skb->dev->npinfo;
struct netpoll *np = npi->rx_np;
-
if (!np)
goto out;
if (skb->dev->type != ARPHRD_ETHER)
@@ -537,47 +509,47 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
- if(*cur != '@') {
+ if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
- *delim=0;
- np->local_port=simple_strtol(cur, NULL, 10);
- cur=delim;
+ *delim = 0;
+ np->local_port = simple_strtol(cur, NULL, 10);
+ cur = delim;
}
cur++;
printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
- if(*cur != '/') {
+ if (*cur != '/') {
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
- *delim=0;
- np->local_ip=ntohl(in_aton(cur));
- cur=delim;
+ *delim = 0;
+ np->local_ip = ntohl(in_aton(cur));
+ cur = delim;
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
cur++;
- if ( *cur != ',') {
+ if (*cur != ',') {
/* parse out dev name */
if ((delim = strchr(cur, ',')) == NULL)
goto parse_failed;
- *delim=0;
+ *delim = 0;
strlcpy(np->dev_name, cur, sizeof(np->dev_name));
- cur=delim;
+ cur = delim;
}
cur++;
printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
- if ( *cur != '@' ) {
+ if (*cur != '@') {
/* dst port */
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_port=simple_strtol(cur, NULL, 10);
- cur=delim;
+ *delim = 0;
+ np->remote_port = simple_strtol(cur, NULL, 10);
+ cur = delim;
}
cur++;
printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
@@ -585,42 +557,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
/* dst ip */
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_ip=ntohl(in_aton(cur));
- cur=delim+1;
+ *delim = 0;
+ np->remote_ip = ntohl(in_aton(cur));
+ cur = delim + 1;
printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
- np->name, HIPQUAD(np->remote_ip));
+ np->name, HIPQUAD(np->remote_ip));
- if( *cur != 0 )
- {
+ if (*cur != 0) {
/* MAC address */
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_mac[0]=simple_strtol(cur, NULL, 16);
- cur=delim+1;
+ *delim = 0;
+ np->remote_mac[0] = simple_strtol(cur, NULL, 16);
+ cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_mac[1]=simple_strtol(cur, NULL, 16);
- cur=delim+1;
+ *delim = 0;
+ np->remote_mac[1] = simple_strtol(cur, NULL, 16);
+ cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_mac[2]=simple_strtol(cur, NULL, 16);
- cur=delim+1;
+ *delim = 0;
+ np->remote_mac[2] = simple_strtol(cur, NULL, 16);
+ cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_mac[3]=simple_strtol(cur, NULL, 16);
- cur=delim+1;
+ *delim = 0;
+ np->remote_mac[3] = simple_strtol(cur, NULL, 16);
+ cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
- *delim=0;
- np->remote_mac[4]=simple_strtol(cur, NULL, 16);
- cur=delim+1;
- np->remote_mac[5]=simple_strtol(cur, NULL, 16);
+ *delim = 0;
+ np->remote_mac[4] = simple_strtol(cur, NULL, 16);
+ cur = delim + 1;
+ np->remote_mac[5] = simple_strtol(cur, NULL, 16);
}
printk(KERN_INFO "%s: remote ethernet address "
@@ -647,34 +618,44 @@ int netpoll_setup(struct netpoll *np)
struct in_device *in_dev;
struct netpoll_info *npinfo;
unsigned long flags;
+ int err;
if (np->dev_name)
ndev = dev_get_by_name(np->dev_name);
if (!ndev) {
printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
- return -1;
+ return -ENODEV;
}
np->dev = ndev;
if (!ndev->npinfo) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
- if (!npinfo)
+ if (!npinfo) {
+ err = -ENOMEM;
goto release;
+ }
npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
spin_lock_init(&npinfo->poll_lock);
npinfo->poll_owner = -1;
- npinfo->tries = MAX_RETRIES;
+
spin_lock_init(&npinfo->rx_lock);
skb_queue_head_init(&npinfo->arp_tx);
- } else
+ skb_queue_head_init(&npinfo->txq);
+ INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+ atomic_set(&npinfo->refcnt, 1);
+ } else {
npinfo = ndev->npinfo;
+ atomic_inc(&npinfo->refcnt);
+ }
if (!ndev->poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
+ err = -ENOTSUPP;
goto release;
}
@@ -685,13 +666,14 @@ int netpoll_setup(struct netpoll *np)
np->name, np->dev_name);
rtnl_lock();
- if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
+ err = dev_open(ndev);
+ rtnl_unlock();
+
+ if (err) {
printk(KERN_ERR "%s: failed to open %s\n",
- np->name, np->dev_name);
- rtnl_unlock();
+ np->name, ndev->name);
goto release;
}
- rtnl_unlock();
atleast = jiffies + HZ/10;
atmost = jiffies + 4*HZ;
@@ -729,6 +711,7 @@ int netpoll_setup(struct netpoll *np)
rcu_read_unlock();
printk(KERN_ERR "%s: no IP address for %s, aborting\n",
np->name, np->dev_name);
+ err = -EDESTADDRREQ;
goto release;
}
@@ -761,9 +744,16 @@ int netpoll_setup(struct netpoll *np)
kfree(npinfo);
np->dev = NULL;
dev_put(ndev);
- return -1;
+ return err;
}
+static int __init netpoll_init(void)
+{
+ skb_queue_head_init(&skb_pool);
+ return 0;
+}
+core_initcall(netpoll_init);
+
void netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -771,12 +761,25 @@ void netpoll_cleanup(struct netpoll *np)
if (np->dev) {
npinfo = np->dev->npinfo;
- if (npinfo && npinfo->rx_np == np) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_np = NULL;
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ if (npinfo) {
+ if (npinfo->rx_np == np) {
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ npinfo->rx_np = NULL;
+ npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ }
+
+ np->dev->npinfo = NULL;
+ if (atomic_dec_and_test(&npinfo->refcnt)) {
+ skb_queue_purge(&npinfo->arp_tx);
+ skb_queue_purge(&npinfo->txq);
+ cancel_rearming_delayed_work(&npinfo->tx_work);
+ flush_scheduled_work();
+
+ kfree(npinfo);
+ }
}
+
dev_put(np->dev);
}
@@ -803,4 +806,3 @@ EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);
-EXPORT_SYMBOL(netpoll_queue);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index dd023fd2830..1897a3a385d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -207,7 +207,7 @@ static struct proc_dir_entry *pg_proc_dir = NULL;
#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
struct flow_state {
- __u32 cur_daddr;
+ __be32 cur_daddr;
int count;
};
@@ -282,10 +282,10 @@ struct pktgen_dev {
/* If we're doing ranges, random or incremental, then this
* defines the min/max for those ranges.
*/
- __u32 saddr_min; /* inclusive, source IP address */
- __u32 saddr_max; /* exclusive, source IP address */
- __u32 daddr_min; /* inclusive, dest IP address */
- __u32 daddr_max; /* exclusive, dest IP address */
+ __be32 saddr_min; /* inclusive, source IP address */
+ __be32 saddr_max; /* exclusive, source IP address */
+ __be32 daddr_min; /* inclusive, dest IP address */
+ __be32 daddr_max; /* exclusive, dest IP address */
__u16 udp_src_min; /* inclusive, source UDP port */
__u16 udp_src_max; /* exclusive, source UDP port */
@@ -317,8 +317,8 @@ struct pktgen_dev {
__u32 cur_dst_mac_offset;
__u32 cur_src_mac_offset;
- __u32 cur_saddr;
- __u32 cur_daddr;
+ __be32 cur_saddr;
+ __be32 cur_daddr;
__u16 cur_udp_dst;
__u16 cur_udp_src;
__u32 cur_pkt_size;
@@ -350,10 +350,10 @@ struct pktgen_dev {
};
struct pktgen_hdr {
- __u32 pgh_magic;
- __u32 seq_num;
- __u32 tv_sec;
- __u32 tv_usec;
+ __be32 pgh_magic;
+ __be32 seq_num;
+ __be32 tv_sec;
+ __be32 tv_usec;
};
struct pktgen_thread {
@@ -2160,7 +2160,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for(i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- (pktgen_random() &
+ ((__force __be32)pktgen_random() &
htonl(0x000fffff));
}
@@ -2220,29 +2220,25 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) {
pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr;
} else {
-
- if ((imn = ntohl(pkt_dev->daddr_min)) < (imx =
- ntohl(pkt_dev->
- daddr_max)))
- {
+ imn = ntohl(pkt_dev->daddr_min);
+ imx = ntohl(pkt_dev->daddr_max);
+ if (imn < imx) {
__u32 t;
+ __be32 s;
if (pkt_dev->flags & F_IPDST_RND) {
- t = ((pktgen_random() % (imx - imn)) +
- imn);
- t = htonl(t);
+ t = pktgen_random() % (imx - imn) + imn;
+ s = htonl(t);
- while (LOOPBACK(t) || MULTICAST(t)
- || BADCLASS(t) || ZERONET(t)
- || LOCAL_MCAST(t)) {
- t = ((pktgen_random() %
- (imx - imn)) + imn);
- t = htonl(t);
+ while (LOOPBACK(s) || MULTICAST(s)
+ || BADCLASS(s) || ZERONET(s)
+ || LOCAL_MCAST(s)) {
+ t = (pktgen_random() %
+ (imx - imn)) + imn;
+ s = htonl(t);
}
- pkt_dev->cur_daddr = t;
- }
-
- else {
+ pkt_dev->cur_daddr = s;
+ } else {
t = ntohl(pkt_dev->cur_daddr);
t++;
if (t > imx) {
@@ -2270,7 +2266,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- ((pktgen_random() |
+ (((__force __be32)pktgen_random() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2304,6 +2300,12 @@ static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
*mpls |= MPLS_STACK_BOTTOM;
}
+static inline __be16 build_tci(unsigned int id, unsigned int cfi,
+ unsigned int prio)
+{
+ return htons(id | (cfi << 12) | (prio << 13));
+}
+
static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2353,16 +2355,16 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
if (pkt_dev->vlan_id != 0xffff) {
if(pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_tci = htons(pkt_dev->svlan_id);
- *svlan_tci |= pkt_dev->svlan_p << 5;
- *svlan_tci |= pkt_dev->svlan_cfi << 4;
+ *svlan_tci = build_tci(pkt_dev->svlan_id,
+ pkt_dev->svlan_cfi,
+ pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_tci = htons(pkt_dev->vlan_id);
- *vlan_tci |= pkt_dev->vlan_p << 5;
- *vlan_tci |= pkt_dev->vlan_cfi << 4;
+ *vlan_tci = build_tci(pkt_dev->vlan_id,
+ pkt_dev->vlan_cfi,
+ pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
}
@@ -2371,7 +2373,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
memcpy(eth, pkt_dev->hh, 12);
- *(u16 *) & eth[12] = protocol;
+ *(__be16 *) & eth[12] = protocol;
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
@@ -2491,7 +2493,7 @@ static unsigned int scan_ip6(const char *s, char ip[16])
char suffix[16];
unsigned int prefixlen = 0;
unsigned int suffixlen = 0;
- __u32 tmp;
+ __be32 tmp;
for (i = 0; i < 16; i++)
ip[i] = 0;
@@ -2689,16 +2691,16 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
if (pkt_dev->vlan_id != 0xffff) {
if(pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_tci = htons(pkt_dev->svlan_id);
- *svlan_tci |= pkt_dev->svlan_p << 5;
- *svlan_tci |= pkt_dev->svlan_cfi << 4;
+ *svlan_tci = build_tci(pkt_dev->svlan_id,
+ pkt_dev->svlan_cfi,
+ pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_tci = htons(pkt_dev->vlan_id);
- *vlan_tci |= pkt_dev->vlan_p << 5;
- *vlan_tci |= pkt_dev->vlan_cfi << 4;
+ *vlan_tci = build_tci(pkt_dev->vlan_id,
+ pkt_dev->vlan_cfi,
+ pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
}
@@ -2707,7 +2709,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
memcpy(eth, pkt_dev->hh, 12);
- *(u16 *) & eth[12] = protocol;
+ *(__be16 *) & eth[12] = protocol;
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 -
@@ -2726,11 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
udph->len = htons(datalen + sizeof(struct udphdr));
udph->check = 0; /* No checksum */
- *(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */
+ *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */
if (pkt_dev->traffic_class) {
/* Version + traffic class + flow (0) */
- *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20));
+ *(__be32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20));
}
iph->hop_limit = 32;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 79ebd75fbe4..5f0818d815e 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/vmalloc.h>
#include <net/request_sock.h>
@@ -29,22 +30,31 @@
* it is absolutely not enough even at 100conn/sec. 256 cures most
* of problems. This value is adjusted to 128 for very small machines
* (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
- * Further increasing requires to change hash table size.
+ * Note: don't forget that somaxconn may limit the backlog too.
*/
int sysctl_max_syn_backlog = 256;
int reqsk_queue_alloc(struct request_sock_queue *queue,
- const int nr_table_entries)
+ unsigned int nr_table_entries)
{
- const int lopt_size = sizeof(struct listen_sock) +
- nr_table_entries * sizeof(struct request_sock *);
- struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL);
-
+ size_t lopt_size = sizeof(struct listen_sock);
+ struct listen_sock *lopt;
+
+ nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
+ nr_table_entries = max_t(u32, nr_table_entries, 8);
+ nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
+ lopt_size += nr_table_entries * sizeof(struct request_sock *);
+ if (lopt_size > PAGE_SIZE)
+ lopt = __vmalloc(lopt_size,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
+ else
+ lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
return -ENOMEM;
- for (lopt->max_qlen_log = 6;
- (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+ for (lopt->max_qlen_log = 3;
+ (1 << lopt->max_qlen_log) < nr_table_entries;
lopt->max_qlen_log++);
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
@@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
{
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+ size_t lopt_size = sizeof(struct listen_sock) +
+ lopt->nr_table_entries * sizeof(struct request_sock *);
if (lopt->qlen != 0) {
- int i;
+ unsigned int i;
for (i = 0; i < lopt->nr_table_entries; i++) {
struct request_sock *req;
@@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
}
BUG_TRAP(lopt->qlen == 0);
- kfree(lopt);
+ if (lopt_size > PAGE_SIZE)
+ vfree(lopt);
+ else
+ kfree(lopt);
}
EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02f3c794789..e76539a5eb5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -108,7 +108,6 @@ static const int rtm_min[RTM_NR_FAMILIES] =
[RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
- [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
};
@@ -213,6 +212,26 @@ nla_put_failure:
return nla_nest_cancel(skb, mx);
}
+int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+ u32 ts, u32 tsage, long expires, u32 error)
+{
+ struct rta_cacheinfo ci = {
+ .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+ .rta_used = dst->__use,
+ .rta_clntref = atomic_read(&(dst->__refcnt)),
+ .rta_error = error,
+ .rta_id = id,
+ .rta_ts = ts,
+ .rta_tsage = tsage,
+ };
+
+ if (expires)
+ ci.rta_expires = jiffies_to_clock_t(expires);
+
+ return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
static void set_operstate(struct net_device *dev, unsigned char transition)
{
@@ -273,6 +292,25 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->tx_compressed = b->tx_compressed;
};
+static inline size_t if_nlmsg_size(int iwbuflen)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+ + nla_total_size(sizeof(struct rtnl_link_ifmap))
+ + nla_total_size(sizeof(struct rtnl_link_stats))
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ + nla_total_size(4) /* IFLA_TXQLEN */
+ + nla_total_size(4) /* IFLA_WEIGHT */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ + nla_total_size(1) /* IFLA_LINKMODE */
+ + nla_total_size(iwbuflen);
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
void *iwbuf, int iwbuflen, int type, u32 pid,
u32 seq, u32 change, unsigned int flags)
@@ -558,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
struct sk_buff *nskb;
char *iw_buf = NULL, *iw = NULL;
int iw_buf_len = 0;
- int err, payload;
+ int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
@@ -587,9 +625,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
- payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
- nla_total_size(iw_buf_len));
- nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+ nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
if (nskb == NULL) {
err = -ENOBUFS;
goto errout;
@@ -597,10 +633,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
- if (err <= 0) {
- kfree_skb(nskb);
- goto errout;
- }
+ /* failure implies BUG in if_nlmsg_size or wireless_rtnetlink_get */
+ BUG_ON(err < 0);
err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
errout:
@@ -639,15 +673,13 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in if_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
errout:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3c23760c582..de7801d589e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -56,7 +56,6 @@
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
-#include <linux/highmem.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -67,8 +66,10 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-static kmem_cache_t *skbuff_head_cache __read_mostly;
-static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+#include "kmap_skb.h"
+
+static struct kmem_cache *skbuff_head_cache __read_mostly;
+static struct kmem_cache *skbuff_fclone_cache __read_mostly;
/*
* Keep out-of-line to prevent kernel bloat.
@@ -131,6 +132,7 @@ EXPORT_SYMBOL(skb_truesize_bug);
* @gfp_mask: allocation mask
* @fclone: allocate from fclone cache instead of head cache
* and allocate a cloned (child) skb
+ * @node: numa node to allocate memory on
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
* tail room of size bytes. The object has a reference count of one.
@@ -140,9 +142,9 @@ EXPORT_SYMBOL(skb_truesize_bug);
* %GFP_ATOMIC.
*/
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
- int fclone)
+ int fclone, int node)
{
- kmem_cache_t *cache;
+ struct kmem_cache *cache;
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
@@ -150,14 +152,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
/* Get the HEAD */
- skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
- data = kmalloc_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask);
+ data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
+ gfp_mask, node);
if (!data)
goto nodata;
@@ -209,7 +211,7 @@ nodata:
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
unsigned int size,
gfp_t gfp_mask)
{
@@ -266,9 +268,10 @@ nodata:
struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
unsigned int length, gfp_t gfp_mask)
{
+ int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1;
struct sk_buff *skb;
- skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
skb->dev = dev;
@@ -473,8 +476,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
#endif
C(protocol);
n->destructor = NULL;
+ C(mark);
#ifdef CONFIG_NETFILTER
- C(nfmark);
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
@@ -534,8 +537,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->pkt_type = old->pkt_type;
new->tstamp = old->tstamp;
new->destructor = NULL;
+ new->mark = old->mark;
#ifdef CONFIG_NETFILTER
- new->nfmark = old->nfmark;
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
@@ -639,6 +642,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
+ n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
@@ -1239,8 +1243,8 @@ EXPORT_SYMBOL(skb_store_bits);
/* Checksum skb data. */
-unsigned int skb_checksum(const struct sk_buff *skb, int offset,
- int len, unsigned int csum)
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+ int len, __wsum csum)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -1264,7 +1268,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -1293,7 +1297,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
end = start + list->len;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
if (copy > len)
copy = len;
csum2 = skb_checksum(list, offset - start,
@@ -1314,8 +1318,8 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
/* Both of above in one bottle. */
-unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
- u8 *to, int len, unsigned int csum)
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+ u8 *to, int len, __wsum csum)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -1341,7 +1345,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -1367,7 +1371,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
- unsigned int csum2;
+ __wsum csum2;
int end;
BUG_TRAP(start <= offset + len);
@@ -1395,7 +1399,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
- unsigned int csum;
+ __wsum csum;
long csstart;
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1413,9 +1417,9 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
skb->len - csstart, 0);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- long csstuff = csstart + skb->csum;
+ long csstuff = csstart + skb->csum_offset;
- *((unsigned short *)(to + csstuff)) = csum_fold(csum);
+ *((__sum16 *)(to + csstuff)) = csum_fold(csum);
}
}
@@ -1946,7 +1950,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
do {
struct sk_buff *nskb;
skb_frag_t *frag;
- int hsize, nsize;
+ int hsize;
int k;
int size;
@@ -1957,11 +1961,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
hsize = skb_headlen(skb) - offset;
if (hsize < 0)
hsize = 0;
- nsize = hsize + doffset;
- if (nsize > len + doffset || !sg)
- nsize = len + doffset;
+ if (hsize > len || !sg)
+ hsize = len;
- nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
if (unlikely(!nskb))
goto err;
diff --git a/net/core/sock.c b/net/core/sock.c
index d472db4776c..0ed5b4f0bc4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -270,7 +271,7 @@ out:
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
+int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
int rc = NET_RX_SUCCESS;
@@ -279,7 +280,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
skb->dev = NULL;
- bh_lock_sock(sk);
+ if (nested)
+ bh_lock_sock_nested(sk);
+ else
+ bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
/*
* trylock + unlock semantics:
@@ -806,24 +810,11 @@ lenout:
*/
static void inline sock_lock_init(struct sock *sk)
{
- spin_lock_init(&sk->sk_lock.slock);
- sk->sk_lock.owner = NULL;
- init_waitqueue_head(&sk->sk_lock.wq);
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
-
- /*
- * Mark both the sk_lock and the sk_lock.slock as a
- * per-address-family lock class:
- */
- lockdep_set_class_and_name(&sk->sk_lock.slock,
- af_family_slock_keys + sk->sk_family,
- af_family_slock_key_strings[sk->sk_family]);
- lockdep_init_map(&sk->sk_lock.dep_map,
- af_family_key_strings[sk->sk_family],
- af_family_keys + sk->sk_family, 0);
+ sock_lock_init_class_and_name(sk,
+ af_family_slock_key_strings[sk->sk_family],
+ af_family_slock_keys + sk->sk_family,
+ af_family_key_strings[sk->sk_family],
+ af_family_keys + sk->sk_family);
}
/**
@@ -837,7 +828,7 @@ struct sock *sk_alloc(int family, gfp_t priority,
struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
- kmem_cache_t *slab = prot->slab;
+ struct kmem_cache *slab = prot->slab;
if (slab != NULL)
sk = kmem_cache_alloc(slab, priority);
@@ -1160,7 +1151,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
goto failure;
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- skb = alloc_skb(header_len, sk->sk_allocation);
+ skb = alloc_skb(header_len, gfp_mask);
if (skb) {
int npages;
int i;
@@ -1527,7 +1518,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
atomic_set(&sk->sk_refcnt, 1);
}
-void fastcall lock_sock(struct sock *sk)
+void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
@@ -1538,11 +1529,11 @@ void fastcall lock_sock(struct sock *sk)
/*
* The sk_lock has mutex_lock() semantics here:
*/
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
local_bh_enable();
}
-EXPORT_SYMBOL(lock_sock);
+EXPORT_SYMBOL(lock_sock_nested);
void fastcall release_sock(struct sock *sk)
{
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 02534131d88..1e75b158546 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -21,10 +21,6 @@ extern __u32 sysctl_rmem_max;
extern int sysctl_core_destroy_delay;
-#ifdef CONFIG_NET_DIVERT
-extern char sysctl_divert_version[];
-#endif /* CONFIG_NET_DIVERT */
-
#ifdef CONFIG_XFRM
extern u32 sysctl_xfrm_aevent_etime;
extern u32 sysctl_xfrm_aevent_rseqth;
@@ -105,16 +101,6 @@ ctl_table core_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
-#ifdef CONFIG_NET_DIVERT
- {
- .ctl_name = NET_CORE_DIVERT_VERSION,
- .procname = "divert_version",
- .data = (void *)sysctl_divert_version,
- .maxlen = 32,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
-#endif /* CONFIG_NET_DIVERT */
#ifdef CONFIG_XFRM
{
.ctl_name = NET_CORE_AEVENT_ETIME,
diff --git a/net/core/utils.c b/net/core/utils.c
index d93fe64f669..61556065f07 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -88,7 +88,7 @@ EXPORT_SYMBOL(in_aton);
#define IN6PTON_NULL 0x20000000 /* first/tail */
#define IN6PTON_UNKNOWN 0x40000000
-static inline int digit2bin(char c, char delim)
+static inline int digit2bin(char c, int delim)
{
if (c == delim || c == '\0')
return IN6PTON_DELIM;
@@ -99,7 +99,7 @@ static inline int digit2bin(char c, char delim)
return IN6PTON_UNKNOWN;
}
-static inline int xdigit2bin(char c, char delim)
+static inline int xdigit2bin(char c, int delim)
{
if (c == delim || c == '\0')
return IN6PTON_DELIM;
@@ -113,12 +113,14 @@ static inline int xdigit2bin(char c, char delim)
return (IN6PTON_XDIGIT | (c - 'a' + 10));
if (c >= 'A' && c <= 'F')
return (IN6PTON_XDIGIT | (c - 'A' + 10));
+ if (delim == -1)
+ return IN6PTON_DELIM;
return IN6PTON_UNKNOWN;
}
int in4_pton(const char *src, int srclen,
u8 *dst,
- char delim, const char **end)
+ int delim, const char **end)
{
const char *s;
u8 *d;
@@ -173,7 +175,7 @@ EXPORT_SYMBOL(in4_pton);
int in6_pton(const char *src, int srclen,
u8 *dst,
- char delim, const char **end)
+ int delim, const char **end)
{
const char *s, *tok = NULL;
u8 *d, *dc = NULL;
diff --git a/net/core/wireless.c b/net/core/wireless.c
index ffff0da46c6..f69ab7b4408 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -748,11 +748,39 @@ static int ioctl_standard_call(struct net_device * dev,
int extra_size;
int user_length = 0;
int err;
+ int essid_compat = 0;
/* Calculate space needed by arguments. Always allocate
* for max space. Easier, and won't last long... */
extra_size = descr->max_tokens * descr->token_size;
+ /* Check need for ESSID compatibility for WE < 21 */
+ switch (cmd) {
+ case SIOCSIWESSID:
+ case SIOCGIWESSID:
+ case SIOCSIWNICKN:
+ case SIOCGIWNICKN:
+ if (iwr->u.data.length == descr->max_tokens + 1)
+ essid_compat = 1;
+ else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+ char essid[IW_ESSID_MAX_SIZE + 1];
+
+ err = copy_from_user(essid, iwr->u.data.pointer,
+ iwr->u.data.length *
+ descr->token_size);
+ if (err)
+ return -EFAULT;
+
+ if (essid[iwr->u.data.length - 1] == '\0')
+ essid_compat = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ iwr->u.data.length -= essid_compat;
+
/* Check what user space is giving us */
if(IW_IS_SET(cmd)) {
/* Check NULL pointer */
@@ -795,7 +823,8 @@ static int ioctl_standard_call(struct net_device * dev,
#endif /* WE_IOCTL_DEBUG */
/* Create the kernel buffer */
- extra = kmalloc(extra_size, GFP_KERNEL);
+ /* kzalloc ensures NULL-termination for essid_compat */
+ extra = kzalloc(extra_size, GFP_KERNEL);
if (extra == NULL) {
return -ENOMEM;
}
@@ -819,6 +848,8 @@ static int ioctl_standard_call(struct net_device * dev,
/* Call the handler */
ret = handler(dev, &info, &(iwr->u), extra);
+ iwr->u.data.length += essid_compat;
+
/* If we have something to return to the user */
if (!ret && IW_IS_GET(cmd)) {
/* Check if there is enough buffer up there */
@@ -2099,7 +2130,7 @@ int iw_handler_set_spy(struct net_device * dev,
* The rtnl_lock() make sure we don't race with the other iw_handlers.
* This make sure wireless_spy_update() "see" that the spy list
* is temporarily disabled. */
- wmb();
+ smp_wmb();
/* Are there are addresses to copy? */
if(wrqu->data.length > 0) {
@@ -2128,7 +2159,7 @@ int iw_handler_set_spy(struct net_device * dev,
}
/* Make sure above is updated before re-enabling */
- wmb();
+ smp_wmb();
/* Enable addresses */
spydata->spy_number = wrqu->data.length;
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index e2a095d0fd8..b8a68dd4100 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -4,15 +4,15 @@ menu "DCCP Configuration (EXPERIMENTAL)"
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
- Datagram Congestion Control Protocol
+ Datagram Congestion Control Protocol (RFC 4340)
- From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+ From http://www.ietf.org/rfc/rfc4340.txt:
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
- and on-line games
+ and on-line games.
To compile this protocol support as a module, choose M here: the
module will be called dccp.
@@ -38,6 +38,9 @@ config IP_DCCP_DEBUG
---help---
Only use this if you're hacking DCCP.
+ When compiling DCCP as a module, this debugging output can be toggled
+ by setting the parameter dccp_debug of the `dccp' module to 0 or 1.
+
Just say N.
config NET_DCCPPROBE
@@ -49,7 +52,7 @@ config NET_DCCPPROBE
DCCP congestion avoidance modules. If you don't understand
what was just said, you don't need it: say N.
- Documentation on how to use the packet generator can be found
+ Documentation on how to use DCCP connection probing can be found
at http://linux-net.osdl.org/index.php/DccpProbe
To compile this code as a module, choose M here: the
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 17ed99c4661..f4f8793aaff 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,13 +1,13 @@
-obj-$(CONFIG_IPV6) += dccp_ipv6.o
-
-dccp_ipv6-y := ipv6.o
-
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
dccp_ipv4-y := ipv4.o
+# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
+obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
+dccp_ipv6-y := ipv6.o
+
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 4d176d33983..a086c6312d3 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -21,8 +21,8 @@
#include <net/sock.h>
-static kmem_cache_t *dccp_ackvec_slab;
-static kmem_cache_t *dccp_ackvec_record_slab;
+static struct kmem_cache *dccp_ackvec_slab;
+static struct kmem_cache *dccp_ackvec_record_slab;
static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
{
@@ -67,15 +67,16 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT tx: " : "server tx: ";
-#endif
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
- int len = av->dccpav_vec_len + 2;
+ /* Figure out how many options we need to represent the ackvec */
+ const u16 nr_opts = (av->dccpav_vec_len +
+ DCCP_MAX_ACKVEC_OPT_LEN - 1) /
+ DCCP_MAX_ACKVEC_OPT_LEN;
+ u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
struct timeval now;
u32 elapsed_time;
- unsigned char *to, *from;
+ const unsigned char *tail, *from;
+ unsigned char *to;
struct dccp_ackvec_record *avr;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
@@ -94,26 +95,39 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
- to = skb_push(skb, len);
- *to++ = DCCPO_ACK_VECTOR_0;
- *to++ = len;
-
+ to = skb_push(skb, len);
len = av->dccpav_vec_len;
from = av->dccpav_buf + av->dccpav_buf_head;
+ tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN;
+
+ for (i = 0; i < nr_opts; ++i) {
+ int copylen = len;
+
+ if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+ copylen = DCCP_MAX_ACKVEC_OPT_LEN;
- /* Check if buf_head wraps */
- if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) {
- const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head;
+ *to++ = DCCPO_ACK_VECTOR_0;
+ *to++ = copylen + 2;
- memcpy(to, from, tailsize);
- to += tailsize;
- len -= tailsize;
- from = av->dccpav_buf;
+ /* Check if buf_head wraps */
+ if (from + copylen > tail) {
+ const u16 tailsize = tail - from;
+
+ memcpy(to, from, tailsize);
+ to += tailsize;
+ len -= tailsize;
+ copylen -= tailsize;
+ from = av->dccpav_buf;
+ }
+
+ memcpy(to, from, copylen);
+ from += copylen;
+ to += copylen;
+ len -= copylen;
}
- memcpy(to, from, len);
/*
- * From draft-ietf-dccp-spec-11.txt:
+ * From RFC 4340, A.2:
*
* For each acknowledgement it sends, the HC-Receiver will add an
* acknowledgement record. ack_seqno will equal the HC-Receiver
@@ -129,9 +143,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
dccp_ackvec_insert_avr(av, avr);
- dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+ dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu\n",
- debug_prefix, avr->dccpavr_sent_len,
+ dccp_role(sk), avr->dccpavr_sent_len,
(unsigned long long)avr->dccpavr_ack_seqno,
(unsigned long long)avr->dccpavr_ack_ackno);
return 0;
@@ -145,7 +159,6 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
- av->dccpav_ack_ptr = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
av->dccpav_vec_len = 0;
@@ -174,13 +187,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
}
static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
- const u8 index)
+ const u32 index)
{
return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
}
static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
- const u8 index)
+ const u32 index)
{
return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
}
@@ -210,7 +223,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
gap = -new_head;
}
new_head += DCCP_MAX_ACKVEC_LEN;
- }
+ }
av->dccpav_buf_head = new_head;
@@ -224,7 +237,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
}
/*
- * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ * Implements the RFC 4340, Appendix A
*/
int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
const u64 ackno, const u8 state)
@@ -237,7 +250,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
* We may well decide to do buffer compression, etc, but for now lets
* just drop.
*
- * From Appendix A:
+ * From Appendix A.1.1 (`New Packets'):
*
* Of course, the circular buffer may overflow, either when the
* HC-Sender is sending data at a very high rate, when the
@@ -274,13 +287,13 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
/*
* A.1.2. Old Packets
*
- * When a packet with Sequence Number S arrives, and
- * S <= buf_ackno, the HC-Receiver will scan the table
- * for the byte corresponding to S. (Indexing structures
+ * When a packet with Sequence Number S <= buf_ackno
+ * arrives, the HC-Receiver will scan the table for
+ * the byte corresponding to S. (Indexing structures
* could reduce the complexity of this scan.)
*/
u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
- u8 index = av->dccpav_buf_head;
+ u32 index = av->dccpav_buf_head;
while (1) {
const u8 len = dccp_ackvec_len(av, index);
@@ -322,21 +335,18 @@ out_duplicate:
#ifdef CONFIG_IP_DCCP_DEBUG
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
- if (!dccp_debug)
- return;
-
- printk("ACK vector len=%d, ackno=%llu |", len,
- (unsigned long long)ackno);
+ dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
+ (unsigned long long)ackno);
while (len--) {
const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
- printk("%d,%d|", state, rl);
+ dccp_pr_debug_cat("%d,%d|", state, rl);
++vector;
}
- printk("\n");
+ dccp_pr_debug_cat("\n");
}
void dccp_ackvec_print(const struct dccp_ackvec *av)
@@ -380,24 +390,20 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
*/
list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
if (ackno == avr->dccpavr_ack_seqno) {
-#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx ack: " : "server rx ack: ";
-#endif
- dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+ dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu, ACKED!\n",
- debug_prefix, 1,
+ dccp_role(sk), 1,
(unsigned long long)avr->dccpavr_ack_seqno,
(unsigned long long)avr->dccpavr_ack_ackno);
dccp_ackvec_throw_record(av, avr);
break;
- }
+ } else if (avr->dccpavr_ack_seqno > ackno)
+ break; /* old news */
}
}
static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
- struct sock *sk, u64 ackno,
+ struct sock *sk, u64 *ackno,
const unsigned char len,
const unsigned char *vector)
{
@@ -420,7 +426,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
u64 ackno_end_rl;
- dccp_set_seqno(&ackno_end_rl, ackno - rl);
+ dccp_set_seqno(&ackno_end_rl, *ackno - rl);
/*
* If our AVR sequence number is greater than the ack, go
@@ -428,25 +434,19 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
*/
list_for_each_entry_from(avr, &av->dccpav_records,
dccpavr_node) {
- if (!after48(avr->dccpavr_ack_seqno, ackno))
+ if (!after48(avr->dccpavr_ack_seqno, *ackno))
goto found;
}
/* End of the dccpav_records list, not found, exit */
break;
found:
- if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
+ if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) {
const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
-#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix =
- dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx ack: " : "server rx ack: ";
-#endif
- dccp_pr_debug("%sACK vector 0, len=%d, "
+ dccp_pr_debug("%s ACK vector 0, len=%d, "
"ack_seqno=%llu, ack_ackno=%llu, "
"ACKED!\n",
- debug_prefix, len,
+ dccp_role(sk), len,
(unsigned long long)
avr->dccpavr_ack_seqno,
(unsigned long long)
@@ -460,27 +460,23 @@ found:
*/
}
- dccp_set_seqno(&ackno, ackno_end_rl - 1);
+ dccp_set_seqno(ackno, ackno_end_rl - 1);
++vector;
}
}
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len)
+ u64 *ackno, const u8 opt, const u8 *value, const u8 len)
{
- if (len > DCCP_MAX_ACKVEC_LEN)
+ if (len > DCCP_MAX_ACKVEC_OPT_LEN)
return -1;
/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_ack_seq,
- len, value);
+ ackno, len, value);
return 0;
}
-static char dccp_ackvec_slab_msg[] __initdata =
- KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n";
-
int __init dccp_ackvec_init(void)
{
dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
@@ -502,7 +498,7 @@ out_destroy_slab:
kmem_cache_destroy(dccp_ackvec_slab);
dccp_ackvec_slab = NULL;
out_err:
- printk(dccp_ackvec_slab_msg);
+ DCCP_CRIT("Unable to create Ack Vector slab cache");
return -ENOBUFS;
}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 2424effac7f..96504a3b16e 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -17,7 +17,9 @@
#include <linux/types.h>
/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_LEN 253
+#define DCCP_MAX_ACKVEC_OPT_LEN 253
+/* We can spread an ack vector across multiple options */
+#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
#define DCCP_ACKVEC_STATE_RECEIVED 0
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
@@ -28,8 +30,7 @@
/** struct dccp_ackvec - ack vector
*
- * This data structure is the one defined in the DCCP draft
- * Appendix A.
+ * This data structure is the one defined in RFC 4340, Appendix A.
*
* @dccpav_buf_head - circular buffer head
* @dccpav_buf_tail - circular buffer tail
@@ -42,7 +43,6 @@
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
- * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
* @dccpav_records - list of dccp_ackvec_record
* @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
@@ -53,9 +53,8 @@ struct dccp_ackvec {
u64 dccpav_buf_ackno;
struct list_head dccpav_records;
struct timeval dccpav_time;
- u8 dccpav_buf_head;
- u8 dccpav_ack_ptr;
- u8 dccpav_vec_len;
+ u16 dccpav_buf_head;
+ u16 dccpav_vec_len;
u8 dccpav_buf_nonce;
u8 dccpav_ack_nonce;
u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN];
@@ -78,9 +77,9 @@ struct dccp_ackvec_record {
struct list_head dccpavr_node;
u64 dccpavr_ack_seqno;
u64 dccpavr_ack_ackno;
- u8 dccpavr_ack_ptr;
+ u16 dccpavr_ack_ptr;
+ u16 dccpavr_sent_len;
u8 dccpavr_ack_nonce;
- u8 dccpavr_sent_len;
};
struct sock;
@@ -99,7 +98,8 @@ extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
struct sock *sk, const u64 ackno);
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len);
+ u64 *ackno, const u8 opt,
+ const u8 *value, const u8 len);
extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
@@ -138,7 +138,8 @@ static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
}
static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u8 opt, const u8 *value, const u8 len)
+ const u64 *ackno, const u8 opt,
+ const u8 *value, const u8 len)
{
return -1;
}
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index ff05e59043c..d8cf92f09e6 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void)
#define ccids_read_unlock() do { } while(0)
#endif
-static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
+static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
{
- kmem_cache_t *slab;
+ struct kmem_cache *slab;
char slab_name_fmt[32], *slab_name;
va_list args;
@@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
return slab;
}
-static void ccid_kmem_cache_destroy(kmem_cache_t *slab)
+static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
{
if (slab != NULL) {
const char *name = kmem_cache_name(slab);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index f7eb6c61341..c65cb2453e4 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -27,9 +27,9 @@ struct ccid_operations {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
- kmem_cache_t *ccid_hc_rx_slab;
+ struct kmem_cache *ccid_hc_rx_slab;
__u32 ccid_hc_rx_obj_size;
- kmem_cache_t *ccid_hc_tx_slab;
+ struct kmem_cache *ccid_hc_tx_slab;
__u32 ccid_hc_tx_obj_size;
int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
@@ -43,8 +43,6 @@ struct ccid_operations {
unsigned char* value);
int (*ccid_hc_rx_insert_options)(struct sock *sk,
struct sk_buff *skb);
- int (*ccid_hc_tx_insert_options)(struct sock *sk,
- struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
int (*ccid_hc_tx_parse_options)(struct sock *sk,
@@ -52,9 +50,9 @@ struct ccid_operations {
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
- struct sk_buff *skb, int len);
- void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
- int len);
+ struct sk_buff *skb);
+ void (*ccid_hc_tx_packet_sent)(struct sock *sk,
+ int more, unsigned int len);
void (*ccid_hc_rx_get_info)(struct sock *sk,
struct tcp_info *info);
void (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -94,16 +92,16 @@ extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb, int len)
+ struct sk_buff *skb)
{
int rc = 0;
if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb, len);
+ rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
return rc;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
- int more, int len)
+ int more, unsigned int len)
{
if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
@@ -146,14 +144,6 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
return rc;
}
-static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL)
- return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb);
- return 0;
-}
-
static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 32752f75044..80f46988769 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -22,19 +22,26 @@ config IP_DCCP_CCID2
for lost packets, would prefer CCID 2 to CCID 3. On-line games may
also prefer CCID 2.
- CCID 2 is further described in:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid2-10.txt
+ CCID 2 is further described in RFC 4341,
+ http://www.ietf.org/rfc/rfc4341.txt
- This text was extracted from:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
+ This text was extracted from RFC 4340 (sec. 10.1),
+ http://www.ietf.org/rfc/rfc4340.txt
+
+ To compile this CCID as a module, choose M here: the module will be
+ called dccp_ccid2.
If in doubt, say M.
config IP_DCCP_CCID2_DEBUG
- bool "CCID2 debug"
+ bool "CCID2 debugging messages"
depends on IP_DCCP_CCID2
---help---
- Enable CCID2 debug messages.
+ Enable CCID2-specific debugging messages.
+
+ When compiling CCID2 as a module, this debugging output can
+ additionally be toggled by setting the ccid2_debug module
+ parameter to 0 or 1.
If in doubt, say N.
@@ -53,20 +60,66 @@ config IP_DCCP_CCID3
suitable than CCID 2 for applications such streaming media where a
relatively smooth sending rate is of importance.
- CCID 3 is further described in:
-
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid3-11.txt.
+ CCID 3 is further described in RFC 4342,
+ http://www.ietf.org/rfc/rfc4342.txt
The TFRC congestion control algorithms were initially described in
RFC 3448.
- This text was extracted from:
- http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
+ This text was extracted from RFC 4340 (sec. 10.2),
+ http://www.ietf.org/rfc/rfc4340.txt
+ To compile this CCID as a module, choose M here: the module will be
+ called dccp_ccid3.
+
If in doubt, say M.
config IP_DCCP_TFRC_LIB
depends on IP_DCCP_CCID3
def_tristate IP_DCCP_CCID3
+config IP_DCCP_CCID3_DEBUG
+ bool "CCID3 debugging messages"
+ depends on IP_DCCP_CCID3
+ ---help---
+ Enable CCID3-specific debugging messages.
+
+ When compiling CCID3 as a module, this debugging output can
+ additionally be toggled by setting the ccid3_debug module
+ parameter to 0 or 1.
+
+ If in doubt, say N.
+
+config IP_DCCP_CCID3_RTO
+ int "Use higher bound for nofeedback timer"
+ default 100
+ depends on IP_DCCP_CCID3 && EXPERIMENTAL
+ ---help---
+ Use a higher lower bound for the nofeedback timer expiration.
+
+ The TFRC nofeedback timer normally expires after the maximum of 4
+ RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
+ with a small RTT this can mean a high processing load and reduced
+ performance, since then the nofeedback timer is triggered very
+ frequently.
+
+ This option allows setting a higher lower bound for the nofeedback
+ timer value. Values in units of milliseconds can be set here.
+
+ A value of 0 disables this feature by enforcing the value specified
+ in RFC 3448. The following values have been suggested as bounds for
+ experimental use:
+ * 16-20ms to match the typical multimedia inter-frame interval
+ * 100ms as a reasonable compromise [default]
+ * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
+
+ The default of 100ms is a compromise between a large value for
+ efficient DCCP implementations, and a small value to avoid disrupting
+ the network in times of congestion.
+
+ The purpose of the nofeedback timer is to slow DCCP down when there
+ is serious network congestion: experimenting with larger values should
+ therefore not be performed on WANs.
+
+
endmenu
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2efb505aeb3..fd38b05d6f7 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -23,7 +23,7 @@
*/
/*
- * This implementation should follow: draft-ietf-dccp-ccid2-10.txt
+ * This implementation should follow RFC 4341
*
* BUGS:
* - sequence number wrapping
@@ -33,18 +33,11 @@
#include "../dccp.h"
#include "ccid2.h"
-static int ccid2_debug;
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-#define ccid2_pr_debug(format, a...) \
- do { if (ccid2_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
- } while (0)
-#else
-#define ccid2_pr_debug(format, a...)
-#endif
+static int ccid2_debug;
+#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
{
int len = 0;
@@ -86,7 +79,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
}
#else
-#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
+#define ccid2_pr_debug(format, a...)
+#define ccid2_hc_tx_check_sanity(hctx)
#endif
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
@@ -131,8 +125,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
return 0;
}
-static int ccid2_hc_tx_send_packet(struct sock *sk,
- struct sk_buff *skb, int len)
+static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
struct ccid2_hc_tx_sock *hctx;
@@ -274,7 +267,7 @@ static void ccid2_start_rto_timer(struct sock *sk)
jiffies + hctx->ccid2hctx_rto);
}
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
@@ -352,14 +345,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
- ccid2_pr_debug("Sent: seq=%llu\n", seq);
+ ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
do {
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
while (seqp != hctx->ccid2hctx_seqh) {
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
- seqp->ccid2s_seq, seqp->ccid2s_acked,
- seqp->ccid2s_sent);
+ (unsigned long long)seqp->ccid2s_seq,
+ seqp->ccid2s_acked, seqp->ccid2s_sent);
seqp = seqp->ccid2s_next;
}
} while (0);
@@ -426,7 +419,7 @@ static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
return -1;
out_invalid_option:
- BUG_ON(1); /* should never happen... options were previously parsed ! */
+ DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
return -1;
}
@@ -480,7 +473,8 @@ static inline void ccid2_new_ack(struct sock *sk,
/* first measurement */
if (hctx->ccid2hctx_srtt == -1) {
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
- r, jiffies, seqp->ccid2s_seq);
+ r, jiffies,
+ (unsigned long long)seqp->ccid2s_seq);
ccid2_change_srtt(hctx, r);
hctx->ccid2hctx_rttvar = r >> 1;
} else {
@@ -524,8 +518,8 @@ static inline void ccid2_new_ack(struct sock *sk,
hctx->ccid2hctx_lastrtt = jiffies;
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
- hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
- hctx->ccid2hctx_rto, HZ, r);
+ hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
+ hctx->ccid2hctx_rto, HZ, r);
hctx->ccid2hctx_sent = 0;
}
@@ -618,7 +612,17 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
- seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ if (after48(ackno, hctx->ccid2hctx_high_ack))
+ hctx->ccid2hctx_high_ack = ackno;
+
+ seqp = hctx->ccid2hctx_seqt;
+ while (before48(seqp->ccid2s_seq, ackno)) {
+ seqp = seqp->ccid2s_next;
+ if (seqp == hctx->ccid2hctx_seqh) {
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ break;
+ }
+ }
/* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
* this single ack. I round up.
@@ -636,8 +640,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
u64 ackno_end_rl;
dccp_set_seqno(&ackno_end_rl, ackno - rl);
- ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno,
- ackno_end_rl);
+ ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+ (unsigned long long)ackno,
+ (unsigned long long)ackno_end_rl);
/* if the seqno we are analyzing is larger than the
* current ackno, then move towards the tail of our
* seqnos.
@@ -662,9 +667,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* new packet received or marked */
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
!seqp->ccid2s_acked) {
- if (state ==
+ if (state ==
DCCP_ACKVEC_STATE_ECN_MARKED) {
- ccid2_congestion_event(hctx,
+ ccid2_congestion_event(hctx,
seqp);
} else
ccid2_new_ack(sk, seqp,
@@ -672,7 +677,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
seqp->ccid2s_acked = 1;
ccid2_pr_debug("Got ack for %llu\n",
- seqp->ccid2s_seq);
+ (unsigned long long)seqp->ccid2s_seq);
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->ccid2hctx_seqt) {
@@ -695,7 +700,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* The state about what is acked should be correct now
* Check for NUMDUPACK
*/
- seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ seqp = hctx->ccid2hctx_seqt;
+ while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
+ seqp = seqp->ccid2s_next;
+ if (seqp == hctx->ccid2hctx_seqh) {
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ break;
+ }
+ }
done = 0;
while (1) {
if (seqp->ccid2s_acked) {
@@ -718,7 +730,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
while (1) {
if (!seqp->ccid2s_acked) {
ccid2_pr_debug("Packet lost: %llu\n",
- seqp->ccid2s_seq);
+ (unsigned long long)seqp->ccid2s_seq);
/* XXX need to traverse from tail -> head in
* order to detect multiple congestion events in
* one ack vector.
@@ -769,6 +781,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hctx->ccid2hctx_lastrtt = 0;
hctx->ccid2hctx_rpdupack = -1;
hctx->ccid2hctx_last_cong = jiffies;
+ hctx->ccid2hctx_high_ack = 0;
hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
@@ -821,8 +834,10 @@ static struct ccid_operations ccid2 = {
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
module_param(ccid2_debug, int, 0444);
MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
+#endif
static __init int ccid2_module_init(void)
{
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 5b2ef4acb30..ebd79499c85 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,7 +35,7 @@ struct ccid2_seq {
struct ccid2_seq *ccid2s_next;
};
-#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
@@ -72,6 +72,7 @@ struct ccid2_hc_tx_sock {
int ccid2hctx_rpdupack;
int ccid2hctx_sendwait;
unsigned long ccid2hctx_last_cong;
+ u64 ccid2hctx_high_ack;
};
struct ccid2_hc_rx_sock {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 67d2dc0e7c6..40402c59506 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -41,32 +41,9 @@
#include "lib/tfrc.h"
#include "ccid3.h"
-/*
- * Reason for maths here is to avoid 32 bit overflow when a is big.
- * With this we get close to the limit.
- */
-static u32 usecs_div(const u32 a, const u32 b)
-{
- const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 :
- a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 :
- a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 :
- a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 :
- a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 :
- a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 :
- a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
- a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
- 100000;
- const u32 tmp = a * (USEC_PER_SEC / div);
- return (b >= 2 * div) ? tmp / (b / div) : tmp;
-}
-
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static int ccid3_debug;
-
-#ifdef CCID3_DEBUG
-#define ccid3_pr_debug(format, a...) \
- do { if (ccid3_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
- } while (0)
+#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
#endif
@@ -75,15 +52,7 @@ static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
-/* TFRC sender states */
-enum ccid3_hc_tx_states {
- TFRC_SSTATE_NO_SENT = 1,
- TFRC_SSTATE_NO_FBACK,
- TFRC_SSTATE_FBACK,
- TFRC_SSTATE_TERM,
-};
-
-#ifdef CCID3_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
static char *ccid3_state_names[] = {
@@ -110,325 +79,319 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
hctx->ccid3hctx_state = state;
}
-/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
-static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+/*
+ * Recalculate scheduled nominal send time t_nom, inter-packet interval
+ * t_ipi, and delta value. Should be called after each change to X.
+ */
+static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
{
- /*
- * If no feedback spec says t_ipi is 1 second (set elsewhere and then
- * doubles after every no feedback timer (separate function)
- */
- if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
- hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x);
-}
+ timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
-/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
-static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
-{
+ /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+ hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
+ hctx->ccid3hctx_x >> 6);
+
+ /* Update nominal send time with regard to the new t_ipi */
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+
+ /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
TFRC_OPSYS_HALF_TIME_GRAN);
}
-
/*
* Update X by
* If (p > 0)
- * x_calc = calcX(s, R, p);
+ * X_calc = calcX(s, R, p);
* X = max(min(X_calc, 2 * X_recv), s / t_mbi);
* Else
* If (now - tld >= R)
* X = max(min(2 * X, 2 * X_recv), s / R);
* tld = now;
- */
-static void ccid3_hc_tx_update_x(struct sock *sk)
+ *
+ * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
+ * fine-grained resolution of sending rates. This requires scaling by 2^6
+ * throughout the code. Only X_calc is unscaled (in bytes/second).
+ *
+ * If X has changed, we also update the scheduled send time t_nom,
+ * the inter-packet interval t_ipi, and the delta value.
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
+
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ const __u64 old_x = hctx->ccid3hctx_x;
- /* To avoid large error in calcX */
- if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
- hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
- hctx->ccid3hctx_rtt,
- hctx->ccid3hctx_p);
- hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
- 2 * hctx->ccid3hctx_x_recv),
- (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME));
- } else {
- struct timeval now;
+ if (hctx->ccid3hctx_p > 0) {
- dccp_timestamp(sk, &now);
- if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
- hctx->ccid3hctx_rtt) {
- hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
- hctx->ccid3hctx_x) * 2,
- usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_rtt));
- hctx->ccid3hctx_t_ld = now;
- }
+ hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
+ hctx->ccid3hctx_x_recv * 2);
+ hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ TFRC_T_MBI);
+
+ } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
+ (suseconds_t)hctx->ccid3hctx_rtt >= 0) {
+
+ hctx->ccid3hctx_x =
+ max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+ scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_rtt));
+ hctx->ccid3hctx_t_ld = *now;
+ }
+
+ if (hctx->ccid3hctx_x != old_x)
+ ccid3_update_send_time(hctx);
+}
+
+/*
+ * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1)
+ * @len: DCCP packet payload size in bytes
+ */
+static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
+{
+ if (unlikely(len == 0))
+ ccid3_pr_debug("Packet payload length is 0 - not updating\n");
+ else
+ hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
+ (9 * hctx->ccid3hctx_s + len) / 10;
+ /*
+ * Note: We could do a potential optimisation here - when `s' changes,
+ * recalculate sending rate and consequently t_ipi, t_delta, and
+ * t_nom. This is however non-standard, and the benefits are not
+ * clear, so it is currently left out.
+ */
+}
+
+/*
+ * Update Window Counter using the algorithm from [RFC 4342, 8.1].
+ * The algorithm is not applicable if RTT < 4 microseconds.
+ */
+static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
+ struct timeval *now)
+{
+ suseconds_t delta;
+ u32 quarter_rtts;
+
+ if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */
+ return;
+
+ delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
+ DCCP_BUG_ON(delta < 0);
+
+ quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
+
+ if (quarter_rtts > 0) {
+ hctx->ccid3hctx_t_last_win_count = *now;
+ hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5);
+ hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */
+
+ ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
}
}
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
- unsigned long next_tmout = 0;
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
/* XXX: set some sensible MIB */
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + HZ / 5);
- goto out;
+ goto restart_timer;
}
- ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+ ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
-
+
switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_TERM:
- goto out;
case TFRC_SSTATE_NO_FBACK:
- /* Halve send rate */
- hctx->ccid3hctx_x /= 2;
- if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME))
- hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
- TFRC_MAX_BACK_OFF_TIME);
-
- ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
- "bytes/s\n",
- dccp_role(sk), sk,
+ /* RFC 3448, 4.4: Halve send rate directly */
+ hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ TFRC_T_MBI);
+
+ ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
+ "bytes/s\n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state),
- hctx->ccid3hctx_x);
- next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x),
- TFRC_INITIAL_TIMEOUT);
- /*
- * FIXME - not sure above calculation is correct. See section
- * 5 of CCID3 11 should adjust tx_t_ipi and double that to
- * achieve it really
- */
+ (unsigned)(hctx->ccid3hctx_x >> 6));
+ /* The value of R is still undefined and so we can not recompute
+ * the timeout value. Keep initial value as per [RFC 4342, 5]. */
+ t_nfb = TFRC_INITIAL_TIMEOUT;
+ ccid3_update_send_time(hctx);
break;
case TFRC_SSTATE_FBACK:
/*
* Check if IDLE since last timeout and recv rate is less than
- * 4 packets per RTT
+ * 4 packets (in units of 64*bytes/sec) per RTT
*/
if (!hctx->ccid3hctx_idle ||
- (hctx->ccid3hctx_x_recv >=
- 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
- ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
+ (hctx->ccid3hctx_x_recv >= 4 *
+ scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_rtt))) {
+ struct timeval now;
+
+ ccid3_pr_debug("%s(%p, state=%s), not idle\n",
dccp_role(sk), sk,
- ccid3_tx_state_name(hctx->ccid3hctx_state));
- /* Halve sending rate */
+ ccid3_tx_state_name(hctx->ccid3hctx_state));
- /* If (X_calc > 2 * X_recv)
+ /*
+ * Modify the cached value of X_recv [RFC 3448, 4.4]
+ *
+ * If (p == 0 || X_calc > 2 * X_recv)
* X_recv = max(X_recv / 2, s / (2 * t_mbi));
* Else
* X_recv = X_calc / 4;
+ *
+ * Note that X_recv is scaled by 2^6 while X_calc is not
*/
- BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
- hctx->ccid3hctx_x_calc == 0);
-
- /* check also if p is zero -> x_calc is infinity? */
- if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
- hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
- hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
- hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
- else
- hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
-
- /* Update sending rate */
- ccid3_hc_tx_update_x(sk);
+ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
+
+ if (hctx->ccid3hctx_p == 0 ||
+ (hctx->ccid3hctx_x_calc >
+ (hctx->ccid3hctx_x_recv >> 5))) {
+
+ hctx->ccid3hctx_x_recv =
+ max(hctx->ccid3hctx_x_recv / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ (2 * TFRC_T_MBI));
+
+ if (hctx->ccid3hctx_p == 0)
+ dccp_timestamp(sk, &now);
+ } else {
+ hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+ hctx->ccid3hctx_x_recv <<= 4;
+ }
+ /* Now recalculate X [RFC 3448, 4.3, step (4)] */
+ ccid3_hc_tx_update_x(sk, &now);
}
/*
* Schedule no feedback timer to expire in
- * max(4 * R, 2 * s / X)
+ * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
+ * See comments in packet_recv() regarding the value of t_RTO.
*/
- next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
- 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x));
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
+ case TFRC_SSTATE_NO_SENT:
+ DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
+ /* fall through */
+ case TFRC_SSTATE_TERM:
goto out;
}
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
hctx->ccid3hctx_idle = 1;
+
+restart_timer:
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+ jiffies + usecs_to_jiffies(t_nfb));
out:
bh_unlock_sock(sk);
sock_put(sk);
}
-static int ccid3_hc_tx_send_packet(struct sock *sk,
- struct sk_buff *skb, int len)
+/*
+ * returns
+ * > 0: delay (in msecs) that should pass before actually sending
+ * = 0: can send immediately
+ * < 0: error condition; do not send packet
+ */
+static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- struct dccp_tx_hist_entry *new_packet;
struct timeval now;
- long delay;
- int rc = -ENOTCONN;
+ suseconds_t delay;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
+ BUG_ON(hctx == NULL);
- /* Check if pure ACK or Terminating*/
/*
- * XXX: We only call this function for DATA and DATAACK, on, these
- * packets can have zero length, but why the comment about "pure ACK"?
+ * This function is called only for Data and DataAck packets. Sending
+ * zero-sized Data(Ack)s is theoretically possible, but for congestion
+ * control this case is pathological - ignore it.
*/
- if (unlikely(len == 0))
- goto out;
-
- /* See if last packet allocated was not sent */
- new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
- if (new_packet == NULL || new_packet->dccphtx_sent) {
- new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
- SLAB_ATOMIC);
-
- rc = -ENOBUFS;
- if (unlikely(new_packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough "
- "mem to add to history, send refused\n",
- __FUNCTION__, dccp_role(sk), sk);
- goto out;
- }
-
- dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
- }
+ if (unlikely(skb->len == 0))
+ return -EBADMSG;
dccp_timestamp(sk, &now);
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+ (jiffies +
+ usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
hctx->ccid3hctx_last_win_count = 0;
hctx->ccid3hctx_t_last_win_count = now;
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
- hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI;
- /* Set nominal send time for initial packet */
+ /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
+ ccid3_hc_tx_update_s(hctx, skb->len);
+ hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+ hctx->ccid3hctx_x <<= 6;
+
+ /* First timeout, according to [RFC 3448, 4.2], is 1 second */
+ hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
+ /* Initial delta: minimum of 0.5 sec and t_gran/2 */
+ hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
+
+ /* Set t_0 for initial packet */
hctx->ccid3hctx_t_nom = now;
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_delta(hctx);
- rc = 0;
break;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
- delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
- hctx->ccid3hctx_delta);
- delay /= -1000;
- /* divide by -1000 is to convert to ms and get sign right */
- rc = delay > 0 ? delay : 0;
- break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
- rc = -EINVAL;
+ delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+ /*
+ * Scheduling of packet transmissions [RFC 3448, 4.6]
+ *
+ * if (t_now > t_nom - delta)
+ * // send the packet now
+ * else
+ * // send the packet in (t_nom - t_now) milliseconds.
+ */
+ if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
+ return delay / 1000L;
+
+ ccid3_hc_tx_update_win_count(hctx, &now);
break;
+ case TFRC_SSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+ return -EINVAL;
}
- /* Can we send? if so add options and add to packet history */
- if (rc == 0) {
- dp->dccps_hc_tx_insert_options = 1;
- new_packet->dccphtx_ccval =
- DCCP_SKB_CB(skb)->dccpd_ccval =
- hctx->ccid3hctx_last_win_count;
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- }
-out:
- return rc;
+ /* prepare to send now (add options etc.) */
+ dp->dccps_hc_tx_insert_options = 1;
+ DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+
+ /* set the nominal send time for the next following packet */
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+
+ return 0;
}
-static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
+ unsigned int len)
{
- const struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct timeval now;
+ struct dccp_tx_hist_entry *packet;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
-
- dccp_timestamp(sk, &now);
-
- /* check if we have sent a data packet */
- if (len > 0) {
- unsigned long quarter_rtt;
- struct dccp_tx_hist_entry *packet;
-
- packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
- if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't "
- "exists in history!\n", __FUNCTION__);
- return;
- }
- if (unlikely(packet->dccphtx_sent)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in "
- "history!\n", __FUNCTION__);
- return;
- }
- packet->dccphtx_tstamp = now;
- packet->dccphtx_seqno = dp->dccps_gss;
- /*
- * Check if win_count have changed
- * Algorithm in "8.1. Window Counter Valuer" in
- * draft-ietf-dccp-ccid3-11.txt
- */
- quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
- if (likely(hctx->ccid3hctx_rtt > 8))
- quarter_rtt /= hctx->ccid3hctx_rtt / 4;
-
- if (quarter_rtt > 0) {
- hctx->ccid3hctx_t_last_win_count = now;
- hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
- min_t(unsigned long, quarter_rtt, 5)) % 16;
- ccid3_pr_debug("%s, sk=%p, window changed from "
- "%u to %u!\n",
- dccp_role(sk), sk,
- packet->dccphtx_ccval,
- hctx->ccid3hctx_last_win_count);
- }
+ BUG_ON(hctx == NULL);
- hctx->ccid3hctx_idle = 0;
- packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
- packet->dccphtx_sent = 1;
- } else
- ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
- dccp_role(sk), sk, dp->dccps_gss);
+ ccid3_hc_tx_update_s(hctx, len);
- switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_SENT:
- /* if first wasn't pure ack */
- if (len != 0)
- printk(KERN_CRIT "%s: %s, First packet sent is noted "
- "as a data packet\n",
- __FUNCTION__, dccp_role(sk));
+ packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
+ if (unlikely(packet == NULL)) {
+ DCCP_CRIT("packet history - out of memory!");
return;
- case TFRC_SSTATE_NO_FBACK:
- case TFRC_SSTATE_FBACK:
- if (len > 0) {
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_t_ipi(hctx);
- ccid3_calc_new_delta(hctx);
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- }
- break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
- break;
}
+ dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
+
+ dccp_timestamp(sk, &now);
+ packet->dccphtx_tstamp = now;
+ packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
+ packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
+ packet->dccphtx_sent = 1;
+ hctx->ccid3hctx_idle = 0;
}
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -438,13 +401,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct ccid3_options_received *opt_recv;
struct dccp_tx_hist_entry *packet;
struct timeval now;
- unsigned long next_tmout;
- u32 t_elapsed;
+ unsigned long t_nfb;
u32 pinv;
- u32 x_recv;
- u32 r_sample;
+ suseconds_t r_sample, t_elapsed;
- BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
+ BUG_ON(hctx == NULL);
/* we are only interested in ACKs */
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
@@ -453,41 +414,49 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
opt_recv = &hctx->ccid3hctx_options_received;
- t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
- x_recv = opt_recv->ccid3or_receive_rate;
- pinv = opt_recv->ccid3or_loss_event_rate;
-
switch (hctx->ccid3hctx_state) {
- case TFRC_SSTATE_NO_SENT:
- /* FIXME: what to do here? */
- return;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
- /* Calculate new round trip sample by
- * R_sample = (now - t_recvdata) - t_delay */
- /* get t_recvdata from history */
+ /* get packet from history to look up t_recvdata */
packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno "
- "%llu(%s) does't exist in history!\n",
- __FUNCTION__, dccp_role(sk), sk,
+ DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
+ "in history!\n", dccp_role(sk), sk,
(unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
return;
}
- /* Update RTT */
+ /* Update receive rate in units of 64 * bytes/second */
+ hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+ hctx->ccid3hctx_x_recv <<= 6;
+
+ /* Update loss event rate */
+ pinv = opt_recv->ccid3or_loss_event_rate;
+ if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
+ hctx->ccid3hctx_p = 0;
+ else /* can not exceed 100% */
+ hctx->ccid3hctx_p = 1000000 / pinv;
+
dccp_timestamp(sk, &now);
- r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
+
+ /*
+ * Calculate new round trip sample as per [RFC 3448, 4.3] by
+ * R_sample = (now - t_recvdata) - t_elapsed
+ */
+ r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
+ t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
+
+ DCCP_BUG_ON(r_sample < 0);
if (unlikely(r_sample <= t_elapsed))
- LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
- "t_elapsed=%uus\n",
- __FUNCTION__, r_sample, t_elapsed);
+ DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
+ (int)r_sample, (int)t_elapsed);
else
r_sample -= t_elapsed;
+ CCID3_RTT_SANITY_CHECK(r_sample);
- /* Update RTT estimate by
+ /* Update RTT estimate by
* If (No feedback recv)
* R = R_sample;
* Else
@@ -496,97 +465,96 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* q is a constant, RFC 3448 recomments 0.9
*/
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
- hctx->ccid3hctx_rtt = r_sample;
- } else
- hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
- r_sample / 10;
-
- ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
- "r_sample=%us\n", dccp_role(sk), sk,
- hctx->ccid3hctx_rtt, r_sample);
-
- /* Update timeout interval */
- hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
- USEC_PER_SEC);
+ /*
+ * Larger Initial Windows [RFC 4342, sec. 5]
+ * We deviate in that we use `s' instead of `MSS'.
+ */
+ __u64 w_init = min(4 * hctx->ccid3hctx_s,
+ max(2 * hctx->ccid3hctx_s, 4380));
+ hctx->ccid3hctx_rtt = r_sample;
+ hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample);
+ hctx->ccid3hctx_t_ld = now;
- /* Update receive rate */
- hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+ ccid3_update_send_time(hctx);
- /* Update loss event rate */
- if (pinv == ~0 || pinv == 0)
- hctx->ccid3hctx_p = 0;
- else {
- hctx->ccid3hctx_p = 1000000 / pinv;
+ ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
+ "R_sample=%dus, X=%u\n", dccp_role(sk),
+ sk, hctx->ccid3hctx_s, w_init,
+ (int)r_sample,
+ (unsigned)(hctx->ccid3hctx_x >> 6));
- if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
- hctx->ccid3hctx_p = TFRC_SMALLEST_P;
- ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
- dccp_role(sk), sk);
- }
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+ } else {
+ hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
+ (u32)r_sample) / 10;
+
+ /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
+ if (hctx->ccid3hctx_p > 0)
+ hctx->ccid3hctx_x_calc =
+ tfrc_calc_x(hctx->ccid3hctx_s,
+ hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p);
+ ccid3_hc_tx_update_x(sk, &now);
+
+ ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+ "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
+ dccp_role(sk),
+ sk, hctx->ccid3hctx_rtt, (int)r_sample,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+ hctx->ccid3hctx_x_calc,
+ (unsigned)(hctx->ccid3hctx_x_recv >> 6),
+ (unsigned)(hctx->ccid3hctx_x >> 6));
}
/* unschedule no feedback timer */
sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
- /* Update sending rate */
- ccid3_hc_tx_update_x(sk);
-
- /* Update next send time */
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_t_ipi(hctx);
- timeval_add_usecs(&hctx->ccid3hctx_t_nom,
- hctx->ccid3hctx_t_ipi);
- ccid3_calc_new_delta(hctx);
-
/* remove all packets older than the one acked from history */
dccp_tx_hist_purge_older(ccid3_tx_hist,
&hctx->ccid3hctx_hist, packet);
/*
- * As we have calculated new ipi, delta, t_nom it is possible that
- * we now can send a packet, so wake up dccp_wait_for_ccids.
+ * As we have calculated new ipi, delta, t_nom it is possible
+ * that we now can send a packet, so wake up dccp_wait_for_ccid
*/
sk->sk_write_space(sk);
/*
+ * Update timeout interval for the nofeedback timer.
+ * We use a configuration option to increase the lower bound.
+ * This can help avoid triggering the nofeedback timer too
+ * often ('spinning') on LANs with small RTTs.
+ */
+ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+ CONFIG_IP_DCCP_CCID3_RTO *
+ (USEC_PER_SEC/1000));
+ /*
* Schedule no feedback timer to expire in
- * max(4 * R, 2 * s / X)
+ * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
*/
- next_tmout = max(hctx->ccid3hctx_t_rto,
- 2 * usecs_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x));
-
- ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
+ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+
+ ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
"expire in %lu jiffies (%luus)\n",
- dccp_role(sk), sk,
- usecs_to_jiffies(next_tmout), next_tmout);
+ dccp_role(sk),
+ sk, usecs_to_jiffies(t_nfb), t_nfb);
- sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
- jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+ jiffies + usecs_to_jiffies(t_nfb));
/* set idle flag */
- hctx->ccid3hctx_idle = 1;
+ hctx->ccid3hctx_idle = 1;
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
- dump_stack();
+ case TFRC_SSTATE_NO_SENT:
+ /*
+ * XXX when implementing bidirectional rx/tx check this again
+ */
+ DCCP_WARN("Illegal ACK received - no packet sent\n");
+ /* fall through */
+ case TFRC_SSTATE_TERM: /* ignore feedback when closing */
break;
}
}
-static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
- BUG_ON(hctx == NULL);
-
- if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)
- DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
- return 0;
-}
-
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
unsigned char len, u16 idx,
unsigned char *value)
@@ -611,13 +579,14 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
switch (option) {
case TFRC_OPT_LOSS_EVENT_RATE:
if (unlikely(len != 4)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
- "len for TFRC_OPT_LOSS_EVENT_RATE\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), invalid len %d "
+ "for TFRC_OPT_LOSS_EVENT_RATE\n",
+ dccp_role(sk), sk, len);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value);
- ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
+ opt_recv->ccid3or_loss_event_rate =
+ ntohl(*(__be32 *)value);
+ ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_event_rate);
}
@@ -625,20 +594,21 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
case TFRC_OPT_LOSS_INTERVALS:
opt_recv->ccid3or_loss_intervals_idx = idx;
opt_recv->ccid3or_loss_intervals_len = len;
- ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
+ ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_intervals_idx,
opt_recv->ccid3or_loss_intervals_len);
break;
case TFRC_OPT_RECEIVE_RATE:
if (unlikely(len != 4)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
- "len for TFRC_OPT_RECEIVE_RATE\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), invalid len %d "
+ "for TFRC_OPT_RECEIVE_RATE\n",
+ dccp_role(sk), sk, len);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value);
- ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
+ opt_recv->ccid3or_receive_rate =
+ ntohl(*(__be32 *)value);
+ ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_receive_rate);
}
@@ -650,22 +620,15 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
- if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
- dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
- hctx->ccid3hctx_s = dp->dccps_packet_size;
- else
- hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
-
- /* Set transmission rate to 1 packet per second */
- hctx->ccid3hctx_x = hctx->ccid3hctx_s;
- hctx->ccid3hctx_t_rto = USEC_PER_SEC;
+ hctx->ccid3hctx_s = 0;
+ hctx->ccid3hctx_rtt = 0;
hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
- hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+ hctx->ccid3hctx_no_feedback_timer.function =
+ ccid3_hc_tx_no_feedback_timer;
hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
init_timer(&hctx->ccid3hctx_no_feedback_timer);
@@ -689,14 +652,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
* RX Half Connection methods
*/
-/* TFRC receiver states */
-enum ccid3_hc_rx_states {
- TFRC_RSTATE_NO_DATA = 1,
- TFRC_RSTATE_DATA,
- TFRC_RSTATE_TERM = 127,
-};
-
-#ifdef CCID3_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
static char *ccid3_rx_state_names[] = {
@@ -722,14 +678,24 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
hcrx->ccid3hcrx_state = state;
}
+static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
+{
+ if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */
+ ccid3_pr_debug("Packet payload length is 0 - not updating\n");
+ else
+ hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len :
+ (9 * hcrx->ccid3hcrx_s + len) / 10;
+}
+
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_rx_hist_entry *packet;
struct timeval now;
+ suseconds_t delta;
- ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+ ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
dccp_timestamp(sk, &now);
@@ -737,25 +703,22 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
case TFRC_RSTATE_NO_DATA:
hcrx->ccid3hcrx_x_recv = 0;
break;
- case TFRC_RSTATE_DATA: {
- const u32 delta = timeval_delta(&now,
- &hcrx->ccid3hcrx_tstamp_last_feedback);
- hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
- delta);
- }
+ case TFRC_RSTATE_DATA:
+ delta = timeval_delta(&now,
+ &hcrx->ccid3hcrx_tstamp_last_feedback);
+ DCCP_BUG_ON(delta < 0);
+ hcrx->ccid3hcrx_x_recv =
+ scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
break;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
- dump_stack();
+ case TFRC_RSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
return;
}
packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet "
- "in history!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), no data packet in history!\n",
+ dccp_role(sk), sk);
return;
}
@@ -763,13 +726,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval;
hcrx->ccid3hcrx_bytes_recv = 0;
- /* Convert to multiples of 10us */
- hcrx->ccid3hcrx_elapsed_time =
- timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
+ /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
+ delta = timeval_delta(&now, &packet->dccphrx_tstamp);
+ DCCP_BUG_ON(delta < 0);
+ hcrx->ccid3hcrx_elapsed_time = delta / 10;
+
if (hcrx->ccid3hcrx_p == 0)
- hcrx->ccid3hcrx_pinv = ~0;
- else
+ hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
+ else if (hcrx->ccid3hcrx_p > 1000000) {
+ DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
+ hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
+ } else
hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+
dp->dccps_hc_rx_insert_options = 1;
dccp_send_ack(sk);
}
@@ -797,9 +766,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
hcrx->ccid3hcrx_elapsed_time)) ||
dccp_insert_option_timestamp(sk, skb) ||
dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
- &pinv, sizeof(pinv)) ||
+ &pinv, sizeof(pinv)) ||
dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
- &x_recv, sizeof(x_recv)))
+ &x_recv, sizeof(x_recv)))
return -1;
return 0;
@@ -813,12 +782,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
- u32 rtt, delta, x_recv, fval, p, tmp2;
+ u32 x_recv, p;
+ suseconds_t rtt, delta;
struct timeval tstamp = { 0, };
int interval = 0;
int win_count = 0;
int step = 0;
- u64 tmp1;
+ u64 fval;
list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
dccphrx_node) {
@@ -843,58 +813,66 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
}
if (unlikely(step == 0)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history "
- "contains no data packets!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), packet history has no data packets!\n",
+ dccp_role(sk), sk);
return ~0;
}
if (unlikely(interval == 0)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a "
- "win_count interval > 0. Defaulting to 1\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
+ "Defaulting to 1\n", dccp_role(sk), sk);
interval = 1;
}
found:
if (!tail) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
- __FUNCTION__);
+ DCCP_CRIT("tail is null\n");
return ~0;
}
- rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
- ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
- dccp_role(sk), sk, rtt);
- if (rtt == 0)
- rtt = 1;
-
- dccp_timestamp(sk, &tstamp);
- delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
- x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
- if (x_recv == 0)
- x_recv = hcrx->ccid3hcrx_x_recv;
+ delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+ DCCP_BUG_ON(delta < 0);
- tmp1 = (u64)x_recv * (u64)rtt;
- do_div(tmp1,10000000);
- tmp2 = (u32)tmp1;
+ rtt = delta * 4 / interval;
+ ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
+ dccp_role(sk), sk, (int)rtt);
- if (!tmp2) {
- LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
- "%s: x_recv = %u, rtt =%u\n",
- __FUNCTION__, x_recv, rtt);
+ /*
+ * Determine the length of the first loss interval via inverse lookup.
+ * Assume that X_recv can be computed by the throughput equation
+ * s
+ * X_recv = --------
+ * R * fval
+ * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
+ */
+ if (rtt == 0) { /* would result in divide-by-zero */
+ DCCP_WARN("RTT==0\n");
return ~0;
}
- fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
- /* do not alter order above or you will get overflow on 32 bit */
+ dccp_timestamp(sk, &tstamp);
+ delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+ DCCP_BUG_ON(delta <= 0);
+
+ x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+ if (x_recv == 0) { /* would also trigger divide-by-zero */
+ DCCP_WARN("X_recv==0\n");
+ if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+ DCCP_BUG("stored value of X_recv is zero");
+ return ~0;
+ }
+ }
+
+ fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
+ fval = scaled_div32(fval, x_recv);
p = tfrc_calc_x_reverse_lookup(fval);
- ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
+
+ ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
"loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
if (p == 0)
return ~0;
else
- return 1000000 / p;
+ return 1000000 / p;
}
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
@@ -924,11 +902,10 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
/* new loss event detected */
/* calculate last interval length */
seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
- entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+ entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);
if (entry == NULL) {
- printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
- dump_stack();
+ DCCP_BUG("out of memory - can not allocate entry");
return;
}
@@ -949,7 +926,8 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
struct dccp_rx_hist_entry *packet)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+ struct dccp_rx_hist_entry *rx_hist =
+ dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
u64 seqno = packet->dccphrx_seqno;
u64 tmp_seqno;
int loss = 0;
@@ -977,7 +955,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
dccp_inc_seqno(&tmp_seqno);
while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
tmp_seqno, &ccval)) {
- hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
hcrx->ccid3hcrx_ccval_nonloss = ccval;
dccp_inc_seqno(&tmp_seqno);
}
@@ -1003,13 +981,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
const struct dccp_options_received *opt_recv;
struct dccp_rx_hist_entry *packet;
struct timeval now;
- u8 win_count;
- u32 p_prev, rtt_prev, r_sample, t_elapsed;
- int loss;
+ u32 p_prev, rtt_prev;
+ suseconds_t r_sample, t_elapsed;
+ int loss, payload_size;
- BUG_ON(hcrx == NULL ||
- !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
- hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+ BUG_ON(hcrx == NULL);
opt_recv = &dccp_sk(sk)->dccps_options_received;
@@ -1026,12 +1002,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
r_sample = timeval_usecs(&now);
t_elapsed = opt_recv->dccpor_elapsed_time * 10;
+ DCCP_BUG_ON(r_sample < 0);
if (unlikely(r_sample <= t_elapsed))
- LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
- "t_elapsed=%uus\n",
- __FUNCTION__, r_sample, t_elapsed);
+ DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
+ r_sample, t_elapsed);
else
r_sample -= t_elapsed;
+ CCID3_RTT_SANITY_CHECK(r_sample);
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
hcrx->ccid3hcrx_rtt = r_sample;
@@ -1040,8 +1017,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
r_sample / 10;
if (rtt_prev != hcrx->ccid3hcrx_rtt)
- ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
- dccp_role(sk), hcrx->ccid3hcrx_rtt,
+ ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
+ dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
opt_recv->dccpor_elapsed_time);
break;
case DCCP_PKT_DATA:
@@ -1051,52 +1028,48 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
- skb, SLAB_ATOMIC);
+ skb, GFP_ATOMIC);
if (unlikely(packet == NULL)) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to "
- "add rx packet to history, consider it lost!\n",
- __FUNCTION__, dccp_role(sk), sk);
+ DCCP_WARN("%s(%p), Not enough mem to add rx packet "
+ "to history, consider it lost!\n", dccp_role(sk), sk);
return;
}
- win_count = packet->dccphrx_ccval;
-
loss = ccid3_hc_rx_detect_loss(sk, packet);
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
return;
+ payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+ ccid3_hc_rx_update_s(hcrx, payload_size);
+
switch (hcrx->ccid3hcrx_state) {
case TFRC_RSTATE_NO_DATA:
- ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
- "feedback\n",
- dccp_role(sk), sk,
+ ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
+ "feedback\n", dccp_role(sk), sk,
dccp_state_name(sk->sk_state), skb);
ccid3_hc_rx_send_feedback(sk);
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
return;
case TFRC_RSTATE_DATA:
- hcrx->ccid3hcrx_bytes_recv += skb->len -
- dccp_hdr(skb)->dccph_doff * 4;
+ hcrx->ccid3hcrx_bytes_recv += payload_size;
if (loss)
break;
dccp_timestamp(sk, &now);
- if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
- hcrx->ccid3hcrx_rtt) {
+ if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
+ (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
hcrx->ccid3hcrx_tstamp_last_ack = now;
ccid3_hc_rx_send_feedback(sk);
}
return;
- default:
- printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
- __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
- dump_stack();
+ case TFRC_RSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
return;
}
/* Dealing with packet loss */
- ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
+ ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state));
p_prev = hcrx->ccid3hcrx_p;
@@ -1108,10 +1081,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* Scaling up by 1000000 as fixed decimal */
if (i_mean != 0)
hcrx->ccid3hcrx_p = 1000000 / i_mean;
- } else {
- printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
- dump_stack();
- }
+ } else
+ DCCP_BUG("empty loss history");
if (hcrx->ccid3hcrx_p > p_prev) {
ccid3_hc_rx_send_feedback(sk);
@@ -1121,23 +1092,17 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
- ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
-
- if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
- dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
- hcrx->ccid3hcrx_s = dp->dccps_packet_size;
- else
- hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+ ccid3_pr_debug("entry\n");
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
- hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */
+ hcrx->ccid3hcrx_s = 0;
+ hcrx->ccid3hcrx_rtt = 0;
return 0;
}
@@ -1166,9 +1131,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
BUG_ON(hcrx == NULL);
- info->tcpi_ca_state = hcrx->ccid3hcrx_state;
- info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
+ info->tcpi_ca_state = hcrx->ccid3hcrx_state;
+ info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+ info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
}
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -1249,7 +1214,6 @@ static struct ccid_operations ccid3 = {
.ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
- .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
.ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock),
.ccid_hc_rx_init = ccid3_hc_rx_init,
@@ -1261,9 +1225,11 @@ static struct ccid_operations ccid3 = {
.ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt,
.ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
};
-
+
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+#endif
static __init int ccid3_module_init(void)
{
@@ -1282,7 +1248,7 @@ static __init int ccid3_module_init(void)
goto out_free_tx;
rc = ccid_register(&ccid3);
- if (rc != 0)
+ if (rc != 0)
goto out_free_loss_interval_history;
out:
return rc;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0a2cb7536d2..15776a88c09 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,22 +42,24 @@
#include <linux/tfrc.h>
#include "../ccid.h"
-#define TFRC_MIN_PACKET_SIZE 16
-#define TFRC_STD_PACKET_SIZE 256
-#define TFRC_MAX_PACKET_SIZE 65535
-
-/* Two seconds as per CCID3 spec */
+/* Two seconds as per RFC 3448 4.2 */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
-#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4)
-
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
-/* In seconds */
-#define TFRC_MAX_BACK_OFF_TIME 64
+/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
+#define TFRC_T_MBI 64
+
+/* What we think is a reasonable upper limit on RTT values */
+#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
-#define TFRC_SMALLEST_P 40
+#define CCID3_RTT_SANITY_CHECK(rtt) do { \
+ if (rtt > CCID3_SANE_RTT_MAX) { \
+ DCCP_CRIT("RTT (%d) too large, substituting %d", \
+ (int)rtt, (int)CCID3_SANE_RTT_MAX); \
+ rtt = CCID3_SANE_RTT_MAX; \
+ } } while (0)
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
@@ -73,26 +75,36 @@ struct ccid3_options_received {
u32 ccid3or_receive_rate;
};
-/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
+/* TFRC sender states */
+enum ccid3_hc_tx_states {
+ TFRC_SSTATE_NO_SENT = 1,
+ TFRC_SSTATE_NO_FBACK,
+ TFRC_SSTATE_FBACK,
+ TFRC_SSTATE_TERM,
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
*
- * @ccid3hctx_state - Sender state
- * @ccid3hctx_x - Current sending rate
- * @ccid3hctx_x_recv - Receive rate
- * @ccid3hctx_x_calc - Calculated send (?) rate
- * @ccid3hctx_s - Packet size
- * @ccid3hctx_rtt - Estimate of current round trip time in usecs
- * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
- * @ccid3hctx_last_win_count - Last window counter sent
- * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
- * with last_win_count value sent
- * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
- * @ccid3hctx_idle - FIXME
- * @ccid3hctx_t_ld - Time last doubled during slow start
- * @ccid3hctx_t_nom - Nominal send time of next packet
- * @ccid3hctx_t_ipi - Interpacket (send) interval
- * @ccid3hctx_delta - Send timer delta
- * @ccid3hctx_hist - Packet history
- */
+ * @ccid3hctx_x - Current sending rate in 64 * bytes per second
+ * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
+ * @ccid3hctx_x_calc - Calculated rate in bytes per second
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_s - Packet size in bytes
+ * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
+ * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
+ * with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - Flag indicating that sender is idling
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - Parsed set of retrieved options
+ */
struct ccid3_hc_tx_sock {
struct tfrc_tx_info ccid3hctx_tfrc;
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
@@ -103,7 +115,7 @@ struct ccid3_hc_tx_sock {
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
u16 ccid3hctx_s;
- u8 ccid3hctx_state;
+ enum ccid3_hc_tx_states ccid3hctx_state:8;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
@@ -115,23 +127,48 @@ struct ccid3_hc_tx_sock {
struct ccid3_options_received ccid3hctx_options_received;
};
+/* TFRC receiver states */
+enum ccid3_hc_rx_states {
+ TFRC_RSTATE_NO_DATA = 1,
+ TFRC_RSTATE_DATA,
+ TFRC_RSTATE_TERM = 127,
+};
+
+/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
+ *
+ * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
+ * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
+ * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4)
+ * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number
+ * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal
+ * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
+ * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
+ * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
+ * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
+ * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
+ * @ccid3hcrx_hist - Packet history
+ * @ccid3hcrx_li_hist - Loss Interval History
+ * @ccid3hcrx_s - Received packet size in bytes
+ * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
+ * @ccid3hcrx_elapsed_time - Time since packet reception
+ */
struct ccid3_hc_rx_sock {
- struct tfrc_rx_info ccid3hcrx_tfrc;
-#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
-#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
-#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
- u64 ccid3hcrx_seqno_nonloss:48,
- ccid3hcrx_ccval_nonloss:4,
- ccid3hcrx_state:8,
- ccid3hcrx_ccval_last_counter:4;
- u32 ccid3hcrx_bytes_recv;
- struct timeval ccid3hcrx_tstamp_last_feedback;
- struct timeval ccid3hcrx_tstamp_last_ack;
- struct list_head ccid3hcrx_hist;
- struct list_head ccid3hcrx_li_hist;
- u16 ccid3hcrx_s;
- u32 ccid3hcrx_pinv;
- u32 ccid3hcrx_elapsed_time;
+ struct tfrc_rx_info ccid3hcrx_tfrc;
+#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
+#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
+#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
+ u64 ccid3hcrx_seqno_nonloss:48,
+ ccid3hcrx_ccval_nonloss:4,
+ ccid3hcrx_ccval_last_counter:4;
+ enum ccid3_hc_rx_states ccid3hcrx_state:8;
+ u32 ccid3hcrx_bytes_recv;
+ struct timeval ccid3hcrx_tstamp_last_feedback;
+ struct timeval ccid3hcrx_tstamp_last_ack;
+ struct list_head ccid3hcrx_hist;
+ struct list_head ccid3hcrx_li_hist;
+ u16 ccid3hcrx_s;
+ u32 ccid3hcrx_pinv;
+ u32 ccid3hcrx_elapsed_time;
};
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 906c81ab9d4..0a0baef16b3 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -13,7 +13,7 @@
#include <linux/module.h>
#include <net/sock.h>
-
+#include "../../dccp.h"
#include "loss_interval.h"
struct dccp_li_hist *dccp_li_hist_new(const char *name)
@@ -109,7 +109,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
i_tot = max(i_tot0, i_tot1);
if (!w_tot) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+ DCCP_WARN("w_tot = 0\n");
return 1;
}
@@ -125,10 +125,10 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
int i;
for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
- entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
+ entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
if (entry == NULL) {
dccp_li_hist_purge(hist, list);
- dump_stack();
+ DCCP_BUG("loss interval list entry is NULL");
return 0;
}
entry->dccplih_interval = ~0;
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 0ae85f0340b..eb257014dd7 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -20,7 +20,7 @@
#define DCCP_LI_HIST_IVAL_F_LENGTH 8
struct dccp_li_hist {
- kmem_cache_t *dccplih_slab;
+ struct kmem_cache *dccplih_slab;
};
extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index b876c9c81c6..2e8ef42721e 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,9 +36,100 @@
#include <linux/module.h>
#include <linux/string.h>
-
#include "packet_history.h"
+/*
+ * Transmitter History Routines
+ */
+struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
+{
+ struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+ static const char dccp_tx_hist_mask[] = "tx_hist_%s";
+ char *slab_name;
+
+ if (hist == NULL)
+ goto out;
+
+ slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
+ GFP_ATOMIC);
+ if (slab_name == NULL)
+ goto out_free_hist;
+
+ sprintf(slab_name, dccp_tx_hist_mask, name);
+ hist->dccptxh_slab = kmem_cache_create(slab_name,
+ sizeof(struct dccp_tx_hist_entry),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (hist->dccptxh_slab == NULL)
+ goto out_free_slab_name;
+out:
+ return hist;
+out_free_slab_name:
+ kfree(slab_name);
+out_free_hist:
+ kfree(hist);
+ hist = NULL;
+ goto out;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
+
+void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
+{
+ const char* name = kmem_cache_name(hist->dccptxh_slab);
+
+ kmem_cache_destroy(hist->dccptxh_slab);
+ kfree(name);
+ kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
+
+struct dccp_tx_hist_entry *
+ dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
+{
+ struct dccp_tx_hist_entry *packet = NULL, *entry;
+
+ list_for_each_entry(entry, list, dccphtx_node)
+ if (entry->dccphtx_seqno == seq) {
+ packet = entry;
+ break;
+ }
+
+ return packet;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
+
+void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
+{
+ struct dccp_tx_hist_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, list, dccphtx_node) {
+ list_del_init(&entry->dccphtx_node);
+ dccp_tx_hist_entry_delete(hist, entry);
+ }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
+
+void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+ struct list_head *list,
+ struct dccp_tx_hist_entry *packet)
+{
+ struct dccp_tx_hist_entry *next;
+
+ list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
+ list_del_init(&packet->dccphtx_node);
+ dccp_tx_hist_entry_delete(hist, packet);
+ }
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+
+/*
+ * Receiver History Routines
+ */
struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
{
struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
@@ -83,18 +174,24 @@ void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
-void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
+int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval)
{
- struct dccp_rx_hist_entry *entry, *next;
+ struct dccp_rx_hist_entry *packet = NULL, *entry;
- list_for_each_entry_safe(entry, next, list, dccphrx_node) {
- list_del_init(&entry->dccphrx_node);
- kmem_cache_free(hist->dccprxh_slab, entry);
- }
-}
+ list_for_each_entry(entry, list, dccphrx_node)
+ if (entry->dccphrx_seqno == seq) {
+ packet = entry;
+ break;
+ }
-EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
+ if (packet)
+ *ccval = packet->dccphrx_ccval;
+ return packet != NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
struct dccp_rx_hist_entry *
dccp_rx_hist_find_data_packet(const struct list_head *list)
{
@@ -184,110 +281,18 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
-struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
-{
- struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
- static const char dccp_tx_hist_mask[] = "tx_hist_%s";
- char *slab_name;
-
- if (hist == NULL)
- goto out;
-
- slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
- GFP_ATOMIC);
- if (slab_name == NULL)
- goto out_free_hist;
-
- sprintf(slab_name, dccp_tx_hist_mask, name);
- hist->dccptxh_slab = kmem_cache_create(slab_name,
- sizeof(struct dccp_tx_hist_entry),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (hist->dccptxh_slab == NULL)
- goto out_free_slab_name;
-out:
- return hist;
-out_free_slab_name:
- kfree(slab_name);
-out_free_hist:
- kfree(hist);
- hist = NULL;
- goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
-
-void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
-{
- const char* name = kmem_cache_name(hist->dccptxh_slab);
-
- kmem_cache_destroy(hist->dccptxh_slab);
- kfree(name);
- kfree(hist);
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
-
-struct dccp_tx_hist_entry *
- dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
-{
- struct dccp_tx_hist_entry *packet = NULL, *entry;
-
- list_for_each_entry(entry, list, dccphtx_node)
- if (entry->dccphtx_seqno == seq) {
- packet = entry;
- break;
- }
-
- return packet;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
-
-int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval)
-{
- struct dccp_rx_hist_entry *packet = NULL, *entry;
-
- list_for_each_entry(entry, list, dccphrx_node)
- if (entry->dccphrx_seqno == seq) {
- packet = entry;
- break;
- }
-
- if (packet)
- *ccval = packet->dccphrx_ccval;
-
- return packet != NULL;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
-
-void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
- struct list_head *list,
- struct dccp_tx_hist_entry *packet)
+void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
{
- struct dccp_tx_hist_entry *next;
+ struct dccp_rx_hist_entry *entry, *next;
- list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
- list_del_init(&packet->dccphtx_node);
- dccp_tx_hist_entry_delete(hist, packet);
+ list_for_each_entry_safe(entry, next, list, dccphrx_node) {
+ list_del_init(&entry->dccphrx_node);
+ kmem_cache_free(hist->dccprxh_slab, entry);
}
}
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
-
-void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
-{
- struct dccp_tx_hist_entry *entry, *next;
-
- list_for_each_entry_safe(entry, next, list, dccphtx_node) {
- list_del_init(&entry->dccphtx_node);
- dccp_tx_hist_entry_delete(hist, entry);
- }
-}
+EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
"Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 067cf1c85a3..1f960c19ea1 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -49,43 +49,27 @@
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16
+/*
+ * Transmitter History data structures and declarations
+ */
struct dccp_tx_hist_entry {
struct list_head dccphtx_node;
u64 dccphtx_seqno:48,
- dccphtx_ccval:4,
dccphtx_sent:1;
u32 dccphtx_rtt;
struct timeval dccphtx_tstamp;
};
-struct dccp_rx_hist_entry {
- struct list_head dccphrx_node;
- u64 dccphrx_seqno:48,
- dccphrx_ccval:4,
- dccphrx_type:4;
- u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
- struct timeval dccphrx_tstamp;
-};
-
struct dccp_tx_hist {
- kmem_cache_t *dccptxh_slab;
+ struct kmem_cache *dccptxh_slab;
};
extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
-extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
-
-struct dccp_rx_hist {
- kmem_cache_t *dccprxh_slab;
-};
-
-extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
-extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
-extern struct dccp_rx_hist_entry *
- dccp_rx_hist_find_data_packet(const struct list_head *list);
+extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
static inline struct dccp_tx_hist_entry *
- dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
- const gfp_t prio)
+ dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
+ const gfp_t prio)
{
struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
prio);
@@ -96,18 +80,20 @@ static inline struct dccp_tx_hist_entry *
return entry;
}
-static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
- struct dccp_tx_hist_entry *entry)
+static inline struct dccp_tx_hist_entry *
+ dccp_tx_hist_head(struct list_head *list)
{
- if (entry != NULL)
- kmem_cache_free(hist->dccptxh_slab, entry);
+ struct dccp_tx_hist_entry *head = NULL;
+
+ if (!list_empty(list))
+ head = list_entry(list->next, struct dccp_tx_hist_entry,
+ dccphtx_node);
+ return head;
}
extern struct dccp_tx_hist_entry *
dccp_tx_hist_find_entry(const struct list_head *list,
const u64 seq);
-extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval);
static inline void dccp_tx_hist_add_entry(struct list_head *list,
struct dccp_tx_hist_entry *entry)
@@ -115,30 +101,45 @@ static inline void dccp_tx_hist_add_entry(struct list_head *list,
list_add(&entry->dccphtx_node, list);
}
+static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
+ struct dccp_tx_hist_entry *entry)
+{
+ if (entry != NULL)
+ kmem_cache_free(hist->dccptxh_slab, entry);
+}
+
+extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
+ struct list_head *list);
+
extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
struct list_head *list,
struct dccp_tx_hist_entry *next);
-extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
- struct list_head *list);
+/*
+ * Receiver History data structures and declarations
+ */
+struct dccp_rx_hist_entry {
+ struct list_head dccphrx_node;
+ u64 dccphrx_seqno:48,
+ dccphrx_ccval:4,
+ dccphrx_type:4;
+ u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
+ struct timeval dccphrx_tstamp;
+};
-static inline struct dccp_tx_hist_entry *
- dccp_tx_hist_head(struct list_head *list)
-{
- struct dccp_tx_hist_entry *head = NULL;
+struct dccp_rx_hist {
+ struct kmem_cache *dccprxh_slab;
+};
- if (!list_empty(list))
- head = list_entry(list->next, struct dccp_tx_hist_entry,
- dccphtx_node);
- return head;
-}
+extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
+extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
static inline struct dccp_rx_hist_entry *
- dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
- const struct sock *sk,
- const u32 ndp,
- const struct sk_buff *skb,
- const gfp_t prio)
+ dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
+ const struct sock *sk,
+ const u32 ndp,
+ const struct sk_buff *skb,
+ const gfp_t prio)
{
struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
prio);
@@ -156,18 +157,8 @@ static inline struct dccp_rx_hist_entry *
return entry;
}
-static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
- struct dccp_rx_hist_entry *entry)
-{
- if (entry != NULL)
- kmem_cache_free(hist->dccprxh_slab, entry);
-}
-
-extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
- struct list_head *list);
-
static inline struct dccp_rx_hist_entry *
- dccp_rx_hist_head(struct list_head *list)
+ dccp_rx_hist_head(struct list_head *list)
{
struct dccp_rx_hist_entry *head = NULL;
@@ -177,6 +168,27 @@ static inline struct dccp_rx_hist_entry *
return head;
}
+extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+ u8 *ccval);
+extern struct dccp_rx_hist_entry *
+ dccp_rx_hist_find_data_packet(const struct list_head *list);
+
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+ struct list_head *rx_list,
+ struct list_head *li_list,
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno);
+
+static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
+ struct dccp_rx_hist_entry *entry)
+{
+ if (entry != NULL)
+ kmem_cache_free(hist->dccprxh_slab, entry);
+}
+
+extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
+ struct list_head *list);
+
static inline int
dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
{
@@ -184,12 +196,6 @@ static inline int
entry->dccphrx_type == DCCP_PKT_DATAACK;
}
-extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
- struct list_head *rx_list,
- struct list_head *li_list,
- struct dccp_rx_hist_entry *packet,
- u64 nonloss_seqno);
-
extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
struct list_head *li_list, u8 *win_loss);
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 45f30f59ea2..faf5f7e219e 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -13,8 +13,29 @@
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
-
#include <linux/types.h>
+#include <asm/div64.h>
+
+/* integer-arithmetic divisions of type (a * 1000000)/b */
+static inline u64 scaled_div(u64 a, u32 b)
+{
+ BUG_ON(b==0);
+ a *= 1000000;
+ do_div(a, b);
+ return a;
+}
+
+static inline u32 scaled_div32(u64 a, u32 b)
+{
+ u64 result = scaled_div(a, b);
+
+ if (result > UINT_MAX) {
+ DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
+ (unsigned long long)a, b);
+ return UINT_MAX;
+ }
+ return result;
+}
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 44076e0c659..90009fd77e1 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -13,16 +13,83 @@
*/
#include <linux/module.h>
-
-#include <asm/div64.h>
-
+#include "../../dccp.h"
#include "tfrc.h"
#define TFRC_CALC_X_ARRSIZE 500
+#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */
+#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
-#define TFRC_CALC_X_SPLIT 50000
-/* equivalent to 0.05 */
-
+/*
+ TFRC TCP Reno Throughput Equation Lookup Table for f(p)
+
+ The following two-column lookup table implements a part of the TCP throughput
+ equation from [RFC 3448, sec. 3.1]:
+
+ s
+ X_calc = --------------------------------------------------------------
+ R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
+
+ Where:
+ X is the transmit rate in bytes/second
+ s is the packet size in bytes
+ R is the round trip time in seconds
+ p is the loss event rate, between 0 and 1.0, of the number of loss
+ events as a fraction of the number of packets transmitted
+ t_RTO is the TCP retransmission timeout value in seconds
+ b is the number of packets acknowledged by a single TCP ACK
+
+ We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
+
+ s
+ X_calc = -------------------------------------------------------
+ R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
+
+ which we can break down into:
+
+ s
+ X_calc = ---------
+ R * f(p)
+
+ where f(p) is given for 0 < p <= 1 by:
+
+ f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3)
+
+ Since this is kernel code, floating-point arithmetic is avoided in favour of
+ integer arithmetic. This means that nearly all fractional parameters are
+ scaled by 1000000:
+ * the parameters p and R
+ * the return result f(p)
+ The lookup table therefore actually tabulates the following function g(q):
+
+ g(q) = 1000000 * f(q/1000000)
+
+ Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
+ granularity for the practically more relevant case of small values of p (up to
+ 5%), the second column is used; the first one ranges up to 100%. This split
+ corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
+ determines the smallest resolution possible with this lookup table:
+
+ TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
+
+ The entire table is generated by:
+ for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
+ lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
+ lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
+ }
+
+ With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
+ lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%)
+ lookup[M][0] = g(1000000) = 1000000 * f(100%)
+ lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%)
+ lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%)
+
+ In summary, the two columns represent f(p) for the following ranges:
+ * The first column is for 0.002 <= p <= 1.0
+ * The second column is for 0.0001 <= p <= 0.05
+ Where the columns overlap, the second (finer-grained) is given preference,
+ i.e. the first column is used only for p > 0.05.
+ */
static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
{ 37172, 8172 },
{ 53499, 11567 },
@@ -526,117 +593,105 @@ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
{ 243315981, 271305 }
};
-/* Calculate the send rate as per section 3.1 of RFC3448
-
-Returns send rate in bytes per second
-
-Integer maths and lookups are used as not allowed floating point in kernel
-
-The function for Xcalc as per section 3.1 of RFC3448 is:
-
-X = s
- -------------------------------------------------------------
- R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
-
-where
-X is the trasmit rate in bytes/second
-s is the packet size in bytes
-R is the round trip time in seconds
-p is the loss event rate, between 0 and 1.0, of the number of loss events
- as a fraction of the number of packets transmitted
-t_RTO is the TCP retransmission timeout value in seconds
-b is the number of packets acknowledged by a single TCP acknowledgement
-
-we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
-
-X = s
- -----------------------------------------------------------------------
- R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
-
-
-which we can break down into:
-
-X = s
- --------
- R * f(p)
-
-where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
-
-Function parameters:
-s - bytes
-R - RTT in usecs
-p - loss rate (decimal fraction multiplied by 1,000,000)
-
-Returns Xcalc in bytes per second
-
-DON'T alter this code unless you run test cases against it as the code
-has been manipulated to stop underflow/overlow.
+/* return smallest index i such that fval <= lookup[i][small] */
+static inline u32 tfrc_binsearch(u32 fval, u8 small)
+{
+ u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1;
+
+ while (low < high) {
+ try = (low + high) / 2;
+ if (fval <= tfrc_calc_x_lookup[try][small])
+ high = try;
+ else
+ low = try + 1;
+ }
+ return high;
+}
-*/
+/**
+ * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448
+ *
+ * @s: packet size in bytes
+ * @R: RTT scaled by 1000000 (i.e., microseconds)
+ * @p: loss ratio estimate scaled by 1000000
+ * Returns X_calc in bytes per second (not scaled).
+ */
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
{
- int index;
+ u16 index;
u32 f;
- u64 tmp1, tmp2;
+ u64 result;
- if (p < TFRC_CALC_X_SPLIT)
- index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
- else
- index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
+ /* check against invalid parameters and divide-by-zero */
+ BUG_ON(p > 1000000); /* p must not exceed 100% */
+ BUG_ON(p == 0); /* f(0) = 0, divide by zero */
+ if (R == 0) { /* possible divide by zero */
+ DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
+ return ~0U;
+ }
- if (index < 0)
- /* p should be 0 unless there is a bug in my code */
- index = 0;
+ if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
+ if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
+ DCCP_WARN("Value of p (%d) below resolution. "
+ "Substituting %d\n", p, TFRC_SMALLEST_P);
+ index = 0;
+ } else /* 0.0001 <= p <= 0.05 */
+ index = p/TFRC_SMALLEST_P - 1;
- if (R == 0)
- R = 1; /* RTT can't be zero or else divide by zero */
+ f = tfrc_calc_x_lookup[index][1];
- BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
+ } else { /* 0.05 < p <= 1.00 */
+ index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
- if (p >= TFRC_CALC_X_SPLIT)
f = tfrc_calc_x_lookup[index][0];
- else
- f = tfrc_calc_x_lookup[index][1];
-
- tmp1 = ((u64)s * 100000000);
- tmp2 = ((u64)R * (u64)f);
- do_div(tmp2, 10000);
- do_div(tmp1, tmp2);
- /* Don't alter above math unless you test due to overflow on 32 bit */
-
- return (u32)tmp1;
+ }
+
+ /*
+ * Compute X = s/(R*f(p)) in bytes per second.
+ * Since f(p) and R are both scaled by 1000000, we need to multiply by
+ * 1000000^2. To avoid overflow, the result is computed in two stages.
+ * This works under almost all reasonable operational conditions, for a
+ * wide range of parameters. Yet, should some strange combination of
+ * parameters result in overflow, the use of scaled_div32 will catch
+ * this and return UINT_MAX - which is a logically adequate consequence.
+ */
+ result = scaled_div(s, R);
+ return scaled_div32(result, f);
}
EXPORT_SYMBOL_GPL(tfrc_calc_x);
/*
- * args: fvalue - function value to match
- * returns: p closest to that value
+ * tfrc_calc_x_reverse_lookup - try to find p given f(p)
*
- * both fvalue and p are multiplied by 1,000,000 to use ints
+ * @fvalue: function value to match, scaled by 1000000
+ * Returns closest match for p, also scaled by 1000000
*/
u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
{
- int ctr = 0;
- int small;
+ int index;
- if (fvalue < tfrc_calc_x_lookup[0][1])
+ if (fvalue == 0) /* f(p) = 0 whenever p = 0 */
return 0;
- if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
- small = 1;
- else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
+ /* Error cases: fvalue lies outside the tabulated range of f(p). */
+ if (fvalue < tfrc_calc_x_lookup[0][1]) {
+ DCCP_WARN("fvalue %u smaller than resolution\n", fvalue);
+ return TFRC_SMALLEST_P; /* smallest tabulated p, not its f(p) */
+ }
+ if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
+ DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue);
return 1000000;
- else
- small = 0;
-
- while (fvalue > tfrc_calc_x_lookup[ctr][small])
- ctr++;
+ }
- if (small)
- return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
- else
- return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
+ if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
+ index = tfrc_binsearch(fvalue, 1);
+ return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
+ }
+
+ /* else ... it must be in the coarse-grained column */
+ index = tfrc_binsearch(fvalue, 0);
+ return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0a21be437ed..a0900bf98e6 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -18,15 +18,33 @@
#include <net/tcp.h>
#include "ackvec.h"
+/*
+ * DCCP - specific warning and debugging macros.
+ */
+#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \
+ __FUNCTION__, ##a)
+#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
+ __FILE__, __LINE__, __FUNCTION__)
+#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
+#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
+ DCCP_BUG("\"%s\" holds (exception!)", \
+ __stringify(cond)); \
+ } while (0)
+
+#ifdef MODULE
+#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
+ printk(fmt, ##args); \
+ } while(0)
+#else
+#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
+#endif
+#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
+ "%s: " fmt, __FUNCTION__, ##a)
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;
-
-#define dccp_pr_debug(format, a...) \
- do { if (dccp_debug) \
- printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
- } while (0)
-#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
- printk(format, ##a); } while (0)
+#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
+#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
@@ -35,22 +53,26 @@ extern int dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
-extern int dccp_tw_count;
-extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
-/* FIXME: Right size this */
-#define DCCP_MAX_OPT_LEN 128
-
-#define DCCP_MAX_PACKET_HDR 32
-
-#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+/*
+ * Set safe upper bounds for header and option length. Since Data Offset is 8
+ * bits (RFC 4340, sec. 5.1), the total header length can never be more than
+ * 4 * 255 = 1020 bytes. The largest possible header without options is 28 bytes (X=1):
+ * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
+ * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
+ * Hence a safe upper bound for the maximum option length is 1020-28 = 992
+ */
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
+#define DCCP_MAX_PACKET_HDR 28
+#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
+#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
-/* draft-ietf-dccp-spec-11.txt initial RTO value */
+/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
/* Maximal interval between probes for local resources. */
@@ -58,6 +80,18 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/* sysctl variables for DCCP */
+extern int sysctl_dccp_request_retries;
+extern int sysctl_dccp_retries1;
+extern int sysctl_dccp_retries2;
+extern int sysctl_dccp_feat_sequence_window;
+extern int sysctl_dccp_feat_rx_ccid;
+extern int sysctl_dccp_feat_tx_ccid;
+extern int sysctl_dccp_feat_ack_ratio;
+extern int sysctl_dccp_feat_send_ack_vector;
+extern int sysctl_dccp_feat_send_ndp_count;
+extern int sysctl_dccp_tx_qlen;
+
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
@@ -123,10 +157,36 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_ADD_STATS_USER(field, val) \
SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+/*
+ * Checksumming routines
+ */
+static inline int dccp_csum_coverage(const struct sk_buff *skb)
+{
+ const struct dccp_hdr* dh = dccp_hdr(skb);
+
+ if (dh->dccph_cscov == 0)
+ return skb->len;
+ return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32);
+}
+
+static inline void dccp_csum_outgoing(struct sk_buff *skb)
+{
+ int cov = dccp_csum_coverage(skb);
+
+ if (cov >= skb->len)
+ dccp_hdr(skb)->dccph_cscov = 0;
+
+ skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
+}
+
+extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
+
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
@@ -147,18 +207,7 @@ extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-static inline void dccp_openreq_init(struct request_sock *req,
- struct dccp_sock *dp,
- struct sk_buff *skb)
-{
- /*
- * FIXME: fill in the other req fields from the DCCP options
- * received
- */
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->acked = 0;
- req->rcv_wnd = 0;
-}
+extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
@@ -217,14 +266,9 @@ extern void dccp_shutdown(struct sock *sk, int how);
extern int inet_dccp_listen(struct socket *sock, int backlog);
extern unsigned int dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
-extern void dccp_v4_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
-extern int dccp_v4_checksum(const struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr);
-
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
@@ -388,6 +432,7 @@ static inline void timeval_sub_usecs(struct timeval *tv,
tv->tv_sec--;
tv->tv_usec += USEC_PER_SEC;
}
+ DCCP_BUG_ON(tv->tv_sec < 0);
}
#ifdef CONFIG_SYSCTL
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index a1b0682ee77..95b6927ec65 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
-#include "dccp.h"
#include "ccid.h"
#include "feat.h"
@@ -23,9 +22,17 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
{
struct dccp_opt_pend *opt;
- dccp_pr_debug("feat change type=%d feat=%d\n", type, feature);
+ dccp_feat_debug(type, feature, *val);
- /* XXX sanity check feat change request */
+ if (!dccp_feat_is_valid_type(type)) {
+ DCCP_WARN("option type %d invalid in negotiation\n", type);
+ return 1;
+ }
+ if (!dccp_feat_is_valid_length(type, feature, len)) {
+ DCCP_WARN("invalid length %d\n", len);
+ return 1;
+ }
+ /* XXX add further sanity checks */
/* check if that feature is already being negotiated */
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
@@ -95,14 +102,14 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
/* XXX taking only u8 vals */
static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
{
- dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val);
+ dccp_feat_debug(type, feat, val);
switch (feat) {
case DCCPF_CCID:
return dccp_feat_update_ccid(sk, type, val);
default:
- dccp_pr_debug("IMPLEMENT changing [%d] feat %d to %d\n",
- type, feat, val);
+ dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
+ dccp_feat_typename(type), feat);
break;
}
return 0;
@@ -162,7 +169,8 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
break;
default:
- WARN_ON(1); /* XXX implement res */
+ DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
+ /* XXX implement res */
return -EFAULT;
}
@@ -265,10 +273,10 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
u8 *copy;
int rc;
- /* NN features must be change L */
- if (type == DCCPO_CHANGE_R) {
- dccp_pr_debug("received CHANGE_R %d for NN feat %d\n",
- type, feature);
+ /* NN features must be Change L (sec. 6.3.2) */
+ if (type != DCCPO_CHANGE_L) {
+ dccp_pr_debug("received %s for NN feature %d\n",
+ dccp_feat_typename(type), feature);
return -EFAULT;
}
@@ -279,12 +287,11 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
if (opt == NULL)
return -ENOMEM;
- copy = kmalloc(len, GFP_ATOMIC);
+ copy = kmemdup(val, len, GFP_ATOMIC);
if (copy == NULL) {
kfree(opt);
return -ENOMEM;
}
- memcpy(copy, val, len);
opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
opt->dccpop_feat = feature;
@@ -299,7 +306,8 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
return rc;
}
- dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy);
+ dccp_feat_debug(type, feature, *copy);
+
list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
return 0;
@@ -318,14 +326,19 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
return;
}
- opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
- DCCPO_CONFIRM_L;
+ switch (type) {
+ case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break;
+ case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break;
+ default: DCCP_WARN("invalid type %d\n", type); return;
+
+ }
opt->dccpop_feat = feature;
opt->dccpop_val = NULL;
opt->dccpop_len = 0;
/* change feature */
- dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type);
+ dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
+
list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
}
@@ -359,7 +372,7 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
{
int rc;
- dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature);
+ dccp_feat_debug(type, feature, *val);
/* figure out if it's SP or NN feature */
switch (feature) {
@@ -375,6 +388,8 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
/* XXX implement other features */
default:
+ dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
+ dccp_feat_typename(type), feature);
rc = -EFAULT;
break;
}
@@ -403,20 +418,27 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
u8 t;
struct dccp_opt_pend *opt;
struct dccp_minisock *dmsk = dccp_msk(sk);
- int rc = 1;
+ int found = 0;
int all_confirmed = 1;
- dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature);
-
- /* XXX sanity check type & feat */
+ dccp_feat_debug(type, feature, *val);
/* locate our change request */
- t = type == DCCPO_CONFIRM_L ? DCCPO_CHANGE_R : DCCPO_CHANGE_L;
+ switch (type) {
+ case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
+ case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
+ default: DCCP_WARN("invalid type %d\n", type);
+ return 1;
+
+ }
+ /* XXX sanity check feature value */
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
if (!opt->dccpop_conf && opt->dccpop_type == t &&
opt->dccpop_feat == feature) {
- /* we found it */
+ found = 1;
+ dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
+
/* XXX do sanity check */
opt->dccpop_conf = 1;
@@ -425,9 +447,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
dccp_feat_update(sk, opt->dccpop_type,
opt->dccpop_feat, *val);
- dccp_pr_debug("feat %d type %d confirmed %d\n",
- feature, type, *val);
- rc = 0;
+ /* XXX check the return value of dccp_feat_update */
break;
}
@@ -446,9 +466,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
}
- if (rc)
- dccp_pr_debug("feat %d type %d never requested\n",
- feature, type);
+ if (!found)
+ dccp_pr_debug("%s(%d, ...) never requested\n",
+ dccp_feat_typename(type), feature);
return 0;
}
@@ -501,20 +521,18 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
struct dccp_opt_pend *newopt;
/* copy the value of the option */
- u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC);
+ u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
if (val == NULL)
goto out_clean;
- memcpy(val, opt->dccpop_val, opt->dccpop_len);
- newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC);
+ newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
if (newopt == NULL) {
kfree(val);
goto out_clean;
}
/* insert the option */
- memcpy(newopt, opt, sizeof(*newopt));
newopt->dccpop_val = val;
list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
@@ -545,10 +563,9 @@ static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
u8 *val, u8 len)
{
int rc = -ENOMEM;
- u8 *copy = kmalloc(len, GFP_KERNEL);
+ u8 *copy = kmemdup(val, len, GFP_KERNEL);
if (copy != NULL) {
- memcpy(copy, val, len);
rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
if (rc)
kfree(copy);
@@ -583,3 +600,45 @@ out:
}
EXPORT_SYMBOL_GPL(dccp_feat_init);
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+const char *dccp_feat_typename(const u8 type)
+{
+ switch(type) {
+ case DCCPO_CHANGE_L: return("ChangeL");
+ case DCCPO_CONFIRM_L: return("ConfirmL");
+ case DCCPO_CHANGE_R: return("ChangeR");
+ case DCCPO_CONFIRM_R: return("ConfirmR");
+ /* the following case must not appear in feature negotiation;
+ * it is only logged here and then falls out of the switch, so the
+ * function returns NULL for any type other than the four above */
+ default: dccp_pr_debug("unknown type %d [BUG!]\n", type);
+ }
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_typename);
+
+const char *dccp_feat_name(const u8 feat)
+{
+ static const char *feature_names[] = {
+ [DCCPF_RESERVED] = "Reserved",
+ [DCCPF_CCID] = "CCID",
+ [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
+ [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
+ [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
+ [DCCPF_ACK_RATIO] = "Ack Ratio",
+ [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
+ [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
+ [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
+ [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
+ };
+ if (feat >= DCCPF_MIN_CCID_SPECIFIC)
+ return "CCID-specific";
+
+ if (dccp_feat_is_reserved(feat))
+ return feature_names[DCCPF_RESERVED];
+
+ return feature_names[feat];
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_name);
+#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index cee553d416c..2c373ad7edc 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -12,9 +12,46 @@
*/
#include <linux/types.h>
+#include "dccp.h"
-struct sock;
-struct dccp_minisock;
+static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
+{
+ /* sec. 6.1: Confirm has at least length 3,
+ * sec. 6.2: Change has at least length 4
+ *
+ * NOTE(review): as written this returns 1 when len is BELOW those
+ * minimums and 0 otherwise, i.e. non-zero means "too short" even
+ * though the name reads as "is valid". The caller in
+ * dccp_feat_change() negates the result and warns "invalid length"
+ * when it is 0. Confirm the intended polarity, and whether len is
+ * the whole option length or only the value length, before relying
+ * on either side. The feature argument is currently unused. */
+ if (len < 3)
+ return 1;
+ if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R))
+ return 1;
+ /* XXX: add per-feature length validation (sec. 6.6.8) */
+ return 0;
+}
+
+static inline int dccp_feat_is_reserved(const u8 feat)
+{
+ return (feat > DCCPF_DATA_CHECKSUM &&
+ feat < DCCPF_MIN_CCID_SPECIFIC) ||
+ feat == DCCPF_RESERVED;
+}
+
+/* feature negotiation knows only these four option types (RFC 4340, sec. 6);
+ * returns non-zero when optnum lies in the inclusive range
+ * DCCPO_CHANGE_L .. DCCPO_CONFIRM_R and zero otherwise (presumably the four
+ * option codes are numerically contiguous -- confirm against their
+ * definition) */
+static inline int dccp_feat_is_valid_type(const u8 optnum)
+{
+ return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R;
+
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern const char *dccp_feat_typename(const u8 type);
+extern const char *dccp_feat_name(const u8 feat);
+
+static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
+{
+ dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
+ dccp_feat_name(feat), feat, val);
+}
+#else
+#define dccp_feat_debug(type, feat, val)
+#endif /* CONFIG_IP_DCCP_DEBUG */
extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
u8 *val, u8 len, gfp_t gfp);
@@ -26,11 +63,4 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk);
extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
extern int dccp_feat_init(struct dccp_minisock *dmsk);
-extern int dccp_feat_default_sequence_window;
-extern int dccp_feat_default_rx_ccid;
-extern int dccp_feat_default_tx_ccid;
-extern int dccp_feat_default_ack_ratio;
-extern int dccp_feat_default_send_ack_vector;
-extern int dccp_feat_default_send_ndp_count;
-
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 7f9dc6ac58c..565bc80557c 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -1,6 +1,6 @@
/*
* net/dccp/input.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -82,7 +82,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
* Otherwise,
* Drop packet and return
*/
- if (dh->dccph_type == DCCP_PKT_SYNC ||
+ if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh) &&
@@ -128,21 +128,18 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
DCCP_PKT_WITHOUT_ACK_SEQ))
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
} else {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
- "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
- "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
- "sending SYNC...\n",
- dccp_packet_name(dh->dccph_type),
- (unsigned long long) lswl,
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_seq,
- (unsigned long long) dp->dccps_swh,
- (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
+ DCCP_WARN("DCCP: Step 6 failed for %s packet, "
+ "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
+ "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
+ "sending SYNC...\n", dccp_packet_name(dh->dccph_type),
+ (unsigned long long) lswl,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq,
+ (unsigned long long) dp->dccps_swh,
+ (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
- (unsigned long long) lawl,
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long) dp->dccps_awh);
+ (unsigned long long) lawl,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long) dp->dccps_awh);
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
return -1;
}
@@ -188,8 +185,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
dccp_rcv_close(sk, skb);
return 0;
case DCCP_PKT_REQUEST:
- /* Step 7
- * or (S.is_server and P.type == Response)
+ /* Step 7
+ * or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state >= OPEN and P.type == Request
* and P.seqno >= S.OSR)
@@ -216,11 +213,11 @@ send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNCACK);
/*
- * From the draft:
+ * From RFC 4340, sec. 5.7
*
* As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
* MAY have non-zero-length application data areas, whose
- * contents * receivers MUST ignore.
+ * contents receivers MUST ignore.
*/
goto discard;
}
@@ -251,8 +248,18 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
DCCP_ACKVEC_STATE_RECEIVED))
goto discard;
- ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
- ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+ /*
+ * Deliver to the CCID module in charge.
+ * FIXME: Currently DCCP operates one-directional only, i.e. a listening
+ * server is not at the same time a connecting client. There is
+ * not much sense in delivering to both rx/tx sides at the moment
+ * (only one is active at a time); when moving to bidirectional
+ * service, this needs to be revised.
+ */
+ if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ else
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
return __dccp_rcv_established(sk, skb, dh, len);
discard:
@@ -267,7 +274,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
const struct dccp_hdr *dh,
const unsigned len)
{
- /*
+ /*
* Step 4: Prepare sequence numbers in REQUEST
* If S.state == REQUEST,
* If (P.type == Response or P.type == Reset)
@@ -335,7 +342,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
* from the Response * /
* S.state := PARTOPEN
* Set PARTOPEN timer
- * Continue with S.state == PARTOPEN
+ * Continue with S.state == PARTOPEN
* / * Step 12 will send the Ack completing the
* three-way handshake * /
*/
@@ -366,7 +373,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
*/
__kfree_skb(skb);
return 0;
- }
+ }
dccp_send_ack(sk);
return -1;
}
@@ -374,7 +381,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
out_invalid_packet:
/* dccp_v4_do_rcv will send a reset */
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
- return 1;
+ return 1;
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -431,29 +438,25 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/*
* Step 3: Process LISTEN state
- * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
*
* If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie
- * option,
- * * Must scan the packet's options to check for an Init
- * Cookie. Only the Init Cookie is processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *
- * * Generate a new socket and switch to that socket *
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- * Continue with S.state == RESPOND
- * * A Response packet will be generated in Step 11 *
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- *
- * NOTE: the check for the packet types is done in
- * dccp_rcv_state_process
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+ * Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
*/
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
@@ -485,14 +488,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto discard;
+ DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_ACKVEC_STATE_RECEIVED))
+ goto discard;
- ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
- ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+ /* XXX see the comments in dccp_rcv_established about this */
+ if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ else
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
}
/*
@@ -574,7 +580,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
}
- if (!queued) {
+ if (!queued) {
discard:
__kfree_skb(skb);
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7e746c4c168..90c74b4adb7 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -113,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* OK, now commit destination to socket. */
sk_setup_caps(sk, &rt->u.dst);
- dp->dccps_gar =
- dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
- inet->daddr,
- inet->sport,
- usin->sin_port);
- dccp_update_gss(sk, dp->dccps_iss);
-
+ dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr,
+ inet->sport, inet->dport);
inet->id = dp->dccps_iss ^ jiffies;
err = dccp_connect(sk);
@@ -162,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
/* We don't check in the destentry if pmtu discovery is forbidden
* on this route. We just assume that no packet_to_big packets
* are send back when pmtu discovery is not active.
- * There is a small race when the user changes this flag in the
+ * There is a small race when the user changes this flag in the
* route, but I think that's acceptable.
*/
if ((dst = __sk_dst_check(sk, 0)) == NULL)
@@ -183,7 +178,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
dccp_sync_mss(sk, mtu);
/*
- * From: draft-ietf-dccp-spec-11.txt
+ * From RFC 4340, sec. 14.1:
*
* DCCP-Sync packets are the best choice for upward
* probing, since DCCP-Sync probes do not risk application
@@ -193,86 +188,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
} /* else let the usual retransmit timer handle it */
}
-static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb,
- struct request_sock *req)
-{
- int err;
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_ack_bits);
- struct sk_buff *skb;
-
- if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
- return;
-
- skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- /* Reserve space for headers. */
- skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
-
- skb->dst = dst_clone(rxskb->dst);
-
- skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_ack_len);
-
- /* Build DCCP header and checksum it. */
- dh->dccph_type = DCCP_PKT_ACK;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_ack_len / 4;
- dh->dccph_x = 1;
-
- dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- bh_lock_sock(dccp_v4_ctl_socket->sk);
- err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
- rxskb->nh.iph->daddr,
- rxskb->nh.iph->saddr, NULL);
- bh_unlock_sock(dccp_v4_ctl_socket->sk);
-
- if (err == NET_XMIT_CN || err == 0) {
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
- }
-}
-
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
-{
- int err = -1;
- struct sk_buff *skb;
-
- /* First, grab a route. */
-
- if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto out;
-
- skb = dccp_make_response(sk, dst, req);
- if (skb != NULL) {
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr,
- ireq->rmt_addr);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
- ireq->opt);
- if (err == NET_XMIT_CN)
- err = 0;
- }
-
-out:
- dst_release(dst);
- return err;
-}
-
/*
* This routine is called by the ICMP module when it gets some sort of error
* condition. If err < 0 then the socket should be closed and the error
@@ -329,7 +244,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
seq = dccp_hdr_seq(skb);
if (sk->sk_state != DCCP_LISTEN &&
!between48(seq, dp->dccps_swl, dp->dccps_swh)) {
- NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -429,19 +344,24 @@ out:
sock_put(sk);
}
-/* This routine computes an IPv4 DCCP checksum. */
-void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
+ __be32 src, __be32 dst)
+{
+ return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
+}
+
+void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb)
{
const struct inet_sock *inet = inet_sk(sk);
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr);
}
EXPORT_SYMBOL_GPL(dccp_v4_send_check);
-static inline u64 dccp_v4_init_sequence(const struct sock *sk,
- const struct sk_buff *skb)
+static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
{
return secure_dccp_sequence_number(skb->nh.iph->daddr,
skb->nh.iph->saddr,
@@ -449,93 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
dccp_hdr(skb)->dccph_sport);
}
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
-{
- struct inet_request_sock *ireq;
- struct dccp_sock dp;
- struct request_sock *req;
- struct dccp_request_sock *dreq;
- const __be32 saddr = skb->nh.iph->saddr;
- const __be32 daddr = skb->nh.iph->daddr;
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
-
- /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
- if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST)) {
- reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- goto drop;
- }
-
- if (dccp_bad_service_code(sk, service)) {
- reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
- goto drop;
- }
- /*
- * TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if (inet_csk_reqsk_queue_is_full(sk))
- goto drop;
-
- /*
- * Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
- goto drop;
-
- req = reqsk_alloc(sk->sk_prot->rsk_prot);
- if (req == NULL)
- goto drop;
-
- if (dccp_parse_options(sk, skb))
- goto drop_and_free;
-
- dccp_openreq_init(req, &dp, skb);
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
- ireq = inet_rsk(req);
- ireq->loc_addr = daddr;
- ireq->rmt_addr = saddr;
- req->rcv_wnd = dccp_feat_default_sequence_window;
- ireq->opt = NULL;
-
- /*
- * Step 3: Process LISTEN state
- *
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- *
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
- */
- dreq = dccp_rsk(req);
- dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
- dreq->dreq_service = service;
-
- if (dccp_v4_send_response(sk, req, NULL))
- goto drop_and_free;
-
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- return 0;
-
-drop_and_free:
- reqsk_free(req);
-drop:
- DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
- dcb->dccpd_reset_code = reset_code;
- return -1;
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
-
/*
* The three way handshake has completed - we got a valid ACK or DATAACK -
* now create the new socket.
@@ -621,47 +454,6 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr,
- const __be32 daddr)
-{
- const struct dccp_hdr* dh = dccp_hdr(skb);
- int checksum_len;
- u32 tmp;
-
- if (dh->dccph_cscov == 0)
- checksum_len = skb->len;
- else {
- checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
- checksum_len = checksum_len < skb->len ? checksum_len :
- skb->len;
- }
-
- tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
- return csum_tcpudp_magic(saddr, daddr, checksum_len,
- IPPROTO_DCCP, tmp);
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_checksum);
-
-static int dccp_v4_verify_checksum(struct sk_buff *skb,
- const __be32 saddr, const __be32 daddr)
-{
- struct dccp_hdr *dh = dccp_hdr(skb);
- int checksum_len;
- u32 tmp;
-
- if (dh->dccph_cscov == 0)
- checksum_len = skb->len;
- else {
- checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
- checksum_len = checksum_len < skb->len ? checksum_len :
- skb->len;
- }
- tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
- return csum_tcpudp_magic(saddr, daddr, checksum_len,
- IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
-}
-
static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
struct sk_buff *skb)
{
@@ -675,7 +467,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
.uli_u = { .ports =
{ .sport = dccp_hdr(skb)->dccph_dport,
.dport = dccp_hdr(skb)->dccph_sport }
- }
+ }
};
security_skb_classify_flow(skb, &fl);
@@ -687,7 +479,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
return &rt->u.dst;
}
-static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+ struct dst_entry *dst)
+{
+ int err = -1;
+ struct sk_buff *skb;
+
+ /* First, grab a route. */
+
+ if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
+ goto out;
+
+ skb = dccp_make_response(sk, dst, req);
+ if (skb != NULL) {
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct dccp_hdr *dh = dccp_hdr(skb);
+
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
+ ireq->rmt_addr);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+ ireq->rmt_addr,
+ ireq->opt);
+ err = net_xmit_eval(err);
+ }
+
+out:
+ dst_release(dst);
+ return err;
+}
+
+static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
@@ -696,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct dst_entry *dst;
- u64 seqno;
+ u64 seqno = 0;
/* Never send a reset in response to a reset. */
if (rxdh->dccph_type == DCCP_PKT_RESET)
@@ -718,9 +540,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_reset_len);
+ dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
/* Build DCCP header and checksum it. */
dh->dccph_type = DCCP_PKT_RESET;
@@ -731,17 +551,16 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
dccp_hdr_reset(skb)->dccph_reset_code =
DCCP_SKB_CB(rxskb)->dccpd_reset_code;
- /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
- seqno = 0;
+ /* See "8.3.1. Abnormal Termination" in RFC 4340 */
if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
- dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
- rxskb->nh.iph->daddr);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
+ rxskb->nh.iph->daddr);
bh_lock_sock(dccp_v4_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
@@ -749,7 +568,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
rxskb->nh.iph->saddr, NULL);
bh_unlock_sock(dccp_v4_ctl_socket->sk);
- if (err == NET_XMIT_CN || err == 0) {
+ if (net_xmit_eval(err) == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
}
@@ -757,6 +576,103 @@ out:
dst_release(dst);
}
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+ kfree(inet_rsk(req)->opt);
+}
+
+static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
+ .family = PF_INET,
+ .obj_size = sizeof(struct dccp_request_sock),
+ .rtx_syn_ack = dccp_v4_send_response,
+ .send_ack = dccp_reqsk_send_ack,
+ .destructor = dccp_v4_reqsk_destructor,
+ .send_reset = dccp_v4_ctl_send_reset,
+};
+
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq;
+ struct request_sock *req;
+ struct dccp_request_sock *dreq;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
+
+ /* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
+ if (((struct rtable *)skb->dst)->rt_flags &
+ (RTCF_BROADCAST | RTCF_MULTICAST)) {
+ reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+ goto drop;
+ }
+
+ if (dccp_bad_service_code(sk, service)) {
+ reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+ goto drop;
+ }
+ /*
+ * TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ if (inet_csk_reqsk_queue_is_full(sk))
+ goto drop;
+
+ /*
+ * Accept backlog is full. If we have already queued enough
+ * of warm entries in syn queue, drop request. It is better than
+ * clogging syn queue with openreqs with exponentially increasing
+ * timeout.
+ */
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ goto drop;
+
+ req = reqsk_alloc(&dccp_request_sock_ops);
+ if (req == NULL)
+ goto drop;
+
+ if (dccp_parse_options(sk, skb))
+ goto drop_and_free;
+
+ dccp_reqsk_init(req, skb);
+
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
+ ireq = inet_rsk(req);
+ ireq->loc_addr = skb->nh.iph->daddr;
+ ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->opt = NULL;
+
+ /*
+ * Step 3: Process LISTEN state
+ *
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+ *
+ * In fact we defer setting S.GSR, S.SWL, S.SWH to
+ * dccp_create_openreq_child.
+ */
+ dreq = dccp_rsk(req);
+ dreq->dreq_isr = dcb->dccpd_seq;
+ dreq->dreq_iss = dccp_v4_init_sequence(skb);
+ dreq->dreq_service = service;
+
+ if (dccp_v4_send_response(sk, req, NULL))
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ return 0;
+
+drop_and_free:
+ reqsk_free(req);
+drop:
+ DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ dcb->dccpd_reset_code = reset_code;
+ return -1;
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_hdr *dh = dccp_hdr(skb);
@@ -769,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
/*
* Step 3: Process LISTEN state
- * If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie
- * option,
- * * Must scan the packet's options to check for an Init
- * Cookie. Only the Init Cookie is processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *
- * * Generate a new socket and switch to that socket *
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- * Continue with S.state == RESPOND
- * * A Response packet will be generated in Step 11 *
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+ * Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
*
* NOTE: the check for the packet types is done in
* dccp_rcv_state_process
@@ -809,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(skb);
+ dccp_v4_ctl_send_reset(sk, skb);
discard:
kfree_skb(skb);
return 0;
@@ -817,60 +732,74 @@ discard:
EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
+/**
+ * dccp_invalid_packet - check for malformed packets
+ * Implements RFC 4340, 8.5: Step 1: Check header basics
+ * Packets that fail these checks are ignored and do not receive Resets.
+ * Returns 1 if the packet must be dropped, 0 if it passed all checks; in
+ * the latter case skb->csum holds the checksum over the covered bytes.
+ */
int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
+ unsigned int cscov;
if (skb->pkt_type != PACKET_HOST)
return 1;
+ /* If the packet is shorter than 12 bytes, drop packet and return */
if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
+ DCCP_WARN("pskb_may_pull failed\n");
return 1;
}
dh = dccp_hdr(skb);
- /* If the packet type is not understood, drop packet and return */
+ /* If P.type is not understood, drop packet and return */
if (dh->dccph_type >= DCCP_PKT_INVALID) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
+ DCCP_WARN("invalid packet type\n");
return 1;
}
/*
- * If P.Data Offset is too small for packet type, or too large for
- * packet, drop packet and return
+ * If P.Data Offset is too small for packet type, drop packet and return
*/
if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
- "too small 1\n",
- dh->dccph_doff);
+ DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
return 1;
}
-
+ /*
+ * If P.Data Offset is too large for packet, drop packet and return
+ */
if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
- "too small 2\n",
- dh->dccph_doff);
+ DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
return 1;
}
+ /* pskb_may_pull() may reallocate the header: reload dh before reuse */
dh = dccp_hdr(skb);
-
/*
* If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
* has short sequence numbers), drop packet and return
*/
- if (dh->dccph_x == 0 &&
- dh->dccph_type != DCCP_PKT_DATA &&
- dh->dccph_type != DCCP_PKT_ACK &&
- dh->dccph_type != DCCP_PKT_DATAACK) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
- "nor DataAck and P.X == 0\n",
- dccp_packet_name(dh->dccph_type));
+ if ((dh->dccph_type < DCCP_PKT_DATA ||
+ dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) {
+ DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
+ dccp_packet_name(dh->dccph_type));
return 1;
}
+ /*
+ * If P.CsCov is too large for the packet size, drop packet and return.
+ * This must come _before_ checksumming (not as RFC 4340 suggests).
+ */
+ cscov = dccp_csum_coverage(skb);
+ if (cscov > skb->len) {
+ DCCP_WARN("P.CsCov %u exceeds packet length %d\n",
+ dh->dccph_cscov, skb->len);
+ return 1;
+ }
+
+ /* If header checksum is incorrect, drop packet and return.
+ * (This step is completed in the AF-dependent functions.) */
+ skb->csum = skb_checksum(skb, 0, cscov, 0);
+
return 0;
}
@@ -881,17 +810,16 @@ static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
struct sock *sk;
+ int min_cov;
- /* Step 1: Check header basics: */
+ /* Step 1: Check header basics */
if (dccp_invalid_packet(skb))
goto discard_it;
- /* If the header checksum is incorrect, drop packet and return */
- if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
- skb->nh.iph->daddr) < 0) {
- LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
- __FUNCTION__);
+ /* Step 1: If header checksum is incorrect, drop packet and return */
+ if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+ DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -913,23 +841,20 @@ static int dccp_v4_rcv(struct sk_buff *skb)
dccp_pr_debug_cat("\n");
} else {
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
- dccp_pr_debug_cat(", ack=%llu\n",
- (unsigned long long)
+ dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
/* Step 2:
- * Look up flow ID in table and get corresponding socket */
+ * Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
skb->nh.iph->saddr, dh->dccph_sport,
skb->nh.iph->daddr, dh->dccph_dport,
inet_iif(skb));
- /*
+ /*
* Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If no socket ...
*/
if (sk == NULL) {
dccp_pr_debug("failed to look up flow ID in table and "
@@ -937,51 +862,61 @@ static int dccp_v4_rcv(struct sk_buff *skb)
goto no_dccp_socket;
}
- /*
+ /*
* Step 2:
- * ... or S.state == TIMEWAIT,
+ * ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
-
if (sk->sk_state == DCCP_TIME_WAIT) {
- dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
- "do_time_wait\n");
- goto do_time_wait;
+ dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
+ inet_twsk_put(inet_twsk(sk));
+ goto no_dccp_socket;
+ }
+
+ /*
+ * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
+ * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+ * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
+ */
+ min_cov = dccp_sk(sk)->dccps_pcrlen;
+ if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
+ dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
+ dh->dccph_cscov, min_cov);
+ /* FIXME: "Such packets SHOULD be reported using Data Dropped
+ * options (Section 11.7) with Drop Code 0, Protocol
+ * Constraints." */
+ goto discard_and_relse;
}
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
nf_reset(skb);
- return sk_receive_skb(sk, skb);
+ return sk_receive_skb(sk, skb, 1);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
/*
* Step 2:
+ * If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(skb);
+ dccp_v4_ctl_send_reset(sk, skb);
}
discard_it:
- /* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
-
-do_time_wait:
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
}
static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
@@ -1015,20 +950,6 @@ static int dccp_v4_init_sock(struct sock *sk)
return err;
}
-static void dccp_v4_reqsk_destructor(struct request_sock *req)
-{
- kfree(inet_rsk(req)->opt);
-}
-
-static struct request_sock_ops dccp_request_sock_ops = {
- .family = PF_INET,
- .obj_size = sizeof(struct dccp_request_sock),
- .rtx_syn_ack = dccp_v4_send_response,
- .send_ack = dccp_v4_reqsk_send_ack,
- .destructor = dccp_v4_reqsk_destructor,
- .send_reset = dccp_v4_ctl_send_reset,
-};
-
static struct timewait_sock_ops dccp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct inet_timewait_sock),
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7171a78671a..6b91a9dd041 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -36,13 +36,6 @@
/* Socket used for sending RSTs and ACKs */
static struct socket *dccp_v6_ctl_socket;
-static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
-static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
- struct request_sock *req);
-static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb);
-
-static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-
static struct inet_connection_sock_af_ops dccp_ipv6_mapped;
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
@@ -65,205 +58,37 @@ static void dccp_v6_hash(struct sock *sk)
}
}
-static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len,
- struct in6_addr *saddr,
- struct in6_addr *daddr,
- unsigned long base)
+/* add pseudo-header to DCCP checksum stored in skb->csum */
+static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
+ struct in6_addr *saddr,
+ struct in6_addr *daddr)
{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base);
+ return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
}
-static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline void dccp_v6_send_check(struct sock *sk, int unused_value,
+ struct sk_buff *skb)
{
- const struct dccp_hdr *dh = dccp_hdr(skb);
-
- if (skb->protocol == htons(ETH_P_IPV6))
- return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
- dh->dccph_dport,
- dh->dccph_sport);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dccp_hdr *dh = dccp_hdr(skb);
- return secure_dccp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- dh->dccph_dport,
- dh->dccph_sport);
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
}
-static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len)
+static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+ __be16 sport, __be16 dport )
{
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- struct in6_addr *saddr = NULL, *final_p = NULL, final;
- struct flowi fl;
- struct dst_entry *dst;
- int addr_type;
- int err;
-
- dp->dccps_role = DCCP_ROLE_CLIENT;
-
- if (addr_len < SIN6_LEN_RFC2133)
- return -EINVAL;
-
- if (usin->sin6_family != AF_INET6)
- return -EAFNOSUPPORT;
-
- memset(&fl, 0, sizeof(fl));
-
- if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl.fl6_flowlabel);
- if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
- struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
- if (flowlabel == NULL)
- return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
- fl6_sock_release(flowlabel);
- }
- }
- /*
- * connect() to INADDR_ANY means loopback (BSD'ism).
- */
- if (ipv6_addr_any(&usin->sin6_addr))
- usin->sin6_addr.s6_addr[15] = 1;
-
- addr_type = ipv6_addr_type(&usin->sin6_addr);
-
- if (addr_type & IPV6_ADDR_MULTICAST)
- return -ENETUNREACH;
-
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
- if (addr_len >= sizeof(struct sockaddr_in6) &&
- usin->sin6_scope_id) {
- /* If interface is set while binding, indices
- * must coincide.
- */
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
- return -EINVAL;
-
- sk->sk_bound_dev_if = usin->sin6_scope_id;
- }
-
- /* Connect to link-local address requires an interface */
- if (!sk->sk_bound_dev_if)
- return -EINVAL;
- }
-
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
- np->flow_label = fl.fl6_flowlabel;
-
- /*
- * DCCP over IPv4
- */
- if (addr_type == IPV6_ADDR_MAPPED) {
- u32 exthdrlen = icsk->icsk_ext_hdr_len;
- struct sockaddr_in sin;
-
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
- if (__ipv6_only_sock(sk))
- return -ENETUNREACH;
-
- sin.sin_family = AF_INET;
- sin.sin_port = usin->sin6_port;
- sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
-
- icsk->icsk_af_ops = &dccp_ipv6_mapped;
- sk->sk_backlog_rcv = dccp_v4_do_rcv;
-
- err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
- if (err) {
- icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &dccp_ipv6_af_ops;
- sk->sk_backlog_rcv = dccp_v6_do_rcv;
- goto failure;
- } else {
- ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
- inet->saddr);
- ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
- inet->rcv_saddr);
- }
-
- return err;
- }
-
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
-
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = usin->sin6_port;
- fl.fl_ip_sport = inet->sport;
- security_sk_classify_flow(sk, &fl);
-
- if (np->opt != NULL && np->opt->srcrt != NULL) {
- const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-
- ipv6_addr_copy(&final, &fl.fl6_dst);
- ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
- final_p = &final;
- }
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto failure;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = xfrm_lookup(&dst, &fl, sk, 0);
- if (err < 0)
- goto failure;
-
- if (saddr == NULL) {
- saddr = &fl.fl6_src;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
- }
-
- /* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
- inet->rcv_saddr = LOOPBACK4_IPV6;
-
- __ip6_dst_store(sk, dst, NULL, NULL);
-
- icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
-
- inet->dport = usin->sin6_port;
-
- dccp_set_state(sk, DCCP_REQUESTING);
- err = inet6_hash_connect(&dccp_death_row, sk);
- if (err)
- goto late_failure;
- /* FIXME */
-#if 0
- dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
- inet->sport,
- inet->dport);
-#endif
- err = dccp_connect(sk);
- if (err)
- goto late_failure;
+ return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
+}
- return 0;
+static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
+{
+ return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
+ skb->nh.ipv6h->saddr.s6_addr32,
+ dccp_hdr(skb)->dccph_dport,
+ dccp_hdr(skb)->dccph_sport );
-late_failure:
- dccp_set_state(sk, DCCP_CLOSED);
- __sk_dst_reset(sk);
-failure:
- inet->dport = 0;
- sk->sk_route_caps = 0;
- return err;
}
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -277,7 +102,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
- &hdr->saddr, dh->dccph_sport, skb->dev->ifindex);
+ &hdr->saddr, dh->dccph_sport, inet6_iif(skb));
if (sk == NULL) {
ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -464,16 +289,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
if (skb != NULL) {
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v6_check(dh, skb->len,
- &ireq6->loc_addr,
- &ireq6->rmt_addr,
- csum_partial((char *)dh,
- skb->len,
- skb->csum));
+ dh->dccph_checksum = dccp_v6_csum_finish(skb,
+ &ireq6->loc_addr,
+ &ireq6->rmt_addr);
ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
err = ip6_xmit(sk, skb, &fl, opt, 0);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = net_xmit_eval(err);
}
done:
@@ -489,32 +310,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet6_rsk(req)->pktopts);
}
-static struct request_sock_ops dccp6_request_sock_ops = {
- .family = AF_INET6,
- .obj_size = sizeof(struct dccp6_request_sock),
- .rtx_syn_ack = dccp_v6_send_response,
- .send_ack = dccp_v6_reqsk_send_ack,
- .destructor = dccp_v6_reqsk_destructor,
- .send_reset = dccp_v6_ctl_send_reset,
-};
-
-static struct timewait_sock_ops dccp6_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
-};
-
-static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr,
- len, IPPROTO_DCCP,
- csum_partial((char *)dh,
- dh->dccph_doff << 2,
- skb->csum));
-}
-
-static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
@@ -522,7 +318,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct flowi fl;
- u64 seqno;
+ u64 seqno = 0;
if (rxdh->dccph_type == DCCP_PKT_RESET)
return;
@@ -533,13 +329,11 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
GFP_ATOMIC);
if (skb == NULL)
- return;
+ return;
skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
- skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_reset_len);
+ dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
/* Swap the send and the receive. */
dh->dccph_type = DCCP_PKT_RESET;
@@ -550,21 +344,21 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
dccp_hdr_reset(skb)->dccph_reset_code =
DCCP_SKB_CB(rxskb)->dccpd_reset_code;
- /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
- seqno = 0;
+ /* See "8.3.1. Abnormal Termination" in RFC 4340 */
if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+ dccp_csum_outgoing(skb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
+ &rxskb->nh.ipv6h->daddr);
memset(&fl, 0, sizeof(fl));
ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
- dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
- sizeof(*dh), IPPROTO_DCCP,
- skb->csum);
+
fl.proto = IPPROTO_DCCP;
fl.oif = inet6_iif(rxskb);
fl.fl_ip_dport = dh->dccph_dport;
@@ -584,60 +378,14 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
kfree_skb(skb);
}
-static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
- struct request_sock *req)
-{
- struct flowi fl;
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_ack_bits);
- struct sk_buff *skb;
-
- skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
- GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
-
- skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_hdr_ack_len);
-
- /* Build DCCP header and checksum it. */
- dh->dccph_type = DCCP_PKT_ACK;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_ack_len / 4;
- dh->dccph_x = 1;
-
- dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
- DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
-
- /* FIXME: calculate checksum, IPv4 also should... */
-
- fl.proto = IPPROTO_DCCP;
- fl.oif = inet6_iif(rxskb);
- fl.fl_ip_dport = dh->dccph_dport;
- fl.fl_ip_sport = dh->dccph_sport;
- security_req_classify_flow(req, &fl);
-
- if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
- if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- return;
- }
- }
-
- kfree_skb(skb);
-}
+static struct request_sock_ops dccp6_request_sock_ops = {
+ .family = AF_INET6,
+ .obj_size = sizeof(struct dccp6_request_sock),
+ .rtx_syn_ack = dccp_v6_send_response,
+ .send_ack = dccp_reqsk_send_ack,
+ .destructor = dccp_v6_reqsk_destructor,
+ .send_reset = dccp_v6_ctl_send_reset,
+};
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
@@ -672,13 +420,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct inet_request_sock *ireq;
- struct dccp_sock dp;
struct request_sock *req;
struct dccp_request_sock *dreq;
struct inet6_request_sock *ireq6;
struct ipv6_pinfo *np = inet6_sk(sk);
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -691,7 +437,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_bad_service_code(sk, service)) {
reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
- }
+ }
/*
* There are no SYN attacks on IPv6, yet...
*/
@@ -701,22 +447,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot);
+ req = inet6_reqsk_alloc(&dccp6_request_sock_ops);
if (req == NULL)
goto drop;
- /* FIXME: process options */
+ if (dccp_parse_options(sk, skb))
+ goto drop_and_free;
- dccp_openreq_init(req, &dp, skb);
+ dccp_reqsk_init(req, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
ireq6 = inet6_rsk(req);
- ireq = inet_rsk(req);
ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
- req->rcv_wnd = dccp_feat_default_sequence_window;
ireq6->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
@@ -735,14 +480,14 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
/*
* Step 3: Process LISTEN state
*
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
+ * In fact we defer setting S.GSR, S.SWL, S.SWH to
+ * dccp_create_openreq_child.
*/
dreq = dccp_rsk(req);
dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_iss = dccp_v6_init_sequence(sk, skb);
+ dreq->dreq_iss = dccp_v6_init_sequence(skb);
dreq->dreq_service = service;
if (dccp_v6_send_response(sk, req, NULL))
@@ -992,14 +737,46 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
+ /*
+ * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
+ * (wrt ipv6_pktoptions) and net/ipv6/tcp_ipv6.c for an example.
+ */
opt_skb = skb_clone(skb, GFP_ATOMIC);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
+ if (opt_skb) {
+ /* XXX This is where we would goto ipv6_pktoptions. */
+ __kfree_skb(opt_skb);
+ }
return 0;
}
+ /*
+ * Step 3: Process LISTEN state
+ * If S.state == LISTEN,
+ * If P.type == Request or P contains a valid Init Cookie option,
+ * (* Must scan the packet's options to check for Init
+ * Cookies. Only Init Cookies are processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *)
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+ * Continue with S.state == RESPOND
+ * (* A Response packet will be generated in Step 11 *)
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ *
+ * NOTE: the check for the packet types is done in
+ * dccp_rcv_state_process
+ */
if (sk->sk_state == DCCP_LISTEN) {
struct sock *nsk = dccp_v6_hnd_req(sk, skb);
@@ -1010,7 +787,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
- if (nsk != sk) {
+ if (nsk != sk) {
if (dccp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb != NULL)
@@ -1021,10 +798,14 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
+ if (opt_skb) {
+ /* XXX This is where we would goto ipv6_pktoptions. */
+ __kfree_skb(opt_skb);
+ }
return 0;
reset:
- dccp_v6_ctl_send_reset(skb);
+ dccp_v6_ctl_send_reset(sk, skb);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -1037,12 +818,20 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
const struct dccp_hdr *dh;
struct sk_buff *skb = *pskb;
struct sock *sk;
+ int min_cov;
- /* Step 1: Check header basics: */
+ /* Step 1: Check header basics */
if (dccp_invalid_packet(skb))
goto discard_it;
+ /* Step 1: If header checksum is incorrect, drop packet and return. */
+ if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr)) {
+ DCCP_WARN("dropped packet with invalid checksum\n");
+ goto discard_it;
+ }
+
dh = dccp_hdr(skb);
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
@@ -1054,63 +843,247 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
/* Step 2:
- * Look up flow ID in table and get corresponding socket */
+ * Look up flow ID in table and get corresponding socket */
sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
dh->dccph_sport,
&skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
/*
* Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
+ * If no socket ...
*/
- if (sk == NULL)
+ if (sk == NULL) {
+ dccp_pr_debug("failed to look up flow ID in table and "
+ "get corresponding socket\n");
goto no_dccp_socket;
+ }
/*
* Step 2:
- * ... or S.state == TIMEWAIT,
+ * ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
- if (sk->sk_state == DCCP_TIME_WAIT)
- goto do_time_wait;
+ if (sk->sk_state == DCCP_TIME_WAIT) {
+ dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
+ inet_twsk_put(inet_twsk(sk));
+ goto no_dccp_socket;
+ }
+
+ /*
+ * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
+ * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+ * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
+ */
+ min_cov = dccp_sk(sk)->dccps_pcrlen;
+ if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
+ dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
+ dh->dccph_cscov, min_cov);
+ /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
+ goto discard_and_relse;
+ }
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return sk_receive_skb(sk, skb) ? -1 : 0;
+ return sk_receive_skb(sk, skb, 1) ? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
/*
* Step 2:
+ * If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(skb);
+ dccp_v6_ctl_send_reset(sk, skb);
}
-discard_it:
-
- /*
- * Discard frame
- */
+discard_it:
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
+}
+
+static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct in6_addr *saddr = NULL, *final_p = NULL, final;
+ struct flowi fl;
+ struct dst_entry *dst;
+ int addr_type;
+ int err;
+
+ dp->dccps_role = DCCP_ROLE_CLIENT;
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (usin->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ memset(&fl, 0, sizeof(fl));
+
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl.fl6_flowlabel);
+ if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
+ struct ip6_flowlabel *flowlabel;
+ flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+ fl6_sock_release(flowlabel);
+ }
+ }
+ /*
+ * connect() to INADDR_ANY means loopback (BSD'ism).
+ */
+ if (ipv6_addr_any(&usin->sin6_addr))
+ usin->sin6_addr.s6_addr[15] = 1;
+
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+ if (addr_type & IPV6_ADDR_MULTICAST)
+ return -ENETUNREACH;
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ usin->sin6_scope_id) {
+ /* If interface is set while binding, indices
+ * must coincide.
+ */
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != usin->sin6_scope_id)
+ return -EINVAL;
-do_time_wait:
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
+ sk->sk_bound_dev_if = usin->sin6_scope_id;
+ }
+
+ /* Connect to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if)
+ return -EINVAL;
+ }
+
+ ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+ np->flow_label = fl.fl6_flowlabel;
+
+ /*
+ * DCCP over IPv4
+ */
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ u32 exthdrlen = icsk->icsk_ext_hdr_len;
+ struct sockaddr_in sin;
+
+ SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+
+ if (__ipv6_only_sock(sk))
+ return -ENETUNREACH;
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = usin->sin6_port;
+ sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+ icsk->icsk_af_ops = &dccp_ipv6_mapped;
+ sk->sk_backlog_rcv = dccp_v4_do_rcv;
+
+ err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+ if (err) {
+ icsk->icsk_ext_hdr_len = exthdrlen;
+ icsk->icsk_af_ops = &dccp_ipv6_af_ops;
+ sk->sk_backlog_rcv = dccp_v6_do_rcv;
+ goto failure;
+ } else {
+ ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
+ inet->saddr);
+ ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
+ inet->rcv_saddr);
+ }
+
+ return err;
+ }
+
+ if (!ipv6_addr_any(&np->rcv_saddr))
+ saddr = &np->rcv_saddr;
+
+ fl.proto = IPPROTO_DCCP;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = usin->sin6_port;
+ fl.fl_ip_sport = inet->sport;
+ security_sk_classify_flow(sk, &fl);
+
+ if (np->opt != NULL && np->opt->srcrt != NULL) {
+ const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err)
+ goto failure;
+
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+ err = xfrm_lookup(&dst, &fl, sk, 0);
+ if (err < 0)
+ goto failure;
+
+ if (saddr == NULL) {
+ saddr = &fl.fl6_src;
+ ipv6_addr_copy(&np->rcv_saddr, saddr);
+ }
+
+ /* set the source address */
+ ipv6_addr_copy(&np->saddr, saddr);
+ inet->rcv_saddr = LOOPBACK4_IPV6;
+
+ __ip6_dst_store(sk, dst, NULL, NULL);
+
+ icsk->icsk_ext_hdr_len = 0;
+ if (np->opt != NULL)
+ icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+ np->opt->opt_nflen);
+
+ inet->dport = usin->sin6_port;
+
+ dccp_set_state(sk, DCCP_REQUESTING);
+ err = inet6_hash_connect(&dccp_death_row, sk);
+ if (err)
+ goto late_failure;
+
+ dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
+ np->daddr.s6_addr32,
+ inet->sport, inet->dport);
+ err = dccp_connect(sk);
+ if (err)
+ goto late_failure;
+
+ return 0;
+
+late_failure:
+ dccp_set_state(sk, DCCP_CLOSED);
+ __sk_dst_reset(sk);
+failure:
+ inet->dport = 0;
+ sk->sk_route_caps = 0;
+ return err;
}
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
@@ -1173,6 +1146,10 @@ static int dccp_v6_destroy_sock(struct sock *sk)
return inet6_destroy_sock(sk);
}
+static struct timewait_sock_ops dccp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
+};
+
static struct proto dccp_v6_prot = {
.name = "DCCPv6",
.owner = THIS_MODULE,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 9045438d6b3..6656bb497c7 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -11,6 +11,7 @@
*/
#include <linux/dccp.h>
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
@@ -31,8 +32,7 @@ struct inet_timewait_death_row dccp_death_row = {
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&dccp_death_row),
.twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
- inet_twdr_twkill_work,
- &dccp_death_row),
+ inet_twdr_twkill_work),
/* Short-time timewait calendar */
.twcal_hand = -1,
@@ -83,8 +83,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
- "table overflow\n");
+ DCCP_WARN("time wait bucket table overflow\n");
}
dccp_done(sk);
@@ -97,8 +96,8 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
/*
* Step 3: Process LISTEN state
*
- * // Generate a new socket and switch to that socket
- * Set S := new socket for this port pair
+ * (* Generate a new socket and switch to that socket *)
+ * Set S := new socket for this port pair
*/
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
@@ -147,9 +146,9 @@ out_free:
/*
* Step 3: Process LISTEN state
*
- * Choose S.ISS (initial seqno) or set from Init Cookie
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
- * Cookie
+ * Choose S.ISS (initial seqno) or set from Init Cookies
+ * Initialize S.GAR := S.ISS
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
*/
/* See dccp_v4_conn_request */
@@ -183,7 +182,7 @@ out_free:
EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
-/*
+/*
* Process an incoming packet for RESPOND sockets represented
* as an request_sock.
*/
@@ -195,15 +194,17 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
- if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
- dccp_rsk(req)->dreq_isr)) {
- struct dccp_request_sock *dreq = dccp_rsk(req);
+ struct dccp_request_sock *dreq = dccp_rsk(req);
+ if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
dccp_pr_debug("Retransmitted REQUEST\n");
- /* Send another RESPONSE packet */
- dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
- dccp_set_seqno(&dreq->dreq_isr,
- DCCP_SKB_CB(skb)->dccpd_seq);
+ dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+ /*
+ * Send another RESPONSE packet
+ * To protect against Request floods, increment retrans
+ * counter (backoff, monitored by dccp_response_timer).
+ */
+ req->retrans++;
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
@@ -243,7 +244,7 @@ listen_overflow:
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(skb);
+ req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
@@ -283,3 +284,19 @@ int dccp_child_process(struct sock *parent, struct sock *child,
}
EXPORT_SYMBOL_GPL(dccp_child_process);
+
+void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk)
+{
+ DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
+}
+
+EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
+
+void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+{
+ inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+ inet_rsk(req)->acked = 0;
+ req->rcv_wnd = sysctl_dccp_feat_sequence_window;
+}
+
+EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 07a34696ac9..c03ba61eb6d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -22,23 +22,23 @@
#include "dccp.h"
#include "feat.h"
-int dccp_feat_default_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
-int dccp_feat_default_rx_ccid = DCCPF_INITIAL_CCID;
-int dccp_feat_default_tx_ccid = DCCPF_INITIAL_CCID;
-int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
-int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
+int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
+int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
+int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
+int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window);
+EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
- dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window;
- dmsk->dccpms_rx_ccid = dccp_feat_default_rx_ccid;
- dmsk->dccpms_tx_ccid = dccp_feat_default_tx_ccid;
- dmsk->dccpms_ack_ratio = dccp_feat_default_ack_ratio;
- dmsk->dccpms_send_ack_vector = dccp_feat_default_send_ack_vector;
- dmsk->dccpms_send_ndp_count = dccp_feat_default_send_ndp_count;
+ dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
+ dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
+ dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
+ dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
+ dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
+ dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
@@ -60,12 +60,9 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx opt: " : "server rx opt: ";
-#endif
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+ u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh +
@@ -119,7 +116,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
- dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
+ dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
opt_recv->dccpor_ndp);
break;
case DCCPO_CHANGE_L:
@@ -153,7 +150,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
break;
if (dccp_msk(sk)->dccpms_send_ack_vector &&
- dccp_ackvec_parse(sk, skb, opt, value, len))
+ dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
case DCCPO_TIMESTAMP:
@@ -165,8 +162,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dccp_timestamp(sk, &dp->dccps_timestamp_time);
- dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
- debug_prefix, opt_recv->dccpor_timestamp,
+ dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
+ dccp_role(sk), opt_recv->dccpor_timestamp,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
break;
@@ -176,8 +173,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
- dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
- debug_prefix,
+ dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
+ "ackno=%llu, ", dccp_role(sk),
opt_recv->dccpor_timestamp_echo,
len + 2,
(unsigned long long)
@@ -211,11 +208,11 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
- dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
- elapsed_time);
+ dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
+ dccp_role(sk), elapsed_time);
break;
/*
- * From draft-ietf-dccp-spec-11.txt:
+ * From RFC 4340, sec. 10.3:
*
* Option numbers 128 through 191 are for
* options sent from the HC-Sender to the
@@ -242,9 +239,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
}
break;
default:
- pr_info("DCCP(%p): option %d(len=%d) not "
- "implemented, ignoring\n",
- sk, opt, len);
+ DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
+ "implemented, ignoring", sk, opt, len);
break;
}
@@ -261,7 +257,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
- pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
+ DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
return -1;
}
@@ -451,8 +447,7 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
u8 *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
- LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small"
- " to insert feature %d option!\n", feat);
+ DCCP_WARN("packet too small for feature %d option!\n", feat);
return -1;
}
@@ -465,8 +460,10 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
if (len)
memcpy(to, val, len);
- dccp_pr_debug("option %d feat %d len %d\n", type, feat, len);
+ dccp_pr_debug("%s(%s (%d), ...), length %d\n",
+ dccp_feat_typename(type),
+ dccp_feat_name(feat), feat, len);
return 0;
}
@@ -560,11 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
return -1;
dp->dccps_hc_rx_insert_options = 0;
}
- if (dp->dccps_hc_tx_insert_options) {
- if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb))
- return -1;
- dp->dccps_hc_tx_insert_options = 0;
- }
/* Feature negotiation */
/* Data packets can't do feat negotiation */
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7102e3aed4c..82456965908 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -1,6 +1,6 @@
/*
* net/dccp/output.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -88,16 +88,15 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
return -EPROTO;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
- dh = dccp_hdr(skb);
/* Build DCCP header and checksum it. */
- memset(dh, 0, dccp_header_size);
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_type = dcb->dccpd_type;
dh->dccph_sport = inet->sport;
dh->dccph_dport = inet->dport;
dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
dh->dccph_ccval = dcb->dccpd_ccval;
+ dh->dccph_cscov = dp->dccps_pcslen;
/* XXX For now we're using only 48 bits sequence numbers */
dh->dccph_x = 1;
@@ -117,7 +116,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
break;
}
- icsk->icsk_af_ops->send_check(sk, skb->len, skb);
+ icsk->icsk_af_ops->send_check(sk, 0, skb);
if (set_ack)
dccp_event_ack_sent(sk);
@@ -125,17 +124,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = icsk->icsk_af_ops->queue_xmit(skb, 0);
- if (err <= 0)
- return err;
-
- /* NET_XMIT_CN is special. It does not guarantee,
- * that this packet is lost. It tells that device
- * is about to start to drop packets or already
- * drops some packets of the same priority and
- * invokes us to send less aggressively.
- */
- return err == NET_XMIT_CN ? 0 : err;
+ err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0);
+ return net_xmit_eval(err);
}
return -ENOBUFS;
}
@@ -185,14 +175,12 @@ void dccp_write_space(struct sock *sk)
/**
* dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
* @sk: socket to wait for
- * @timeo: for how long
*/
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
- long *timeo)
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
- long delay;
+ unsigned long delay;
int rc;
while (1) {
@@ -200,22 +188,16 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
if (sk->sk_err)
goto do_error;
- if (!*timeo)
- goto do_nonblock;
if (signal_pending(current))
goto do_interrupted;
- rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
- skb->len);
+ rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (rc <= 0)
break;
delay = msecs_to_jiffies(rc);
- if (delay > *timeo || delay < 0)
- goto do_nonblock;
-
sk->sk_write_pending++;
release_sock(sk);
- *timeo -= schedule_timeout(delay);
+ schedule_timeout(delay);
lock_sock(sk);
sk->sk_write_pending--;
}
@@ -226,11 +208,8 @@ out:
do_error:
rc = -EPIPE;
goto out;
-do_nonblock:
- rc = -EAGAIN;
- goto out;
do_interrupted:
- rc = sock_intr_errno(*timeo);
+ rc = -EINTR;
goto out;
}
@@ -251,12 +230,9 @@ void dccp_write_xmit(struct sock *sk, int block)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
- long timeo = 30000; /* If a packet is taking longer than 2 secs
- we have other issues */
while ((skb = skb_peek(&sk->sk_write_queue))) {
- int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
- skb->len);
+ int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (err > 0) {
if (!block) {
@@ -264,12 +240,9 @@ void dccp_write_xmit(struct sock *sk, int block)
msecs_to_jiffies(err)+jiffies);
break;
} else
- err = dccp_wait_for_ccid(sk, skb, &timeo);
- if (err) {
- printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
- " %d\n", __FUNCTION__, err);
- dump_stack();
- }
+ err = dccp_wait_for_ccid(sk, skb);
+ if (err && err != -EINTR)
+ DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
}
skb_dequeue(&sk->sk_write_queue);
@@ -291,14 +264,13 @@ void dccp_write_xmit(struct sock *sk, int block)
err = dccp_transmit_skb(sk, skb);
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
- if (err) {
- printk(KERN_CRIT "%s:err from "
- "ccid_hc_tx_packet_sent %d\n",
- __FUNCTION__, err);
- dump_stack();
- }
- } else
+ if (err)
+ DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
+ err);
+ } else {
+ dccp_pr_debug("packet discarded\n");
kfree(skb);
+ }
}
}
@@ -329,9 +301,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->csum = 0;
dreq = dccp_rsk(req);
+ if (inet_rsk(req)->acked) /* increase ISS upon retransmission */
+ dccp_inc_seqno(&dreq->dreq_iss);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
@@ -340,10 +313,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
return NULL;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
-
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_header_size);
+ /* Build and checksum header */
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_rsk(req)->rmt_port;
@@ -355,6 +326,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
+ dccp_csum_outgoing(skb);
+
+ /* We use `acked' to remember that a Response was already sent. */
+ inet_rsk(req)->acked = 1;
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
@@ -363,7 +338,6 @@ EXPORT_SYMBOL_GPL(dccp_make_response);
static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
const enum dccp_reset_codes code)
-
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
@@ -379,7 +353,6 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
- skb->csum = 0;
dccp_inc_seqno(&dp->dccps_gss);
@@ -392,10 +365,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
return NULL;
}
- skb->h.raw = skb_push(skb, dccp_header_size);
-
- dh = dccp_hdr(skb);
- memset(dh, 0, dccp_header_size);
+ dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_sk(sk)->dport;
@@ -407,7 +377,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
- inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb);
+ inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
@@ -426,9 +396,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
code);
if (skb != NULL) {
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0);
+ return net_xmit_eval(err);
}
}
@@ -449,17 +418,21 @@ static inline void dccp_connect_init(struct sock *sk)
dccp_sync_mss(sk, dst_mtu(dst));
- dccp_update_gss(sk, dp->dccps_iss);
- /*
+ /*
* SWL and AWL are initially adjusted so that they are not less than
* the initial Sequence Numbers received and sent, respectively:
* SWL := max(GSR + 1 - floor(W/4), ISR),
* AWL := max(GSS - W' + 1, ISS).
* These adjustments MUST be applied only at the beginning of the
* connection.
- */
+ */
+ dccp_update_gss(sk, dp->dccps_iss);
dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
+ /* S.GAR - greatest valid acknowledgement number received on a non-Sync;
+ * initialized to S.ISS (sec. 8.5) */
+ dp->dccps_gar = dp->dccps_iss;
+
icsk->icsk_retransmits = 0;
init_timer(&dp->dccps_xmit_timer);
dp->dccps_xmit_timer.data = (unsigned long)sk;
@@ -481,7 +454,6 @@ int dccp_connect(struct sock *sk)
skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
- skb->csum = 0;
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
@@ -513,7 +485,6 @@ void dccp_send_ack(struct sock *sk)
/* Reserve space for headers */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
dccp_transmit_skb(sk, skb);
}
@@ -567,7 +538,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq,
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
DCCP_SKB_CB(skb)->dccpd_seq = seq;
@@ -593,7 +563,6 @@ void dccp_send_close(struct sock *sk, const int active)
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
- skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 146496fce2e..f81e37de35d 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -106,8 +106,10 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
static struct jprobe dccp_send_probe = {
- .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, },
- .entry = (kprobe_opcode_t *)&jdccp_sendmsg,
+ .kp = {
+ .symbol_name = "dccp_sendmsg",
+ },
+ .entry = JPROBE_ENTRY(jdccp_sendmsg),
};
static int dccpprobe_open(struct inode *inode, struct file *file)
@@ -160,6 +162,8 @@ static __init int dccpprobe_init(void)
init_waitqueue_head(&dccpw.wait);
spin_lock_init(&dccpw.lock);
dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
+ if (IS_ERR(dccpw.fifo))
+ return PTR_ERR(dccpw.fifo);
if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
goto err0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 72cbdcfc2c6..63b3fa20e14 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -52,6 +52,9 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
EXPORT_SYMBOL_GPL(dccp_hashinfo);
+/* the maximum queue length for tx in packets. 0 is no limit */
+int sysctl_dccp_tx_qlen __read_mostly = 5;
+
void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
@@ -193,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
sk, GFP_KERNEL);
dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
sk, GFP_KERNEL);
- if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
+ if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
dp->dccps_hc_tx_ccid == NULL)) {
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
@@ -212,6 +215,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dccp_init_xmit_timers(sk);
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
+ icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
sk->sk_write_space = dccp_write_space;
icsk->icsk_sync_mss = dccp_sync_mss;
@@ -262,12 +266,12 @@ int dccp_destroy_sock(struct sock *sk)
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
-static inline int dccp_listen_start(struct sock *sk)
+static inline int dccp_listen_start(struct sock *sk, int backlog)
{
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
- return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+ return inet_csk_listen_start(sk, backlog);
}
int dccp_disconnect(struct sock *sk, int flags)
@@ -386,7 +390,7 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
- if (service == DCCP_SERVICE_INVALID_VALUE ||
+ if (service == DCCP_SERVICE_INVALID_VALUE ||
optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
return -EINVAL;
@@ -451,9 +455,8 @@ out_free_val:
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
- struct dccp_sock *dp;
- int err;
- int val;
+ struct dccp_sock *dp = dccp_sk(sk);
+ int val, err = 0;
if (optlen < sizeof(int))
return -EINVAL;
@@ -465,14 +468,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
return dccp_setsockopt_service(sk, val, optval, optlen);
lock_sock(sk);
- dp = dccp_sk(sk);
- err = 0;
-
switch (optname) {
case DCCP_SOCKOPT_PACKET_SIZE:
- dp->dccps_packet_size = val;
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ err = 0;
break;
-
case DCCP_SOCKOPT_CHANGE_L:
if (optlen != sizeof(struct dccp_so_feat))
err = -EINVAL;
@@ -481,7 +481,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
(struct dccp_so_feat __user *)
optval);
break;
-
case DCCP_SOCKOPT_CHANGE_R:
if (optlen != sizeof(struct dccp_so_feat))
err = -EINVAL;
@@ -490,12 +489,26 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
(struct dccp_so_feat __user *)
optval);
break;
-
+ case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
+ if (val < 0 || val > 15)
+ err = -EINVAL;
+ else
+ dp->dccps_pcslen = val;
+ break;
+ case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
+ if (val < 0 || val > 15)
+ err = -EINVAL;
+ else {
+ dp->dccps_pcrlen = val;
+ /* FIXME: add feature negotiation,
+ * ChangeL(MinimumChecksumCoverage, val) */
+ }
+ break;
default:
err = -ENOPROTOOPT;
break;
}
-
+
release_sock(sk);
return err;
}
@@ -569,12 +582,17 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case DCCP_SOCKOPT_PACKET_SIZE:
- val = dp->dccps_packet_size;
- len = sizeof(dp->dccps_packet_size);
- break;
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ return 0;
case DCCP_SOCKOPT_SERVICE:
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
+ case DCCP_SOCKOPT_SEND_CSCOV:
+ val = dp->dccps_pcslen;
+ break;
+ case DCCP_SOCKOPT_RECV_CSCOV:
+ val = dp->dccps_pcrlen;
+ break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
len, (u32 __user *)optval, optlen);
@@ -630,6 +648,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return -EMSGSIZE;
lock_sock(sk);
+
+ if (sysctl_dccp_tx_qlen &&
+ (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+ rc = -EAGAIN;
+ goto out_release;
+ }
+
timeo = sock_sndtimeo(sk, noblock);
/*
@@ -788,7 +813,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
* FIXME: here it probably should be sk->sk_prot->listen_start
* see tcp_listen_start
*/
- err = dccp_listen_start(sk);
+ err = dccp_listen_start(sk, backlog);
if (err)
goto out;
}
@@ -805,7 +830,7 @@ EXPORT_SYMBOL_GPL(inet_dccp_listen);
static const unsigned char dccp_new_state[] = {
/* current state: new state: action: */
[0] = DCCP_CLOSED,
- [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
+ [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_REQUESTING] = DCCP_CLOSED,
[DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
[DCCP_LISTEN] = DCCP_CLOSED,
@@ -1008,8 +1033,7 @@ static int __init dccp_init(void)
} while (!dccp_hashinfo.ehash && --ehash_order > 0);
if (!dccp_hashinfo.ehash) {
- printk(KERN_CRIT "Failed to allocate DCCP "
- "established hash table\n");
+ DCCP_CRIT("Failed to allocate DCCP established hash table");
goto out_free_bind_bucket_cachep;
}
@@ -1031,7 +1055,7 @@ static int __init dccp_init(void)
} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
if (!dccp_hashinfo.bhash) {
- printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
+ DCCP_CRIT("Failed to allocate DCCP bind hash table");
goto out_free_dccp_ehash;
}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 38bc157876f..fdcfca3e920 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
+#include "dccp.h"
#include "feat.h"
#ifndef CONFIG_SYSCTL
@@ -19,53 +20,76 @@
static struct ctl_table dccp_default_table[] = {
{
- .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW,
.procname = "seq_window",
- .data = &dccp_feat_default_sequence_window,
- .maxlen = sizeof(dccp_feat_default_sequence_window),
+ .data = &sysctl_dccp_feat_sequence_window,
+ .maxlen = sizeof(sysctl_dccp_feat_sequence_window),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_RX_CCID,
.procname = "rx_ccid",
- .data = &dccp_feat_default_rx_ccid,
- .maxlen = sizeof(dccp_feat_default_rx_ccid),
+ .data = &sysctl_dccp_feat_rx_ccid,
+ .maxlen = sizeof(sysctl_dccp_feat_rx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_TX_CCID,
.procname = "tx_ccid",
- .data = &dccp_feat_default_tx_ccid,
- .maxlen = sizeof(dccp_feat_default_tx_ccid),
+ .data = &sysctl_dccp_feat_tx_ccid,
+ .maxlen = sizeof(sysctl_dccp_feat_tx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO,
.procname = "ack_ratio",
- .data = &dccp_feat_default_ack_ratio,
- .maxlen = sizeof(dccp_feat_default_ack_ratio),
+ .data = &sysctl_dccp_feat_ack_ratio,
+ .maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC,
.procname = "send_ackvec",
- .data = &dccp_feat_default_send_ack_vector,
- .maxlen = sizeof(dccp_feat_default_send_ack_vector),
+ .data = &sysctl_dccp_feat_send_ack_vector,
+ .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_DCCP_DEFAULT_SEND_NDP,
.procname = "send_ndp",
- .data = &dccp_feat_default_send_ndp_count,
- .maxlen = sizeof(dccp_feat_default_send_ndp_count),
+ .data = &sysctl_dccp_feat_send_ndp_count,
+ .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "request_retries",
+ .data = &sysctl_dccp_request_retries,
+ .maxlen = sizeof(sysctl_dccp_request_retries),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "retries1",
+ .data = &sysctl_dccp_retries1,
+ .maxlen = sizeof(sysctl_dccp_retries1),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "retries2",
+ .data = &sysctl_dccp_retries2,
+ .maxlen = sizeof(sysctl_dccp_retries2),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tx_qlen",
+ .data = &sysctl_dccp_tx_qlen,
+ .maxlen = sizeof(sysctl_dccp_tx_qlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+
{ .ctl_name = 0, }
};
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8447742f561..e5348f369c6 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -1,6 +1,6 @@
/*
* net/dccp/timer.c
- *
+ *
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
@@ -15,15 +15,10 @@
#include "dccp.h"
-static void dccp_write_timer(unsigned long data);
-static void dccp_keepalive_timer(unsigned long data);
-static void dccp_delack_timer(unsigned long data);
-
-void dccp_init_xmit_timers(struct sock *sk)
-{
- inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
- &dccp_keepalive_timer);
-}
+/* sysctl variables governing numbers of retransmission attempts */
+int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_dccp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
static void dccp_write_err(struct sock *sk)
{
@@ -44,11 +39,10 @@ static int dccp_write_timeout(struct sock *sk)
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
dst_negative_advice(&sk->sk_dst_cache);
- retry_until = icsk->icsk_syn_retries ? :
- /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
+ retry_until = icsk->icsk_syn_retries ?
+ : sysctl_dccp_request_retries;
} else {
- if (icsk->icsk_retransmits >=
- /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
+ if (icsk->icsk_retransmits >= sysctl_dccp_retries1) {
/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
black hole detection. :-(
@@ -72,7 +66,7 @@ static int dccp_write_timeout(struct sock *sk)
dst_negative_advice(&sk->sk_dst_cache);
}
- retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
+ retry_until = sysctl_dccp_retries2;
/*
* FIXME: see tcp_write_timout and tcp_out_of_resources
*/
@@ -86,53 +80,6 @@ static int dccp_write_timeout(struct sock *sk)
return 0;
}
-/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
-{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later. */
- icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- jiffies + TCP_DELACK_MIN);
- goto out;
- }
-
- if (sk->sk_state == DCCP_CLOSED ||
- !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
- goto out;
- if (time_after(icsk->icsk_ack.timeout, jiffies)) {
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- icsk->icsk_ack.timeout);
- goto out;
- }
-
- icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
-
- if (inet_csk_ack_scheduled(sk)) {
- if (!icsk->icsk_ack.pingpong) {
- /* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
- icsk->icsk_rto);
- } else {
- /* Delayed ACK missed: leave pingpong mode and
- * deflate ATO.
- */
- icsk->icsk_ack.pingpong = 0;
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- }
- dccp_send_ack(sk);
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
/*
* The DCCP retransmit timer.
*/
@@ -142,7 +89,7 @@ static void dccp_retransmit_timer(struct sock *sk)
/* retransmit timer is used for feature negotiation throughout
* connection. In this case, no packet is re-transmitted, but rather an
- * ack is generated and pending changes are splaced into its options.
+ * ack is generated and pending changes are placed into its options.
*/
if (sk->sk_send_head == NULL) {
dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
@@ -154,12 +101,14 @@ static void dccp_retransmit_timer(struct sock *sk)
/*
* sk->sk_send_head has to have one skb with
* DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
- * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
- * (PARTOPEN timer), etc).
- */
+ * packet types. The only packets eligible for retransmission are:
+ * -- Requests in client-REQUEST state (sec. 8.1.1)
+ * -- Acks in client-PARTOPEN state (sec. 8.1.5)
+ * -- CloseReq in server-CLOSEREQ state (sec. 8.3)
+ * -- Close in node-CLOSING state (sec. 8.3) */
BUG_TRAP(sk->sk_send_head != NULL);
- /*
+ /*
* More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
@@ -194,7 +143,7 @@ backoff:
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
DCCP_RTO_MAX);
- if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
+ if (icsk->icsk_retransmits > sysctl_dccp_retries1)
__sk_dst_reset(sk);
out:;
}
@@ -251,7 +200,7 @@ static void dccp_keepalive_timer(unsigned long data)
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
- /* Try again later. */
+ /* Try again later. */
inet_csk_reset_keepalive_timer(sk, HZ / 20);
goto out;
}
@@ -264,3 +213,56 @@ out:
bh_unlock_sock(sk);
sock_put(sk);
}
+
+/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
+static void dccp_delack_timer(unsigned long data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later. */
+ icsk->icsk_ack.blocked = 1;
+ NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+ sk_reset_timer(sk, &icsk->icsk_delack_timer,
+ jiffies + TCP_DELACK_MIN);
+ goto out;
+ }
+
+ if (sk->sk_state == DCCP_CLOSED ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ goto out;
+ if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+ sk_reset_timer(sk, &icsk->icsk_delack_timer,
+ icsk->icsk_ack.timeout);
+ goto out;
+ }
+
+ icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+ if (inet_csk_ack_scheduled(sk)) {
+ if (!icsk->icsk_ack.pingpong) {
+ /* Delayed ACK missed: inflate ATO. */
+ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
+ icsk->icsk_rto);
+ } else {
+ /* Delayed ACK missed: leave pingpong mode and
+ * deflate ATO.
+ */
+ icsk->icsk_ack.pingpong = 0;
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+ dccp_send_ack(sk);
+ NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+ }
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+void dccp_init_xmit_timers(struct sock *sk)
+{
+ inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
+ &dccp_keepalive_timer);
+}
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 36e72cb145b..7914fd619c5 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -41,11 +41,3 @@ config DECNET_ROUTER
See <file:Documentation/networking/decnet.txt> for more information.
-config DECNET_ROUTE_FWMARK
- bool "DECnet: use FWMARK value as routing key (EXPERIMENTAL)"
- depends on DECNET_ROUTER && NETFILTER
- help
- If you say Y here, you will be able to specify different routes for
- packets with different FWMARK ("firewalling mark") values
- (see ipchains(8), "-m" argument).
-
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3456cd33183..21f20f21dd3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -166,7 +166,7 @@ static struct hlist_head *dn_find_list(struct sock *sk)
if (scp->addr.sdn_flags & SDF_WILD)
return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
- return &dn_sk_hash[scp->addrloc & DN_SK_HASH_MASK];
+ return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK];
}
/*
@@ -180,7 +180,7 @@ static int check_port(__le16 port)
if (port == 0)
return -1;
- sk_for_each(sk, node, &dn_sk_hash[port & DN_SK_HASH_MASK]) {
+ sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) {
struct dn_scp *scp = DN_SK(sk);
if (scp->addrloc == port)
return -1;
@@ -194,12 +194,12 @@ static unsigned short port_alloc(struct sock *sk)
static unsigned short port = 0x2000;
unsigned short i_port = port;
- while(check_port(++port) != 0) {
+ while(check_port(dn_htons(++port)) != 0) {
if (port == i_port)
return 0;
}
- scp->addrloc = port;
+ scp->addrloc = dn_htons(port);
return 1;
}
@@ -418,7 +418,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb)
struct dn_scp *scp;
read_lock(&dn_hash_lock);
- sk_for_each(sk, node, &dn_sk_hash[cb->dst_port & DN_SK_HASH_MASK]) {
+ sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) {
scp = DN_SK(sk);
if (cb->src != dn_saddr2dn(&scp->peer))
continue;
@@ -1016,13 +1016,14 @@ static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
{
- unsigned char *ptr = skb->data;
-
- opt->opt_optl = *ptr++;
- opt->opt_status = 0;
- memcpy(opt->opt_data, ptr, opt->opt_optl);
- skb_pull(skb, dn_ntohs(opt->opt_optl) + 1);
-
+ unsigned char *ptr = skb->data;
+ u16 len = *ptr++; /* yes, it's 8bit on the wire */
+
+ BUG_ON(len > 16); /* we've checked the contents earlier */
+ opt->opt_optl = dn_htons(len);
+ opt->opt_status = 0;
+ memcpy(opt->opt_data, ptr, len);
+ skb_pull(skb, len + 1);
}
static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 01861feb608..fc6f3c023a5 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -38,7 +38,6 @@
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/sysctl.h>
#include <linux/notifier.h>
#include <asm/uaccess.h>
@@ -47,6 +46,7 @@
#include <net/dst.h>
#include <net/flow.h>
#include <net/fib_rules.h>
+#include <net/netlink.h>
#include <net/dn.h>
#include <net/dn_dev.h>
#include <net/dn_route.h>
@@ -73,7 +73,7 @@ static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
static void dn_dev_delete(struct net_device *dev);
-static void rtmsg_ifa(int event, struct dn_ifaddr *ifa);
+static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa);
static int dn_eth_up(struct net_device *);
static void dn_eth_down(struct net_device *);
@@ -167,8 +167,7 @@ static int dn_forwarding_proc(ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context);
+ void __user *newval, size_t newlen);
static struct dn_dev_sysctl_table {
struct ctl_table_header *sysctl_header;
@@ -255,12 +254,10 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
struct dn_dev_sysctl_table *t;
int i;
- t = kmalloc(sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
return;
- memcpy(t, &dn_dev_sysctl, sizeof(*t));
-
for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
long offset = (long)t->dn_dev_vars[i].data;
t->dn_dev_vars[i].data = ((char *)parms) + offset;
@@ -349,8 +346,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
#ifdef CONFIG_DECNET_ROUTER
struct net_device *dev = table->extra1;
@@ -442,7 +438,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
}
}
- rtmsg_ifa(RTM_DELADDR, ifa1);
+ dn_ifaddr_notify(RTM_DELADDR, ifa1);
blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
if (destroy) {
dn_dev_free_ifa(ifa1);
@@ -477,7 +473,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
ifa->ifa_next = dn_db->ifa_list;
dn_db->ifa_list = ifa;
- rtmsg_ifa(RTM_NEWADDR, ifa);
+ dn_ifaddr_notify(RTM_NEWADDR, ifa);
blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
return 0;
@@ -647,41 +643,62 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
return dn_dev;
}
-static int dn_dev_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy dn_ifa_policy[IFA_MAX+1] __read_mostly = {
+ [IFA_ADDRESS] = { .type = NLA_U16 },
+ [IFA_LOCAL] = { .type = NLA_U16 },
+ [IFA_LABEL] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
+};
+
+static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
struct dn_dev *dn_db;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
struct dn_ifaddr *ifa, **ifap;
+ int err = -EADDRNOTAVAIL;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ if (err < 0)
+ goto errout;
+ ifm = nlmsg_data(nlh);
if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
- return -EADDRNOTAVAIL;
+ goto errout;
+
+ for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) {
+ if (tb[IFA_LOCAL] &&
+ nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
+ continue;
- for(ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) {
- void *tmp = rta[IFA_LOCAL-1];
- if ((tmp && memcmp(RTA_DATA(tmp), &ifa->ifa_local, 2)) ||
- (rta[IFA_LABEL-1] && rtattr_strcmp(rta[IFA_LABEL-1], ifa->ifa_label)))
+ if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
continue;
dn_dev_del_ifa(dn_db, ifap, 1);
return 0;
}
- return -EADDRNOTAVAIL;
+errout:
+ return err;
}
-static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct rtattr **rta = arg;
+ struct nlattr *tb[IFA_MAX+1];
struct net_device *dev;
struct dn_dev *dn_db;
- struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct ifaddrmsg *ifm;
struct dn_ifaddr *ifa;
- int rv;
+ int err;
- if (rta[IFA_LOCAL-1] == NULL)
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[IFA_LOCAL] == NULL)
return -EINVAL;
+ ifm = nlmsg_data(nlh);
if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
return -ENODEV;
@@ -695,69 +712,77 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a
if ((ifa = dn_dev_alloc_ifa()) == NULL)
return -ENOBUFS;
- if (!rta[IFA_ADDRESS - 1])
- rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
- memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 2);
- memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 2);
+ if (tb[IFA_ADDRESS] == NULL)
+ tb[IFA_ADDRESS] = tb[IFA_LOCAL];
+
+ ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]);
+ ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]);
ifa->ifa_flags = ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
ifa->ifa_dev = dn_db;
- if (rta[IFA_LABEL-1])
- rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL-1], IFNAMSIZ);
+
+ if (tb[IFA_LABEL])
+ nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- rv = dn_dev_insert_ifa(dn_db, ifa);
- if (rv)
+ err = dn_dev_insert_ifa(dn_db, ifa);
+ if (err)
dn_dev_free_ifa(ifa);
- return rv;
+
+ return err;
}
-static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+static inline size_t dn_ifaddr_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ + nla_total_size(2) /* IFA_ADDRESS */
+ + nla_total_size(2); /* IFA_LOCAL */
+}
+
+static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
+ u32 pid, u32 seq, int event, unsigned int flags)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
- ifm = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+ ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_DECnet;
ifm->ifa_prefixlen = 16;
ifm->ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+
if (ifa->ifa_address)
- RTA_PUT(skb, IFA_ADDRESS, 2, &ifa->ifa_address);
+ NLA_PUT_LE16(skb, IFA_ADDRESS, ifa->ifa_address);
if (ifa->ifa_local)
- RTA_PUT(skb, IFA_LOCAL, 2, &ifa->ifa_local);
+ NLA_PUT_LE16(skb, IFA_LOCAL, ifa->ifa_local);
if (ifa->ifa_label[0])
- RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+
+ return nlmsg_end(skb, nlh);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
-static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
+static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
{
struct sk_buff *skb;
- int payload = sizeof(struct ifaddrmsg) + 128;
int err = -ENOBUFS;
- skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+ skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ /* failure implies BUG in dn_ifaddr_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
errout:
@@ -765,39 +790,43 @@ errout:
rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
}
-static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- int idx, dn_idx;
- int s_idx, s_dn_idx;
+ int idx, dn_idx = 0, skip_ndevs, skip_naddr;
struct net_device *dev;
struct dn_dev *dn_db;
struct dn_ifaddr *ifa;
- s_idx = cb->args[0];
- s_dn_idx = dn_idx = cb->args[1];
+ skip_ndevs = cb->args[0];
+ skip_naddr = cb->args[1];
+
read_lock(&dev_base_lock);
- for(dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
- if (idx < s_idx)
+ for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ if (idx < skip_ndevs)
continue;
- if (idx > s_idx)
- s_dn_idx = 0;
+ else if (idx > skip_ndevs) {
+ /* Only skip over addresses for first dev dumped
+ * in this iteration (idx == skip_ndevs) */
+ skip_naddr = 0;
+ }
+
if ((dn_db = dev->dn_ptr) == NULL)
continue;
- for(ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) {
- if (dn_idx < s_dn_idx)
+ for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
+ ifa = ifa->ifa_next, dn_idx++) {
+ if (dn_idx < skip_naddr)
continue;
- if (dn_dev_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDR,
- NLM_F_MULTI) <= 0)
+ if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR,
+ NLM_F_MULTI) < 0)
goto done;
}
}
done:
read_unlock(&dev_base_lock);
+
cb->args[0] = idx;
cb->args[1] = dn_idx;
@@ -1414,9 +1443,9 @@ static struct file_operations dn_dev_seq_fops = {
static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
{
- [RTM_NEWADDR - RTM_BASE] = { .doit = dn_dev_rtm_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = dn_dev_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_dev_dump_ifaddr, },
+ [RTM_NEWADDR - RTM_BASE] = { .doit = dn_nl_newaddr, },
+ [RTM_DELADDR - RTM_BASE] = { .doit = dn_nl_deladdr, },
+ [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_nl_dump_ifaddr, },
#ifdef CONFIG_DECNET_ROUTER
[RTM_NEWROUTE - RTM_BASE] = { .doit = dn_fib_rtm_newroute, },
[RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, },
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index ff0ebe99137..7322bb36e82 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -591,7 +591,6 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 72ecc6e62ec..39a6cf7fb56 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -360,9 +360,9 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
scp->max_window = decnet_no_fc_max_cwnd;
if (skb->len > 0) {
- unsigned char dlen = *skb->data;
+ u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->conndata_in.opt_optl = dn_htons((__u16)dlen);
+ scp->conndata_in.opt_optl = dn_htons(dlen);
memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
}
}
@@ -404,9 +404,9 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
memset(scp->discdata_in.opt_data, 0, 16);
if (skb->len > 0) {
- unsigned char dlen = *skb->data;
+ u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->discdata_in.opt_optl = dn_htons((__u16)dlen);
+ scp->discdata_in.opt_optl = dn_htons(dlen);
memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
}
}
@@ -804,7 +804,7 @@ got_it:
goto free_out;
}
- return sk_receive_skb(sk, skb);
+ return sk_receive_skb(sk, skb, 0);
}
return dn_nsp_no_socket(skb, reason);
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index c2e21cd89b3..b342e4e8f5f 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -526,7 +526,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
struct nsp_conn_init_msg *msg;
__u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl);
- if ((skb = dn_alloc_skb(sk, 50 + dn_ntohs(scp->conndata_out.opt_optl), gfp)) == NULL)
+ if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
return;
msg = (struct nsp_conn_init_msg *)skb_put(skb, sizeof(*msg));
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 23489f7232d..9881933167b 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -269,9 +269,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) |
(fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) |
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- (fl1->nl_u.dn_u.fwmark ^ fl2->nl_u.dn_u.fwmark) |
-#endif
+ (fl1->mark ^ fl2->mark) |
(fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) |
(fl1->oif ^ fl2->oif) |
(fl1->iif ^ fl2->iif)) == 0;
@@ -882,10 +880,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
{ .daddr = oldflp->fld_dst,
.saddr = oldflp->fld_src,
.scope = RT_SCOPE_UNIVERSE,
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- .fwmark = oldflp->fld_fwmark
-#endif
} },
+ .mark = oldflp->mark,
.iif = loopback_dev.ifindex,
.oif = oldflp->oif };
struct dn_route *rt = NULL;
@@ -903,7 +899,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
"dn_route_output_slow: dst=%04x src=%04x mark=%d"
" iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst),
dn_ntohs(oldflp->fld_src),
- oldflp->fld_fwmark, loopback_dev.ifindex, oldflp->oif);
+ oldflp->mark, loopback_dev.ifindex, oldflp->oif);
/* If we have an output interface, verify its a DECnet device */
if (oldflp->oif) {
@@ -1108,9 +1104,7 @@ make_route:
rt->fl.fld_dst = oldflp->fld_dst;
rt->fl.oif = oldflp->oif;
rt->fl.iif = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- rt->fl.fld_fwmark = oldflp->fld_fwmark;
-#endif
+ rt->fl.mark = oldflp->mark;
rt->rt_saddr = fl.fld_src;
rt->rt_daddr = fl.fld_dst;
@@ -1178,9 +1172,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
rt = rcu_dereference(rt->u.rt_next)) {
if ((flp->fld_dst == rt->fl.fld_dst) &&
(flp->fld_src == rt->fl.fld_src) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- (flp->fld_fwmark == rt->fl.fld_fwmark) &&
-#endif
+ (flp->mark == rt->fl.mark) &&
(rt->fl.iif == 0) &&
(rt->fl.oif == flp->oif)) {
rt->u.dst.lastuse = jiffies;
@@ -1235,10 +1227,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
{ .daddr = cb->dst,
.saddr = cb->src,
.scope = RT_SCOPE_UNIVERSE,
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- .fwmark = skb->nfmark
-#endif
} },
+ .mark = skb->mark,
.iif = skb->dev->ifindex };
struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
int err = -EINVAL;
@@ -1385,7 +1375,7 @@ make_route:
rt->fl.fld_dst = cb->dst;
rt->fl.oif = 0;
rt->fl.iif = in_dev->ifindex;
- rt->fl.fld_fwmark = fl.fld_fwmark;
+ rt->fl.mark = fl.mark;
rt->u.dst.flags = DST_HOST;
rt->u.dst.neighbour = neigh;
@@ -1457,9 +1447,7 @@ int dn_route_input(struct sk_buff *skb)
if ((rt->fl.fld_src == cb->src) &&
(rt->fl.fld_dst == cb->dst) &&
(rt->fl.oif == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- (rt->fl.fld_fwmark == skb->nfmark) &&
-#endif
+ (rt->fl.mark == skb->mark) &&
(rt->fl.iif == cb->iif)) {
rt->u.dst.lastuse = jiffies;
dst_hold(&rt->u.dst);
@@ -1481,7 +1469,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
- struct rta_cacheinfo ci;
+ long expires;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
r = NLMSG_DATA(nlh);
@@ -1514,16 +1502,10 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
goto rtattr_failure;
- ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
- ci.rta_used = rt->u.dst.__use;
- ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
- if (rt->u.dst.expires)
- ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies);
- else
- ci.rta_expires = 0;
- ci.rta_error = rt->u.dst.error;
- ci.rta_id = ci.rta_ts = ci.rta_tsage = 0;
- RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+ expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
+ if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires,
+ rt->u.dst.error) < 0)
+ goto rtattr_failure;
if (rt->fl.iif)
RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1604,8 +1586,6 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
if (err == 0)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 3e0c882c90b..e32d0c3d5a9 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -45,10 +45,6 @@ struct dn_fib_rule
__le16 dstmask;
__le16 srcmap;
u8 flags;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- u32 fwmark;
- u32 fwmask;
-#endif
};
static struct dn_fib_rule default_rule = {
@@ -112,30 +108,21 @@ errout:
}
static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
- [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
- [FRA_PRIORITY] = { .type = NLA_U32 },
+ FRA_GENERIC_POLICY,
[FRA_SRC] = { .type = NLA_U16 },
[FRA_DST] = { .type = NLA_U16 },
- [FRA_FWMARK] = { .type = NLA_U32 },
- [FRA_FWMASK] = { .type = NLA_U32 },
- [FRA_TABLE] = { .type = NLA_U32 },
};
static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- u16 daddr = fl->fld_dst;
- u16 saddr = fl->fld_src;
+ __le16 daddr = fl->fld_dst;
+ __le16 saddr = fl->fld_src;
if (((saddr ^ r->src) & r->srcmask) ||
((daddr ^ r->dst) & r->dstmask))
return 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask)
- return 0;
-#endif
-
return 1;
}
@@ -169,20 +156,6 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_DST])
r->dst = nla_get_u16(tb[FRA_DST]);
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if (tb[FRA_FWMARK]) {
- r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
- if (r->fwmark)
- /* compatibility: if the mark value is non-zero all bits
- * are compared unless a mask is explicitly specified.
- */
- r->fwmask = 0xFFFFFFFF;
- }
-
- if (tb[FRA_FWMASK])
- r->fwmask = nla_get_u32(tb[FRA_FWMASK]);
-#endif
-
r->src_len = frh->src_len;
r->srcmask = dnet_make_mask(r->src_len);
r->dst_len = frh->dst_len;
@@ -203,14 +176,6 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (r->dst_len != frh->dst_len))
return 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
- return 0;
-
- if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK])))
- return 0;
-#endif
-
if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
return 0;
@@ -248,12 +213,6 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->src_len = r->src_len;
frh->tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
- if (r->fwmark)
- NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
- if (r->fwmask || r->fwmark)
- NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask);
-#endif
if (r->dst_len)
NLA_PUT_U16(skb, FRA_DST, r->dst);
if (r->src_len)
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 317904bb589..13b2421991b 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n
static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
static DEFINE_RWLOCK(dn_fib_tables_lock);
-static kmem_cache_t *dn_hash_kmem __read_mostly;
+static struct kmem_cache *dn_hash_kmem __read_mostly;
static int dn_fib_hash_zombies;
static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
@@ -263,6 +263,32 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
return 0;
}
+static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
+{
+ size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(2) /* RTA_DST */
+ + nla_total_size(4); /* RTA_PRIORITY */
+
+ /* space for nested metrics */
+ payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+
+ if (fi->fib_nhs) {
+ /* Also handles the special case fib_nhs == 1 */
+
+ /* each nexthop is packed in an attribute */
+ size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+
+ /* may contain a gateway attribute */
+ nhsize += nla_total_size(4);
+
+ /* all nexthops are packed in a nested attribute */
+ payload += nla_total_size(fi->fib_nhs * nhsize);
+ }
+
+ return payload;
+}
+
static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
struct dn_fib_info *fi, unsigned int flags)
@@ -335,17 +361,15 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
u32 pid = req ? req->pid : 0;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
f->fn_type, f->fn_scope, &f->fn_key, z,
DN_FIB_INFO(f), 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in dn_fib_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
errout:
@@ -566,7 +590,7 @@ create:
replace:
err = -ENOBUFS;
- new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL);
+ new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL);
if (new_f == NULL)
goto out;
@@ -807,10 +831,11 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create)
printk(KERN_DEBUG "DECnet: BUG! Attempt to create routing table from interrupt\n");
return NULL;
}
- if ((t = kmalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), GFP_KERNEL)) == NULL)
- return NULL;
- memset(t, 0, sizeof(struct dn_fib_table));
+ t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash),
+ GFP_KERNEL);
+ if (t == NULL)
+ return NULL;
t->n = n;
t->insert = dn_fib_table_insert;
@@ -818,7 +843,6 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create)
t->lookup = dn_fib_table_lookup;
t->flush = dn_fib_table_flush;
t->dump = dn_fib_table_dump;
- memset(t->data, 0, sizeof(struct dn_hash));
hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
return t;
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index e246f054f36..a4065eb1341 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str)
static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
size_t len;
__le16 addr;
@@ -220,8 +219,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
size_t len;
struct net_device *dev;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4bd78c8cfb2..2d31bf3f05c 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -60,7 +60,6 @@
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/system.h>
-#include <asm/checksum.h>
__setup("ether=", netdev_boot_setup);
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index f7e84e9d13a..a64be6cdf07 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -32,6 +32,7 @@ config IEEE80211_CRYPT_WEP
depends on IEEE80211
select CRYPTO
select CRYPTO_ARC4
+ select CRYPTO_ECB
select CRC32
---help---
Include software based cipher suites in support of IEEE
@@ -58,6 +59,7 @@ config IEEE80211_CRYPT_TKIP
depends on IEEE80211 && NET_RADIO
select CRYPTO
select CRYPTO_MICHAEL_MIC
+ select CRYPTO_ECB
select CRC32
---help---
Include software based cipher suites in support of IEEE 802.11i
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 4200ec50986..fc1f99a5973 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -16,6 +16,7 @@
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/mm.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <asm/string.h>
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 1b2efff11d3..7a95c3d8131 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/skbuff.h>
+#include <linux/mm.h>
#include <asm/string.h>
#include <net/ieee80211.h>
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 13b1e5fff7e..b1c6d1f717d 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -67,7 +67,7 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee)
return 0;
ieee->networks =
- kmalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network),
+ kzalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network),
GFP_KERNEL);
if (!ieee->networks) {
printk(KERN_WARNING "%s: Out of memory allocating beacons\n",
@@ -75,9 +75,6 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee)
return -ENOMEM;
}
- memset(ieee->networks, 0,
- MAX_NETWORK_COUNT * sizeof(struct ieee80211_network));
-
return 0;
}
@@ -118,6 +115,21 @@ static void ieee80211_networks_initialize(struct ieee80211_device *ieee)
&ieee->network_free_list);
}
+static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < 68) || (new_mtu > IEEE80211_DATA_LEN))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static struct net_device_stats *ieee80211_generic_get_stats(
+ struct net_device *dev)
+{
+ struct ieee80211_device *ieee = netdev_priv(dev);
+ return &ieee->stats;
+}
+
struct net_device *alloc_ieee80211(int sizeof_priv)
{
struct ieee80211_device *ieee;
@@ -133,6 +145,11 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
}
ieee = netdev_priv(dev);
dev->hard_start_xmit = ieee80211_xmit;
+ dev->change_mtu = ieee80211_change_mtu;
+
+ /* Drivers are free to override this if the generic implementation
+ * does not meet their needs. */
+ dev->get_stats = ieee80211_generic_get_stats;
ieee->dev = dev;
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 770704183a1..d97e5412e31 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -415,17 +415,16 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
ieee->host_mc_decrypt : ieee->host_decrypt;
if (can_be_decrypted) {
- int idx = 0;
if (skb->len >= hdrlen + 3) {
/* Top two-bits of byte 3 are the key index */
- idx = skb->data[hdrlen + 3] >> 6;
+ keyidx = skb->data[hdrlen + 3] >> 6;
}
- /* ieee->crypt[] is WEP_KEY (4) in length. Given that idx
- * is only allowed 2-bits of storage, no value of idx can
- * be provided via above code that would result in idx
+ /* ieee->crypt[] is WEP_KEY (4) in length. Given that keyidx
+ * is only allowed 2-bits of storage, no value of keyidx can
+ * be provided via above code that would result in keyidx
* being out of range */
- crypt = ieee->crypt[idx];
+ crypt = ieee->crypt[keyidx];
#ifdef NOT_YET
sta = NULL;
@@ -479,6 +478,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
goto rx_exit;
}
#endif
+ /* drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.29) */
+ if (sc == ieee->prev_seq_ctl)
+ goto rx_dropped;
+ else
+ ieee->prev_seq_ctl = sc;
/* Data frame - extract src/dst addresses */
if (skb->len < IEEE80211_3ADDR_LEN)
@@ -655,6 +659,51 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
goto rx_dropped;
}
+ /* If the frame was decrypted in hardware, we may need to strip off
+ * any security data (IV, ICV, etc) that was left behind */
+ if (!can_be_decrypted && (fc & IEEE80211_FCTL_PROTECTED) &&
+ ieee->host_strip_iv_icv) {
+ int trimlen = 0;
+
+ /* Top two-bits of byte 3 are the key index */
+ if (skb->len >= hdrlen + 3)
+ keyidx = skb->data[hdrlen + 3] >> 6;
+
+ /* To strip off any security data which appears before the
+ * payload, we simply increase hdrlen (as the header gets
+ * chopped off immediately below). For the security data which
+ * appears after the payload, we use skb_trim. */
+
+ switch (ieee->sec.encode_alg[keyidx]) {
+ case SEC_ALG_WEP:
+ /* 4 byte IV */
+ hdrlen += 4;
+ /* 4 byte ICV */
+ trimlen = 4;
+ break;
+ case SEC_ALG_TKIP:
+ /* 4 byte IV, 4 byte ExtIV */
+ hdrlen += 8;
+ /* 8 byte MIC, 4 byte ICV */
+ trimlen = 12;
+ break;
+ case SEC_ALG_CCMP:
+ /* 8 byte CCMP header */
+ hdrlen += 8;
+ /* 8 byte MIC */
+ trimlen = 8;
+ break;
+ }
+
+ if (skb->len < trimlen)
+ goto rx_dropped;
+
+ __skb_trim(skb, skb->len - trimlen);
+
+ if (skb->len < hdrlen)
+ goto rx_dropped;
+ }
+
/* skb: hdr + (possible reassembled) full plaintext payload */
payload = skb->data + hdrlen;
@@ -1078,12 +1127,12 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
while (length >= sizeof(*info_element)) {
if (sizeof(*info_element) + info_element->len > length) {
- IEEE80211_ERROR("Info elem: parse failed: "
- "info_element->len + 2 > left : "
- "info_element->len+2=%zd left=%d, id=%d.\n",
- info_element->len +
- sizeof(*info_element),
- length, info_element->id);
+ IEEE80211_DEBUG_MGMT("Info elem: parse failed: "
+ "info_element->len + 2 > left : "
+ "info_element->len+2=%zd left=%d, id=%d.\n",
+ info_element->len +
+ sizeof(*info_element),
+ length, info_element->id);
/* We stop processing but don't return an error here
* because some misbehaviour APs break this rule. ie.
* Orinoco AP1000. */
@@ -1255,12 +1304,11 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
case MFIE_TYPE_IBSS_DFS:
if (network->ibss_dfs)
break;
- network->ibss_dfs =
- kmalloc(info_element->len, GFP_ATOMIC);
+ network->ibss_dfs = kmemdup(info_element->data,
+ info_element->len,
+ GFP_ATOMIC);
if (!network->ibss_dfs)
return 1;
- memcpy(network->ibss_dfs, info_element->data,
- info_element->len);
network->flags |= NETWORK_HAS_IBSS_DFS;
break;
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index ae254497ba3..854fc13cd78 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -390,7 +390,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
* this stack is providing the full 802.11 header, one will
* eventually be affixed to this fragment -- so we must account
* for it when determining the amount of payload space. */
- bytes_per_frag = frag_size - IEEE80211_3ADDR_LEN;
+ bytes_per_frag = frag_size - hdr_len;
if (ieee->config &
(CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS))
bytes_per_frag -= IEEE80211_FCS_LEN;
@@ -412,7 +412,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
} else {
nr_frags = 1;
bytes_per_frag = bytes_last_frag = bytes;
- frag_size = bytes + IEEE80211_3ADDR_LEN;
+ frag_size = bytes + hdr_len;
}
rts_required = (frag_size > ieee->rts
diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c
index 589f6d2c548..e3f37fdda65 100644
--- a/net/ieee80211/softmac/ieee80211softmac_assoc.c
+++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c
@@ -48,7 +48,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft
dprintk(KERN_INFO PFX "sent association request!\n");
spin_lock_irqsave(&mac->lock, flags);
- mac->associated = 0; /* just to make sure */
+ mac->associnfo.associated = 0; /* just to make sure */
/* Set a timer for timeout */
/* FIXME: make timeout configurable */
@@ -58,28 +58,28 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft
}
void
-ieee80211softmac_assoc_timeout(void *d)
+ieee80211softmac_assoc_timeout(struct work_struct *work)
{
- struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d;
+ struct ieee80211softmac_device *mac =
+ container_of(work, struct ieee80211softmac_device,
+ associnfo.timeout.work);
struct ieee80211softmac_network *n;
- unsigned long flags;
- spin_lock_irqsave(&mac->lock, flags);
+ mutex_lock(&mac->associnfo.mutex);
/* we might race against ieee80211softmac_handle_assoc_response,
* so make sure only one of us does something */
- if (!mac->associnfo.associating) {
- spin_unlock_irqrestore(&mac->lock, flags);
- return;
- }
+ if (!mac->associnfo.associating)
+ goto out;
mac->associnfo.associating = 0;
mac->associnfo.bssvalid = 0;
- mac->associated = 0;
+ mac->associnfo.associated = 0;
n = ieee80211softmac_get_network_by_bssid_locked(mac, mac->associnfo.bssid);
- spin_unlock_irqrestore(&mac->lock, flags);
dprintk(KERN_INFO PFX "assoc request timed out!\n");
ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, n);
+out:
+ mutex_unlock(&mac->associnfo.mutex);
}
void
@@ -93,7 +93,7 @@ ieee80211softmac_disassoc(struct ieee80211softmac_device *mac)
netif_carrier_off(mac->dev);
- mac->associated = 0;
+ mac->associnfo.associated = 0;
mac->associnfo.bssvalid = 0;
mac->associnfo.associating = 0;
ieee80211softmac_init_bss(mac);
@@ -107,7 +107,7 @@ ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reas
{
struct ieee80211softmac_network *found;
- if (mac->associnfo.bssvalid && mac->associated) {
+ if (mac->associnfo.bssvalid && mac->associnfo.associated) {
found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid);
if (found)
ieee80211softmac_send_mgt_frame(mac, found, IEEE80211_STYPE_DISASSOC, reason);
@@ -188,25 +188,28 @@ ieee80211softmac_assoc_notify_auth(struct net_device *dev, int event_type, void
/* This function is called to handle userspace requests (asynchronously) */
void
-ieee80211softmac_assoc_work(void *d)
+ieee80211softmac_assoc_work(struct work_struct *work)
{
- struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d;
+ struct ieee80211softmac_device *mac =
+ container_of(work, struct ieee80211softmac_device,
+ associnfo.work.work);
struct ieee80211softmac_network *found = NULL;
struct ieee80211_network *net = NULL, *best = NULL;
int bssvalid;
unsigned long flags;
+ mutex_lock(&mac->associnfo.mutex);
+
+ if (!mac->associnfo.associating)
+ goto out;
+
/* ieee80211_disassoc might clear this */
bssvalid = mac->associnfo.bssvalid;
/* meh */
- if (mac->associated)
+ if (mac->associnfo.associated)
ieee80211softmac_send_disassoc_req(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT);
- spin_lock_irqsave(&mac->lock, flags);
- mac->associnfo.associating = 1;
- spin_unlock_irqrestore(&mac->lock, flags);
-
/* try to find the requested network in our list, if we found one already */
if (bssvalid || mac->associnfo.bssfixed)
found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid);
@@ -260,10 +263,8 @@ ieee80211softmac_assoc_work(void *d)
if (!found) {
if (mac->associnfo.scan_retry > 0) {
- spin_lock_irqsave(&mac->lock, flags);
mac->associnfo.scan_retry--;
- spin_unlock_irqrestore(&mac->lock, flags);
-
+
/* We know of no such network. Let's scan.
* NB: this also happens if we had no memory to copy the network info...
* Maybe we can hope to have more memory after scanning finishes ;)
@@ -272,19 +273,17 @@ ieee80211softmac_assoc_work(void *d)
ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify_scan, NULL);
if (ieee80211softmac_start_scan(mac))
dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n");
- return;
+ goto out;
} else {
- spin_lock_irqsave(&mac->lock, flags);
mac->associnfo.associating = 0;
- mac->associated = 0;
- spin_unlock_irqrestore(&mac->lock, flags);
+ mac->associnfo.associated = 0;
dprintk(KERN_INFO PFX "Unable to find matching network after scan!\n");
/* reset the retry counter for the next user request since we
* break out and don't reschedule ourselves after this point. */
mac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_NET_NOT_FOUND, NULL);
- return;
+ goto out;
}
}
@@ -297,7 +296,7 @@ ieee80211softmac_assoc_work(void *d)
/* copy the ESSID for displaying it */
mac->associnfo.associate_essid.len = found->essid.len;
memcpy(mac->associnfo.associate_essid.data, found->essid.data, IW_ESSID_MAX_SIZE + 1);
-
+
/* we found a network! authenticate (if necessary) and associate to it. */
if (found->authenticating) {
dprintk(KERN_INFO PFX "Already requested authentication, waiting...\n");
@@ -305,7 +304,7 @@ ieee80211softmac_assoc_work(void *d)
mac->associnfo.assoc_wait = 1;
ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL);
}
- return;
+ goto out;
}
if (!found->authenticated && !found->authenticating) {
/* This relies on the fact that _auth_req only queues the work,
@@ -321,11 +320,14 @@ ieee80211softmac_assoc_work(void *d)
mac->associnfo.assoc_wait = 0;
ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, found);
}
- return;
+ goto out;
}
/* finally! now we can start associating */
mac->associnfo.assoc_wait = 0;
ieee80211softmac_assoc(mac, found);
+
+out:
+ mutex_unlock(&mac->associnfo.mutex);
}
/* call this to do whatever is necessary when we're associated */
@@ -341,7 +343,7 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac,
mac->bssinfo.supported_rates = net->supported_rates;
ieee80211softmac_recalc_txrates(mac);
- mac->associated = 1;
+ mac->associnfo.associated = 1;
mac->associnfo.short_preamble_available =
(cap & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0;
@@ -414,14 +416,14 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev,
network->authenticated = 0;
/* we don't want to do this more than once ... */
network->auth_desynced_once = 1;
- schedule_work(&mac->associnfo.work);
+ schedule_delayed_work(&mac->associnfo.work, 0);
break;
}
default:
dprintk(KERN_INFO PFX "associating failed (reason: 0x%x)!\n", status);
mac->associnfo.associating = 0;
mac->associnfo.bssvalid = 0;
- mac->associated = 0;
+ mac->associnfo.associated = 0;
ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, network);
}
@@ -429,6 +431,17 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev,
return 0;
}
+void
+ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mac->lock, flags);
+ mac->associnfo.associating = 1;
+ schedule_delayed_work(&mac->associnfo.work, 0);
+ spin_unlock_irqrestore(&mac->lock, flags);
+}
+
int
ieee80211softmac_handle_disassoc(struct net_device * dev,
struct ieee80211_disassoc *disassoc)
@@ -447,8 +460,7 @@ ieee80211softmac_handle_disassoc(struct net_device * dev,
dprintk(KERN_INFO PFX "got disassoc frame\n");
ieee80211softmac_disassoc(mac);
- /* try to reassociate */
- schedule_work(&mac->associnfo.work);
+ ieee80211softmac_try_reassoc(mac);
return 0;
}
@@ -468,7 +480,7 @@ ieee80211softmac_handle_reassoc_req(struct net_device * dev,
dprintkl(KERN_INFO PFX "reassoc request from unknown network\n");
return 0;
}
- schedule_work(&mac->associnfo.work);
+ schedule_delayed_work(&mac->associnfo.work, 0);
return 0;
}
diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index 4cef39e171d..8ed3e59b802 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -26,7 +26,7 @@
#include "ieee80211softmac_priv.h"
-static void ieee80211softmac_auth_queue(void *data);
+static void ieee80211softmac_auth_queue(struct work_struct *work);
/* Queues an auth request to the desired AP */
int
@@ -54,14 +54,14 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac,
auth->mac = mac;
auth->retry = IEEE80211SOFTMAC_AUTH_RETRY_LIMIT;
auth->state = IEEE80211SOFTMAC_AUTH_OPEN_REQUEST;
- INIT_WORK(&auth->work, &ieee80211softmac_auth_queue, (void *)auth);
+ INIT_DELAYED_WORK(&auth->work, ieee80211softmac_auth_queue);
/* Lock (for list) */
spin_lock_irqsave(&mac->lock, flags);
/* add to list */
list_add_tail(&auth->list, &mac->auth_queue);
- schedule_work(&auth->work);
+ schedule_delayed_work(&auth->work, 0);
spin_unlock_irqrestore(&mac->lock, flags);
return 0;
@@ -70,14 +70,15 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac,
/* Sends an auth request to the desired AP and handles timeouts */
static void
-ieee80211softmac_auth_queue(void *data)
+ieee80211softmac_auth_queue(struct work_struct *work)
{
struct ieee80211softmac_device *mac;
struct ieee80211softmac_auth_queue_item *auth;
struct ieee80211softmac_network *net;
unsigned long flags;
- auth = (struct ieee80211softmac_auth_queue_item *)data;
+ auth = container_of(work, struct ieee80211softmac_auth_queue_item,
+ work.work);
net = auth->net;
mac = auth->mac;
@@ -118,9 +119,11 @@ ieee80211softmac_auth_queue(void *data)
/* Sends a response to an auth challenge (for shared key auth). */
static void
-ieee80211softmac_auth_challenge_response(void *_aq)
+ieee80211softmac_auth_challenge_response(struct work_struct *work)
{
- struct ieee80211softmac_auth_queue_item *aq = _aq;
+ struct ieee80211softmac_auth_queue_item *aq =
+ container_of(work, struct ieee80211softmac_auth_queue_item,
+ work.work);
/* Send our response */
ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state);
@@ -158,7 +161,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
/* Make sure that we've got an auth queue item for this request */
if(aq == NULL)
{
- printkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2));
+ dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2));
/* Error #? */
return -1;
}
@@ -166,7 +169,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
/* Check for out of order authentication */
if(!net->authenticating)
{
- printkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2));
+ dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2));
return -1;
}
@@ -216,10 +219,16 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
net->challenge_len = *data++;
if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN)
net->challenge_len = WLAN_AUTH_CHALLENGE_LEN;
- if (net->challenge != NULL)
- kfree(net->challenge);
- net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC);
- memcpy(net->challenge, data, net->challenge_len);
+ kfree(net->challenge);
+ net->challenge = kmemdup(data, net->challenge_len,
+ GFP_ATOMIC);
+ if (net->challenge == NULL) {
+ printkl(KERN_NOTICE PFX "Shared Key "
+ "Authentication failed due to "
+ "memory shortage.\n");
+ spin_unlock_irqrestore(&mac->lock, flags);
+ break;
+ }
aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE;
/* We reuse the work struct from the auth request here.
@@ -228,8 +237,8 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
* we have obviously already sent the initial auth
* request. */
cancel_delayed_work(&aq->work);
- INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq);
- schedule_work(&aq->work);
+ INIT_DELAYED_WORK(&aq->work, &ieee80211softmac_auth_challenge_response);
+ schedule_delayed_work(&aq->work, 0);
spin_unlock_irqrestore(&mac->lock, flags);
return 0;
case IEEE80211SOFTMAC_AUTH_SHARED_PASS:
@@ -328,6 +337,8 @@ ieee80211softmac_deauth_from_net(struct ieee80211softmac_device *mac,
/* can't transmit data right now... */
netif_carrier_off(mac->dev);
spin_unlock_irqrestore(&mac->lock, flags);
+
+ ieee80211softmac_try_reassoc(mac);
}
/*
@@ -342,7 +353,7 @@ ieee80211softmac_deauth_req(struct ieee80211softmac_device *mac,
/* Make sure the network is authenticated */
if (!net->authenticated)
{
- printkl(KERN_DEBUG PFX "Can't send deauthentication packet, network is not authenticated.\n");
+ dprintkl(KERN_DEBUG PFX "Can't send deauthentication packet, network is not authenticated.\n");
/* Error okay? */
return -EPERM;
}
@@ -376,7 +387,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de
net = ieee80211softmac_get_network_by_bssid(mac, deauth->header.addr2);
if (net == NULL) {
- printkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n",
+ dprintkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n",
MAC_ARG(deauth->header.addr2));
return 0;
}
@@ -384,7 +395,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de
/* Make sure the network is authenticated */
if(!net->authenticated)
{
- printkl(KERN_DEBUG PFX "Can't perform deauthentication, network is not authenticated.\n");
+ dprintkl(KERN_DEBUG PFX "Can't perform deauthentication, network is not authenticated.\n");
/* Error okay? */
return -EPERM;
}
@@ -392,6 +403,6 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de
ieee80211softmac_deauth_from_net(mac, net);
/* let's try to re-associate */
- schedule_work(&mac->associnfo.work);
+ schedule_delayed_work(&mac->associnfo.work, 0);
return 0;
}
diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c
index f34fa2ef666..b9015656cfb 100644
--- a/net/ieee80211/softmac/ieee80211softmac_event.c
+++ b/net/ieee80211/softmac/ieee80211softmac_event.c
@@ -73,10 +73,12 @@ static char *event_descriptions[IEEE80211SOFTMAC_EVENT_LAST+1] = {
static void
-ieee80211softmac_notify_callback(void *d)
+ieee80211softmac_notify_callback(struct work_struct *work)
{
- struct ieee80211softmac_event event = *(struct ieee80211softmac_event*) d;
- kfree(d);
+ struct ieee80211softmac_event *pevent =
+ container_of(work, struct ieee80211softmac_event, work.work);
+ struct ieee80211softmac_event event = *pevent;
+ kfree(pevent);
event.fun(event.mac->dev, event.event_type, event.context);
}
@@ -99,7 +101,7 @@ ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac,
return -ENOMEM;
eventptr->event_type = event;
- INIT_WORK(&eventptr->work, ieee80211softmac_notify_callback, eventptr);
+ INIT_DELAYED_WORK(&eventptr->work, ieee80211softmac_notify_callback);
eventptr->fun = fun;
eventptr->context = context;
eventptr->mac = mac;
@@ -170,7 +172,7 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve
/* User may have subscribed to ANY event, so
* we tell them which event triggered it. */
eventptr->event_type = event;
- schedule_work(&eventptr->work);
+ schedule_delayed_work(&eventptr->work, 0);
}
}
}
diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 82bfddbf33a..b96931001b4 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -304,7 +304,7 @@ ieee80211softmac_auth(struct ieee80211_auth **pkt,
2 + /* Auth Transaction Seq */
2 + /* Status Code */
/* Challenge Text IE */
- is_shared_response ? 0 : 1 + 1 + net->challenge_len
+ (is_shared_response ? 1 + 1 + net->challenge_len : 0)
);
if (unlikely((*pkt) == NULL))
return 0;
@@ -475,8 +475,13 @@ int ieee80211softmac_handle_beacon(struct net_device *dev,
{
struct ieee80211softmac_device *mac = ieee80211_priv(dev);
- if (mac->associated && memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0)
- ieee80211softmac_process_erp(mac, network->erp_value);
+ /* This might race, but we don't really care and it's not worth
+ * adding heavyweight locking in this fastpath.
+ */
+ if (mac->associnfo.associated) {
+ if (memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0)
+ ieee80211softmac_process_erp(mac, network->erp_value);
+ }
return 0;
}
diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c
index addea1cf73a..256207b71dc 100644
--- a/net/ieee80211/softmac/ieee80211softmac_module.c
+++ b/net/ieee80211/softmac/ieee80211softmac_module.c
@@ -57,8 +57,9 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv)
INIT_LIST_HEAD(&softmac->network_list);
INIT_LIST_HEAD(&softmac->events);
- INIT_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work, softmac);
- INIT_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout, softmac);
+ mutex_init(&softmac->associnfo.mutex);
+ INIT_DELAYED_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work);
+ INIT_DELAYED_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout);
softmac->start_scan = ieee80211softmac_start_scan_implementation;
softmac->wait_for_scan = ieee80211softmac_wait_for_scan_implementation;
softmac->stop_scan = ieee80211softmac_stop_scan_implementation;
diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h
index 0642e090b8a..4c2bba34d32 100644
--- a/net/ieee80211/softmac/ieee80211softmac_priv.h
+++ b/net/ieee80211/softmac/ieee80211softmac_priv.h
@@ -78,7 +78,7 @@
/* private definitions and prototypes */
/*** prototypes from _scan.c */
-void ieee80211softmac_scan(void *sm);
+void ieee80211softmac_scan(struct work_struct *work);
/* for internal use if scanning is needed */
int ieee80211softmac_start_scan(struct ieee80211softmac_device *mac);
void ieee80211softmac_stop_scan(struct ieee80211softmac_device *mac);
@@ -149,7 +149,7 @@ int ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *au
int ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *deauth);
/*** prototypes from _assoc.c */
-void ieee80211softmac_assoc_work(void *d);
+void ieee80211softmac_assoc_work(struct work_struct *work);
int ieee80211softmac_handle_assoc_response(struct net_device * dev,
struct ieee80211_assoc_response * resp,
struct ieee80211_network * network);
@@ -157,7 +157,7 @@ int ieee80211softmac_handle_disassoc(struct net_device * dev,
struct ieee80211_disassoc * disassoc);
int ieee80211softmac_handle_reassoc_req(struct net_device * dev,
struct ieee80211_reassoc_request * reassoc);
-void ieee80211softmac_assoc_timeout(void *d);
+void ieee80211softmac_assoc_timeout(struct work_struct *work);
void ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reason);
void ieee80211softmac_disassoc(struct ieee80211softmac_device *mac);
@@ -207,7 +207,7 @@ struct ieee80211softmac_auth_queue_item {
struct ieee80211softmac_device *mac; /* SoftMAC device */
u8 retry; /* Retry limit */
u8 state; /* Auth State */
- struct work_struct work; /* Work queue */
+ struct delayed_work work; /* Work queue */
};
/* scanning information */
@@ -219,7 +219,8 @@ struct ieee80211softmac_scaninfo {
stop:1;
u8 skip_flags;
struct completion finished;
- struct work_struct softmac_scan;
+ struct delayed_work softmac_scan;
+ struct ieee80211softmac_device *mac;
};
/* private event struct */
@@ -227,7 +228,7 @@ struct ieee80211softmac_event {
struct list_head list;
int event_type;
void *event_context;
- struct work_struct work;
+ struct delayed_work work;
notify_function_ptr fun;
void *context;
struct ieee80211softmac_device *mac;
@@ -238,4 +239,6 @@ void ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, in
int ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac,
int event, void *event_context, notify_function_ptr fun, void *context, gfp_t gfp_mask);
+void ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac);
+
#endif /* IEEE80211SOFTMAC_PRIV_H_ */
diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c
index d31cf77498c..0c85d6c24cd 100644
--- a/net/ieee80211/softmac/ieee80211softmac_scan.c
+++ b/net/ieee80211/softmac/ieee80211softmac_scan.c
@@ -47,7 +47,6 @@ ieee80211softmac_start_scan(struct ieee80211softmac_device *sm)
sm->scanning = 1;
spin_unlock_irqrestore(&sm->lock, flags);
- netif_tx_disable(sm->ieee->dev);
ret = sm->start_scan(sm->dev);
if (ret) {
spin_lock_irqsave(&sm->lock, flags);
@@ -91,12 +90,14 @@ ieee80211softmac_wait_for_scan(struct ieee80211softmac_device *sm)
/* internal scanning implementation follows */
-void ieee80211softmac_scan(void *d)
+void ieee80211softmac_scan(struct work_struct *work)
{
int invalid_channel;
u8 current_channel_idx;
- struct ieee80211softmac_device *sm = (struct ieee80211softmac_device *)d;
- struct ieee80211softmac_scaninfo *si = sm->scaninfo;
+ struct ieee80211softmac_scaninfo *si =
+ container_of(work, struct ieee80211softmac_scaninfo,
+ softmac_scan.work);
+ struct ieee80211softmac_device *sm = si->mac;
unsigned long flags;
while (!(si->stop) && (si->current_channel_idx < si->number_channels)) {
@@ -135,7 +136,8 @@ void ieee80211softmac_scan(void *d)
si->started = 0;
spin_unlock_irqrestore(&sm->lock, flags);
- dprintk(PFX "Scanning finished\n");
+ dprintk(PFX "Scanning finished: scanned %d channels starting with channel %d\n",
+ sm->scaninfo->number_channels, sm->scaninfo->channels[0].channel);
ieee80211softmac_scan_finished(sm);
complete_all(&sm->scaninfo->finished);
}
@@ -146,7 +148,8 @@ static inline struct ieee80211softmac_scaninfo *allocate_scaninfo(struct ieee802
struct ieee80211softmac_scaninfo *info = kmalloc(sizeof(struct ieee80211softmac_scaninfo), GFP_ATOMIC);
if (unlikely(!info))
return NULL;
- INIT_WORK(&info->softmac_scan, ieee80211softmac_scan, mac);
+ INIT_DELAYED_WORK(&info->softmac_scan, ieee80211softmac_scan);
+ info->mac = mac;
init_completion(&info->finished);
return info;
}
@@ -183,13 +186,11 @@ int ieee80211softmac_start_scan_implementation(struct net_device *dev)
sm->scaninfo->channels = sm->ieee->geo.bg;
sm->scaninfo->number_channels = sm->ieee->geo.bg_channels;
}
- dprintk(PFX "Start scanning with channel: %d\n", sm->scaninfo->channels[0].channel);
- dprintk(PFX "Scanning %d channels\n", sm->scaninfo->number_channels);
sm->scaninfo->current_channel_idx = 0;
sm->scaninfo->started = 1;
sm->scaninfo->stop = 0;
INIT_COMPLETION(sm->scaninfo->finished);
- schedule_work(&sm->scaninfo->softmac_scan);
+ schedule_delayed_work(&sm->scaninfo->softmac_scan, 0);
spin_unlock_irqrestore(&sm->lock, flags);
return 0;
}
@@ -248,7 +249,6 @@ void ieee80211softmac_scan_finished(struct ieee80211softmac_device *sm)
if (net)
sm->set_channel(sm->dev, net->channel);
}
- netif_wake_queue(sm->ieee->dev);
ieee80211softmac_call_events(sm, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, NULL);
}
EXPORT_SYMBOL_GPL(ieee80211softmac_scan_finished);
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index 2aa779d18f3..480d72c7a42 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -73,13 +73,14 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
struct ieee80211softmac_network *n;
struct ieee80211softmac_auth_queue_item *authptr;
int length = 0;
- unsigned long flags;
+
+ mutex_lock(&sm->associnfo.mutex);
/* Check if we're already associating to this or another network
* If it's another network, cancel and start over with our new network
* If it's our network, ignore the change, we're already doing it!
*/
- if((sm->associnfo.associating || sm->associated) &&
+ if((sm->associnfo.associating || sm->associnfo.associated) &&
(data->essid.flags && data->essid.length)) {
/* Get the associating network */
n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid);
@@ -87,10 +88,9 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
!memcmp(n->essid.data, extra, n->essid.len)) {
dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n",
MAC_ARG(sm->associnfo.bssid));
- return 0;
+ goto out;
} else {
dprintk(KERN_INFO PFX "Canceling existing associate request!\n");
- spin_lock_irqsave(&sm->lock,flags);
/* Cancel assoc work */
cancel_delayed_work(&sm->associnfo.work);
/* We don't have to do this, but it's a little cleaner */
@@ -98,14 +98,13 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
cancel_delayed_work(&authptr->work);
sm->associnfo.bssvalid = 0;
sm->associnfo.bssfixed = 0;
- spin_unlock_irqrestore(&sm->lock,flags);
flush_scheduled_work();
+ sm->associnfo.associating = 0;
+ sm->associnfo.associated = 0;
}
}
- spin_lock_irqsave(&sm->lock, flags);
-
sm->associnfo.static_essid = 0;
sm->associnfo.assoc_wait = 0;
@@ -121,10 +120,12 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
* If applicable, we have already copied the data in */
sm->associnfo.req_essid.len = length;
+ sm->associnfo.associating = 1;
/* queue lower level code to do work (if necessary) */
- schedule_work(&sm->associnfo.work);
+ schedule_delayed_work(&sm->associnfo.work, 0);
+out:
+ mutex_unlock(&sm->associnfo.mutex);
- spin_unlock_irqrestore(&sm->lock, flags);
return 0;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_essid);
@@ -136,10 +137,8 @@ ieee80211softmac_wx_get_essid(struct net_device *net_dev,
char *extra)
{
struct ieee80211softmac_device *sm = ieee80211_priv(net_dev);
- unsigned long flags;
- /* avoid getting inconsistent information */
- spin_lock_irqsave(&sm->lock, flags);
+ mutex_lock(&sm->associnfo.mutex);
/* If all fails, return ANY (empty) */
data->essid.length = 0;
data->essid.flags = 0; /* active */
@@ -152,12 +151,13 @@ ieee80211softmac_wx_get_essid(struct net_device *net_dev,
}
/* If we're associating/associated, return that */
- if (sm->associated || sm->associnfo.associating) {
+ if (sm->associnfo.associated || sm->associnfo.associating) {
data->essid.length = sm->associnfo.associate_essid.len;
data->essid.flags = 1; /* active */
memcpy(extra, sm->associnfo.associate_essid.data, sm->associnfo.associate_essid.len);
}
- spin_unlock_irqrestore(&sm->lock, flags);
+ mutex_unlock(&sm->associnfo.mutex);
+
return 0;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_essid);
@@ -322,15 +322,15 @@ ieee80211softmac_wx_get_wap(struct net_device *net_dev,
{
struct ieee80211softmac_device *mac = ieee80211_priv(net_dev);
int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&mac->lock, flags);
+ mutex_lock(&mac->associnfo.mutex);
if (mac->associnfo.bssvalid)
memcpy(data->ap_addr.sa_data, mac->associnfo.bssid, ETH_ALEN);
else
memset(data->ap_addr.sa_data, 0xff, ETH_ALEN);
data->ap_addr.sa_family = ARPHRD_ETHER;
- spin_unlock_irqrestore(&mac->lock, flags);
+ mutex_unlock(&mac->associnfo.mutex);
+
return err;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_wap);
@@ -342,28 +342,27 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
char *extra)
{
struct ieee80211softmac_device *mac = ieee80211_priv(net_dev);
- unsigned long flags;
/* sanity check */
if (data->ap_addr.sa_family != ARPHRD_ETHER) {
return -EINVAL;
}
- spin_lock_irqsave(&mac->lock, flags);
+ mutex_lock(&mac->associnfo.mutex);
if (is_broadcast_ether_addr(data->ap_addr.sa_data)) {
/* the bssid we have is not to be fixed any longer,
* and we should reassociate to the best AP. */
mac->associnfo.bssfixed = 0;
/* force reassociation */
mac->associnfo.bssvalid = 0;
- if (mac->associated)
- schedule_work(&mac->associnfo.work);
+ if (mac->associnfo.associated)
+ schedule_delayed_work(&mac->associnfo.work, 0);
} else if (is_zero_ether_addr(data->ap_addr.sa_data)) {
/* the bssid we have is no longer fixed */
mac->associnfo.bssfixed = 0;
} else {
if (!memcmp(mac->associnfo.bssid, data->ap_addr.sa_data, ETH_ALEN)) {
- if (mac->associnfo.associating || mac->associated) {
+ if (mac->associnfo.associating || mac->associnfo.associated) {
/* bssid unchanged and associated or associating - just return */
goto out;
}
@@ -374,11 +373,12 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
/* tell the other code that this bssid should be used no matter what */
mac->associnfo.bssfixed = 1;
/* queue associate if new bssid or (old one again and not associated) */
- schedule_work(&mac->associnfo.work);
+ schedule_delayed_work(&mac->associnfo.work, 0);
}
out:
- spin_unlock_irqrestore(&mac->lock, flags);
+ mutex_unlock(&mac->associnfo.mutex);
+
return 0;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_wap);
@@ -394,7 +394,8 @@ ieee80211softmac_wx_set_genie(struct net_device *dev,
int err = 0;
char *buf;
int i;
-
+
+ mutex_lock(&mac->associnfo.mutex);
spin_lock_irqsave(&mac->lock, flags);
/* bleh. shouldn't be locked for that kmalloc... */
@@ -432,6 +433,8 @@ ieee80211softmac_wx_set_genie(struct net_device *dev,
out:
spin_unlock_irqrestore(&mac->lock, flags);
+ mutex_unlock(&mac->associnfo.mutex);
+
return err;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_genie);
@@ -446,7 +449,8 @@ ieee80211softmac_wx_get_genie(struct net_device *dev,
unsigned long flags;
int err = 0;
int space = wrqu->data.length;
-
+
+ mutex_lock(&mac->associnfo.mutex);
spin_lock_irqsave(&mac->lock, flags);
wrqu->data.length = 0;
@@ -459,6 +463,8 @@ ieee80211softmac_wx_get_genie(struct net_device *dev,
err = -E2BIG;
}
spin_unlock_irqrestore(&mac->lock, flags);
+ mutex_unlock(&mac->associnfo.mutex); /* release the mutex taken on entry */
+
return err;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_genie);
@@ -473,10 +479,13 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev,
struct iw_mlme *mlme = (struct iw_mlme *)extra;
u16 reason = cpu_to_le16(mlme->reason_code);
struct ieee80211softmac_network *net;
+ int err = -EINVAL;
+
+ mutex_lock(&mac->associnfo.mutex);
if (memcmp(mac->associnfo.bssid, mlme->addr.sa_data, ETH_ALEN)) {
printk(KERN_DEBUG PFX "wx_set_mlme: requested operation on net we don't use\n");
- return -EINVAL;
+ goto out;
}
switch (mlme->cmd) {
@@ -484,14 +493,23 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev,
net = ieee80211softmac_get_network_by_bssid_locked(mac, mlme->addr.sa_data);
if (!net) {
printk(KERN_DEBUG PFX "wx_set_mlme: we should know the net here...\n");
- return -EINVAL;
+ goto out;
}
- return ieee80211softmac_deauth_req(mac, net, reason);
+ err = ieee80211softmac_deauth_req(mac, net, reason);
+ goto out;
case IW_MLME_DISASSOC:
ieee80211softmac_send_disassoc_req(mac, reason);
- return 0;
+ mac->associnfo.associated = 0;
+ mac->associnfo.associating = 0;
+ err = 0;
+ goto out;
default:
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
}
+
+out:
+ mutex_unlock(&mac->associnfo.mutex);
+
+ return err;
}
EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_mlme);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 5572071af73..503e7059e31 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -104,13 +104,6 @@ config IP_MULTIPLE_TABLES
If unsure, say N.
-config IP_ROUTE_FWMARK
- bool "IP: use netfilter MARK value as routing key"
- depends on IP_MULTIPLE_TABLES && NETFILTER
- help
- If you say Y here, you will be able to specify different routes for
- packets with different mark values (see iptables(8), MARK target).
-
config IP_ROUTE_MULTIPATH
bool "IP: equal cost multipath"
depends on IP_ADVANCED_ROUTER
@@ -625,5 +618,17 @@ config DEFAULT_TCP_CONG
default "reno" if DEFAULT_RENO
default "cubic"
+config TCP_MD5SIG
+ bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ select CRYPTO
+ select CRYPTO_MD5
+ ---help---
+ RFC2385 specifies a method of giving MD5 protection to TCP sessions.
+ Its main (only?) use is to protect BGP sessions between core routers
+ on the Internet.
+
+ If unsure, say N.
+
source "net/ipv4/ipvs/Kconfig"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15645c51520..7a068626fee 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,8 @@ obj-y := route.o inetpeer.o protocol.o \
inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o \
- datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
+ datagram.o raw.o udp.o udplite.o \
+ arp.o icmp.o devinet.o af_inet.o igmp.o \
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index edcf0932ac6..1144900d37f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -104,6 +104,7 @@
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#include <net/udp.h>
+#include <net/udplite.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
@@ -204,7 +205,7 @@ int inet_listen(struct socket *sock, int backlog)
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
- err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+ err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
}
@@ -643,7 +644,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->dport;
sin->sin_addr.s_addr = inet->daddr;
} else {
- __u32 addr = inet->rcv_saddr;
+ __be32 addr = inet->rcv_saddr;
if (!addr)
addr = inet->saddr;
sin->sin_port = inet->sport;
@@ -994,8 +995,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
int err;
struct rtable *rt;
- __u32 old_saddr = inet->saddr;
- __u32 new_saddr;
+ __be32 old_saddr = inet->saddr;
+ __be32 new_saddr;
__be32 daddr = inet->daddr;
if (inet->opt && inet->opt->srr)
@@ -1223,10 +1224,13 @@ static int __init init_ipv4_mibs(void)
tcp_statistics[1] = alloc_percpu(struct tcp_mib);
udp_statistics[0] = alloc_percpu(struct udp_mib);
udp_statistics[1] = alloc_percpu(struct udp_mib);
+ udplite_statistics[0] = alloc_percpu(struct udp_mib);
+ udplite_statistics[1] = alloc_percpu(struct udp_mib);
if (!
(net_statistics[0] && net_statistics[1] && ip_statistics[0]
&& ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
- && udp_statistics[0] && udp_statistics[1]))
+ && udp_statistics[0] && udp_statistics[1]
+ && udplite_statistics[0] && udplite_statistics[1] ) )
return -ENOMEM;
(void) tcp_mib_init();
@@ -1313,6 +1317,8 @@ static int __init inet_init(void)
/* Setup TCP slab cache for open requests. */
tcp_init();
+ /* Add UDP-Lite (RFC 3828) */
+ udplite4_register();
/*
* Set the ICMP layer up
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 99542977e47..67a5509e26f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -14,7 +14,7 @@
* into IP header for icv calculation. Options are already checked
* for validity, so paranoia is not required. */
-static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
+static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr)
{
unsigned char * optptr = (unsigned char*)(iph+1);
int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -162,7 +162,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
iph->frag_off = 0;
iph->check = 0;
if (ihl > sizeof(*iph)) {
- u32 dummy;
+ __be32 dummy;
if (ip_clear_mutable_options(iph, &dummy))
goto out;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cfb5d3de9c8..3981e8be9ab 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -203,7 +203,7 @@ struct neigh_table arp_tbl = {
.gc_thresh3 = 1024,
};
-int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir)
+int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
{
switch (dev->type) {
case ARPHRD_ETHER:
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index e2077a3aa8c..60aafb4a8ad 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -319,6 +319,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
entry->activity += 1;
atomic_inc(&entry->lsm_data->refcount);
secattr->cache = entry->lsm_data;
+ secattr->flags |= NETLBL_SECATTR_CACHE;
if (prev_entry == NULL) {
spin_unlock_bh(&cipso_v4_cache[bkt].lock);
return 0;
@@ -377,12 +378,11 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
return -ENOMEM;
- entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+ entry->key = kmemdup(cipso_ptr, cipso_ptr_len, GFP_ATOMIC);
if (entry->key == NULL) {
ret_val = -ENOMEM;
goto cache_add_failure;
}
- memcpy(entry->key, cipso_ptr, cipso_ptr_len);
entry->key_len = cipso_ptr_len;
entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
atomic_inc(&secattr->cache->refcount);
@@ -447,8 +447,30 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
*/
int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
{
+ u32 iter;
+
if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
return -EINVAL;
+ for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) {
+ switch (doi_def->tags[iter]) {
+ case CIPSO_V4_TAG_RBITMAP:
+ break;
+ case CIPSO_V4_TAG_RANGE:
+ if (doi_def->type != CIPSO_V4_MAP_PASS)
+ return -EINVAL;
+ break;
+ case CIPSO_V4_TAG_INVALID:
+ if (iter == 0)
+ return -EINVAL;
+ break;
+ case CIPSO_V4_TAG_ENUM:
+ if (doi_def->type != CIPSO_V4_MAP_PASS)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
doi_def->valid = 1;
INIT_RCU_HEAD(&doi_def->rcu);
@@ -805,8 +827,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
/**
* cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
* @doi_def: the DOI definition
- * @host_cat: the category bitmap in host format
- * @host_cat_len: the length of the host's category bitmap in bytes
+ * @secattr: the security attributes
* @net_cat: the zero'd out category bitmap in network/CIPSO format
* @net_cat_len: the length of the CIPSO bitmap in bytes
*
@@ -817,59 +838,51 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
*
*/
static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
- const unsigned char *host_cat,
- u32 host_cat_len,
+ const struct netlbl_lsm_secattr *secattr,
unsigned char *net_cat,
u32 net_cat_len)
{
int host_spot = -1;
- u32 net_spot;
+ u32 net_spot = CIPSO_V4_INV_CAT;
u32 net_spot_max = 0;
- u32 host_clen_bits = host_cat_len * 8;
u32 net_clen_bits = net_cat_len * 8;
- u32 host_cat_size;
- u32 *host_cat_array;
+ u32 host_cat_size = 0;
+ u32 *host_cat_array = NULL;
- switch (doi_def->type) {
- case CIPSO_V4_MAP_PASS:
- net_spot_max = host_cat_len;
- while (net_spot_max > 0 && host_cat[net_spot_max - 1] == 0)
- net_spot_max--;
- if (net_spot_max > net_cat_len)
- return -EINVAL;
- memcpy(net_cat, host_cat, net_spot_max);
- return net_spot_max;
- case CIPSO_V4_MAP_STD:
+ if (doi_def->type == CIPSO_V4_MAP_STD) {
host_cat_size = doi_def->map.std->cat.local_size;
host_cat_array = doi_def->map.std->cat.local;
- for (;;) {
- host_spot = cipso_v4_bitmap_walk(host_cat,
- host_clen_bits,
- host_spot + 1,
- 1);
- if (host_spot < 0)
- break;
+ }
+
+ for (;;) {
+ host_spot = netlbl_secattr_catmap_walk(secattr->mls_cat,
+ host_spot + 1);
+ if (host_spot < 0)
+ break;
+
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ net_spot = host_spot;
+ break;
+ case CIPSO_V4_MAP_STD:
if (host_spot >= host_cat_size)
return -EPERM;
-
net_spot = host_cat_array[host_spot];
- if (net_spot >= net_clen_bits)
- return -ENOSPC;
- cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
-
- if (net_spot > net_spot_max)
- net_spot_max = net_spot;
+ if (net_spot >= CIPSO_V4_INV_CAT)
+ return -EPERM;
+ break;
}
+ if (net_spot >= net_clen_bits)
+ return -ENOSPC;
+ cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
- if (host_spot == -2)
- return -EFAULT;
-
- if (++net_spot_max % 8)
- return net_spot_max / 8 + 1;
- return net_spot_max / 8;
+ if (net_spot > net_spot_max)
+ net_spot_max = net_spot;
}
- return -EINVAL;
+ if (++net_spot_max % 8)
+ return net_spot_max / 8 + 1;
+ return net_spot_max / 8;
}
/**
@@ -877,102 +890,333 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
* @doi_def: the DOI definition
* @net_cat: the category bitmap in network/CIPSO format
* @net_cat_len: the length of the CIPSO bitmap in bytes
- * @host_cat: the zero'd out category bitmap in host format
- * @host_cat_len: the length of the host's category bitmap in bytes
+ * @secattr: the security attributes
*
* Description:
* Perform a label mapping to translate a CIPSO bitmap to the correct local
- * MLS category bitmap using the given DOI definition. Returns the minimum
- * size in bytes of the host bitmap on success, negative values otherwise.
+ * MLS category bitmap using the given DOI definition. Returns zero on
+ * success, negative values on failure.
*
*/
static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
const unsigned char *net_cat,
u32 net_cat_len,
- unsigned char *host_cat,
- u32 host_cat_len)
+ struct netlbl_lsm_secattr *secattr)
{
- u32 host_spot;
- u32 host_spot_max = 0;
+ int ret_val;
int net_spot = -1;
+ u32 host_spot = CIPSO_V4_INV_CAT;
u32 net_clen_bits = net_cat_len * 8;
- u32 host_clen_bits = host_cat_len * 8;
- u32 net_cat_size;
- u32 *net_cat_array;
+ u32 net_cat_size = 0;
+ u32 *net_cat_array = NULL;
- switch (doi_def->type) {
- case CIPSO_V4_MAP_PASS:
- if (net_cat_len > host_cat_len)
- return -EINVAL;
- memcpy(host_cat, net_cat, net_cat_len);
- return net_cat_len;
- case CIPSO_V4_MAP_STD:
+ if (doi_def->type == CIPSO_V4_MAP_STD) {
net_cat_size = doi_def->map.std->cat.cipso_size;
net_cat_array = doi_def->map.std->cat.cipso;
- for (;;) {
- net_spot = cipso_v4_bitmap_walk(net_cat,
- net_clen_bits,
- net_spot + 1,
- 1);
- if (net_spot < 0)
- break;
- if (net_spot >= net_cat_size ||
- net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
- return -EPERM;
+ }
- host_spot = net_cat_array[net_spot];
- if (host_spot >= host_clen_bits)
- return -ENOSPC;
- cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+ for (;;) {
+ net_spot = cipso_v4_bitmap_walk(net_cat,
+ net_clen_bits,
+ net_spot + 1,
+ 1);
+ if (net_spot < 0) {
+ if (net_spot == -2)
+ return -EFAULT;
+ return 0;
+ }
- if (host_spot > host_spot_max)
- host_spot_max = host_spot;
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_PASS:
+ host_spot = net_spot;
+ break;
+ case CIPSO_V4_MAP_STD:
+ if (net_spot >= net_cat_size)
+ return -EPERM;
+ host_spot = net_cat_array[net_spot];
+ if (host_spot >= CIPSO_V4_INV_CAT)
+ return -EPERM;
+ break;
}
+ ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat,
+ host_spot,
+ GFP_ATOMIC);
+ if (ret_val != 0)
+ return ret_val;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_enum_valid - Checks to see if the categories are valid
+ * @doi_def: the DOI definition
+ * @enumcat: category list
+ * @enumcat_len: length of the category list in bytes
+ *
+ * Description:
+ * Checks the given categories against the given DOI definition and returns a
+ * negative value if any of the categories do not have a valid mapping and a
+ * zero value if all of the categories are valid.
+ *
+ */
+static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def,
+ const unsigned char *enumcat,
+ u32 enumcat_len)
+{
+ u16 cat;
+ int cat_prev = -1;
+ u32 iter;
+
+ if (doi_def->type != CIPSO_V4_MAP_PASS || enumcat_len & 0x01)
+ return -EFAULT;
- if (net_spot == -2)
+ for (iter = 0; iter < enumcat_len; iter += 2) {
+ cat = ntohs(*((__be16 *)&enumcat[iter]));
+ if (cat <= cat_prev)
return -EFAULT;
+ cat_prev = cat;
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_map_cat_enum_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @net_cat: the zero'd out category list in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO category list in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CIPSO category list using the given DOI definition. Returns the
+ * size in bytes of the network category list on success, negative values
+ * otherwise.
+ *
+ */
+static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *net_cat,
+ u32 net_cat_len)
+{
+ int cat = -1;
+ u32 cat_iter = 0;
- if (++host_spot_max % 8)
- return host_spot_max / 8 + 1;
- return host_spot_max / 8;
+ for (;;) {
+ cat = netlbl_secattr_catmap_walk(secattr->mls_cat, cat + 1);
+ if (cat < 0)
+ break;
+ if ((cat_iter + 2) > net_cat_len)
+ return -ENOSPC;
+
+ *((__be16 *)&net_cat[cat_iter]) = htons(cat);
+ cat_iter += 2;
}
- return -EINVAL;
+ return cat_iter;
+}
+
+/**
+ * cipso_v4_map_cat_enum_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category list in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO category list in bytes
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO category list to the correct
+ * local MLS category bitmap using the given DOI definition. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def,
+ const unsigned char *net_cat,
+ u32 net_cat_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u32 iter;
+
+ for (iter = 0; iter < net_cat_len; iter += 2) {
+ ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat,
+ ntohs(*((__be16 *)&net_cat[iter])),
+ GFP_ATOMIC);
+ if (ret_val != 0)
+ return ret_val;
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_map_cat_rng_valid - Checks to see if the categories are valid
+ * @doi_def: the DOI definition
+ * @rngcat: category list
+ * @rngcat_len: length of the category list in bytes
+ *
+ * Description:
+ * Checks the given categories against the given DOI definition and returns a
+ * negative value if any of the categories do not have a valid mapping and a
+ * zero value if all of the categories are valid.
+ *
+ */
+static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def,
+ const unsigned char *rngcat,
+ u32 rngcat_len)
+{
+ u16 cat_high;
+ u16 cat_low;
+ u32 cat_prev = CIPSO_V4_MAX_REM_CATS + 1;
+ u32 iter;
+
+ if (doi_def->type != CIPSO_V4_MAP_PASS || rngcat_len & 0x01)
+ return -EFAULT;
+
+ for (iter = 0; iter < rngcat_len; iter += 4) {
+ cat_high = ntohs(*((__be16 *)&rngcat[iter]));
+ if ((iter + 4) <= rngcat_len)
+ cat_low = ntohs(*((__be16 *)&rngcat[iter + 2]));
+ else
+ cat_low = 0;
+
+ if (cat_high > cat_prev)
+ return -EFAULT;
+
+ cat_prev = cat_low;
+ }
+
+ return 0;
+}
+
+/**
+ * cipso_v4_map_cat_rng_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @net_cat: the zero'd out category list in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO category list in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CIPSO category list using the given DOI definition. Returns the
+ * size in bytes of the network category list on success, negative values
+ * otherwise.
+ *
+ */
+static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *net_cat,
+ u32 net_cat_len)
+{
+ /* The constant '16' is not random, it is the maximum number of
+ * high/low category range pairs as permitted by the CIPSO draft based
+ * on a maximum IPv4 header length of 60 bytes - the BUG_ON() assertion
+ * does a sanity check to make sure we don't overflow the array. */
+ int iter = -1;
+ u16 array[16];
+ u32 array_cnt = 0;
+ u32 cat_size = 0;
+
+ BUG_ON(net_cat_len > 30);
+
+ for (;;) {
+ iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1);
+ if (iter < 0)
+ break;
+ cat_size += (iter == 0 ? 0 : sizeof(u16));
+ if (cat_size > net_cat_len)
+ return -ENOSPC;
+ array[array_cnt++] = iter;
+
+ iter = netlbl_secattr_catmap_walk_rng(secattr->mls_cat, iter);
+ if (iter < 0)
+ return -EFAULT;
+ cat_size += sizeof(u16);
+ if (cat_size > net_cat_len)
+ return -ENOSPC;
+ array[array_cnt++] = iter;
+ }
+
+ for (iter = 0; array_cnt > 0;) {
+ *((__be16 *)&net_cat[iter]) = htons(array[--array_cnt]);
+ iter += 2;
+ array_cnt--;
+ if (array[array_cnt] != 0) {
+ *((__be16 *)&net_cat[iter]) = htons(array[array_cnt]);
+ iter += 2;
+ }
+ }
+
+ return cat_size;
+}
+
+/**
+ * cipso_v4_map_cat_rng_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category list in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO category list in bytes
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO category list to the correct
+ * local MLS category bitmap using the given DOI definition. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
+ const unsigned char *net_cat,
+ u32 net_cat_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u32 net_iter;
+ u16 cat_low;
+ u16 cat_high;
+
+ for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
+ cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
+ if ((net_iter + 4) <= net_cat_len)
+ cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
+ else
+ cat_low = 0;
+
+ ret_val = netlbl_secattr_catmap_setrng(secattr->mls_cat,
+ cat_low,
+ cat_high,
+ GFP_ATOMIC);
+ if (ret_val != 0)
+ return ret_val;
+ }
+
+ return 0;
}
/*
* Protocol Handling Functions
*/
+#define CIPSO_V4_OPT_LEN_MAX 40
#define CIPSO_V4_HDR_LEN 6
/**
* cipso_v4_gentag_hdr - Generate a CIPSO option header
* @doi_def: the DOI definition
- * @len: the total tag length in bytes
+ * @len: the total tag length in bytes, not including this header
* @buf: the CIPSO option buffer
*
* Description:
- * Write a CIPSO header into the beginning of @buffer. Return zero on success,
- * negative values on failure.
+ * Write a CIPSO header into the beginning of @buf.
*
*/
-static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
- u32 len,
- unsigned char *buf)
+static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+ unsigned char *buf,
+ u32 len)
{
- if (CIPSO_V4_HDR_LEN + len > 40)
- return -ENOSPC;
-
buf[0] = IPOPT_CIPSO;
buf[1] = CIPSO_V4_HDR_LEN + len;
- *(u32 *)&buf[2] = htonl(doi_def->doi);
-
- return 0;
+ *(__be32 *)&buf[2] = htonl(doi_def->doi);
}
-#define CIPSO_V4_TAG1_CAT_LEN 30
-
/**
* cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
* @doi_def: the DOI definition
@@ -983,83 +1227,249 @@ static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
* Description:
* Generate a CIPSO option using the restricted bitmap tag, tag type #1. The
* actual buffer length may be larger than the indicated size due to
- * translation between host and network category bitmaps. Returns zero on
- * success, negative values on failure.
+ * translation between host and network category bitmaps. Returns the size of
+ * the tag on success, negative values on failure.
*
*/
static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
const struct netlbl_lsm_secattr *secattr,
- unsigned char **buffer,
- u32 *buffer_len)
+ unsigned char *buffer,
+ u32 buffer_len)
{
- int ret_val = -EPERM;
- unsigned char *buf = NULL;
- u32 buf_len;
+ int ret_val;
+ u32 tag_len;
u32 level;
- if (secattr->mls_cat) {
- buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
- GFP_ATOMIC);
- if (buf == NULL)
- return -ENOMEM;
+ if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0)
+ return -EPERM;
+
+ ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+ if (ret_val != 0)
+ return ret_val;
+ if (secattr->flags & NETLBL_SECATTR_MLS_CAT) {
ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
- secattr->mls_cat,
- secattr->mls_cat_len,
- &buf[CIPSO_V4_HDR_LEN + 4],
- CIPSO_V4_TAG1_CAT_LEN);
+ secattr,
+ &buffer[4],
+ buffer_len - 4);
if (ret_val < 0)
- goto gentag_failure;
+ return ret_val;
/* This will send packets using the "optimized" format when
* possibile as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
- ret_val = 10;
+ if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ tag_len = 14;
+ else
+ tag_len = 4 + ret_val;
+ } else
+ tag_len = 4;
+
+ buffer[0] = 0x01;
+ buffer[1] = tag_len;
+ buffer[3] = level;
+
+ return tag_len;
+}
- buf_len = 4 + ret_val;
- } else {
- buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
- if (buf == NULL)
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr. Return zero on success, negative values on
+ * failure.
+ *
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+ const unsigned char *tag,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u8 tag_len = tag[1];
+ u32 level;
+
+ ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+ if (ret_val != 0)
+ return ret_val;
+ secattr->mls_lvl = level;
+ secattr->flags |= NETLBL_SECATTR_MLS_LVL;
+
+ if (tag_len > 4) {
+ secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+ if (secattr->mls_cat == NULL)
return -ENOMEM;
- buf_len = 4;
+
+ ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
+ &tag[4],
+ tag_len - 4,
+ secattr);
+ if (ret_val != 0) {
+ netlbl_secattr_catmap_free(secattr->mls_cat);
+ return ret_val;
+ }
+
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
+ return 0;
+}
+
+/**
+ * cipso_v4_gentag_enum - Generate a CIPSO enumerated tag (type #2)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the enumerated tag, tag type #2. Returns the
+ * size of the tag on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *buffer,
+ u32 buffer_len)
+{
+ int ret_val;
+ u32 tag_len;
+ u32 level;
+
+ if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL))
+ return -EPERM;
+
ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
if (ret_val != 0)
- goto gentag_failure;
+ return ret_val;
+
+ if (secattr->flags & NETLBL_SECATTR_MLS_CAT) {
+ ret_val = cipso_v4_map_cat_enum_hton(doi_def,
+ secattr,
+ &buffer[4],
+ buffer_len - 4);
+ if (ret_val < 0)
+ return ret_val;
+
+ tag_len = 4 + ret_val;
+ } else
+ tag_len = 4;
+
+ buffer[0] = 0x02;
+ buffer[1] = tag_len;
+ buffer[3] = level;
+
+ return tag_len;
+}
+
+/**
+ * cipso_v4_parsetag_enum - Parse a CIPSO enumerated tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO enumerated tag (tag type #2) and return the security
+ * attributes in @secattr. Return zero on success, negative values on
+ * failure.
+ *
+ */
+static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def,
+ const unsigned char *tag,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u8 tag_len = tag[1];
+ u32 level;
- ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
+ ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
if (ret_val != 0)
- goto gentag_failure;
+ return ret_val;
+ secattr->mls_lvl = level;
+ secattr->flags |= NETLBL_SECATTR_MLS_LVL;
- buf[CIPSO_V4_HDR_LEN] = 0x01;
- buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
- buf[CIPSO_V4_HDR_LEN + 3] = level;
+ if (tag_len > 4) {
+ secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
+ if (secattr->mls_cat == NULL)
+ return -ENOMEM;
- *buffer = buf;
- *buffer_len = CIPSO_V4_HDR_LEN + buf_len;
+ ret_val = cipso_v4_map_cat_enum_ntoh(doi_def,
+ &tag[4],
+ tag_len - 4,
+ secattr);
+ if (ret_val != 0) {
+ netlbl_secattr_catmap_free(secattr->mls_cat);
+ return ret_val;
+ }
+
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ }
return 0;
+}
-gentag_failure:
- kfree(buf);
- return ret_val;
+/**
+ * cipso_v4_gentag_rng - Generate a CIPSO ranged tag (type #5)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the ranged tag, tag type #5. Returns the
+ * size of the tag on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *buffer,
+ u32 buffer_len)
+{
+ int ret_val;
+ u32 tag_len;
+ u32 level;
+
+ if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL))
+ return -EPERM;
+
+ ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+ if (ret_val != 0)
+ return ret_val;
+
+ if (secattr->flags & NETLBL_SECATTR_MLS_CAT) {
+ ret_val = cipso_v4_map_cat_rng_hton(doi_def,
+ secattr,
+ &buffer[4],
+ buffer_len - 4);
+ if (ret_val < 0)
+ return ret_val;
+
+ tag_len = 4 + ret_val;
+ } else
+ tag_len = 4;
+
+ buffer[0] = 0x05;
+ buffer[1] = tag_len;
+ buffer[3] = level;
+
+ return tag_len;
}
/**
- * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * cipso_v4_parsetag_rng - Parse a CIPSO ranged tag
* @doi_def: the DOI definition
* @tag: the CIPSO tag
* @secattr: the security attributes
*
* Description:
- * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
- * attributes in @secattr. Return zero on success, negatives values on
- * failure.
+ * Parse a CIPSO ranged tag (tag type #5) and return the security attributes
+ * in @secattr. Return zero on success, negative values on failure.
*
*/
-static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
const unsigned char *tag,
struct netlbl_lsm_secattr *secattr)
{
@@ -1071,32 +1481,23 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
if (ret_val != 0)
return ret_val;
secattr->mls_lvl = level;
- secattr->mls_lvl_vld = 1;
+ secattr->flags |= NETLBL_SECATTR_MLS_LVL;
if (tag_len > 4) {
- switch (doi_def->type) {
- case CIPSO_V4_MAP_PASS:
- secattr->mls_cat_len = tag_len - 4;
- break;
- case CIPSO_V4_MAP_STD:
- secattr->mls_cat_len =
- doi_def->map.std->cat.local_size;
- break;
- }
- secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+ secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC);
if (secattr->mls_cat == NULL)
return -ENOMEM;
- ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
+ ret_val = cipso_v4_map_cat_rng_ntoh(doi_def,
&tag[4],
tag_len - 4,
- secattr->mls_cat,
- secattr->mls_cat_len);
- if (ret_val < 0) {
- kfree(secattr->mls_cat);
+ secattr);
+ if (ret_val != 0) {
+ netlbl_secattr_catmap_free(secattr->mls_cat);
return ret_val;
}
- secattr->mls_cat_len = ret_val;
+
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
}
return 0;
@@ -1140,7 +1541,7 @@ int cipso_v4_validate(unsigned char **option)
}
rcu_read_lock();
- doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+ doi_def = cipso_v4_doi_search(ntohl(*((__be32 *)&opt[2])));
if (doi_def == NULL) {
err_offset = 2;
goto validate_return_locked;
@@ -1191,6 +1592,44 @@ int cipso_v4_validate(unsigned char **option)
}
}
break;
+ case CIPSO_V4_TAG_ENUM:
+ if (tag_len < 4) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ if (cipso_v4_map_lvl_valid(doi_def,
+ tag[3]) < 0) {
+ err_offset = opt_iter + 3;
+ goto validate_return_locked;
+ }
+ if (tag_len > 4 &&
+ cipso_v4_map_cat_enum_valid(doi_def,
+ &tag[4],
+ tag_len - 4) < 0) {
+ err_offset = opt_iter + 4;
+ goto validate_return_locked;
+ }
+ break;
+ case CIPSO_V4_TAG_RANGE:
+ if (tag_len < 4) {
+ err_offset = opt_iter + 1;
+ goto validate_return_locked;
+ }
+
+ if (cipso_v4_map_lvl_valid(doi_def,
+ tag[3]) < 0) {
+ err_offset = opt_iter + 3;
+ goto validate_return_locked;
+ }
+ if (tag_len > 4 &&
+ cipso_v4_map_cat_rng_valid(doi_def,
+ &tag[4],
+ tag_len - 4) < 0) {
+ err_offset = opt_iter + 4;
+ goto validate_return_locked;
+ }
+ break;
default:
err_offset = opt_iter;
goto validate_return_locked;
@@ -1265,7 +1704,7 @@ int cipso_v4_socket_setattr(const struct socket *sock,
{
int ret_val = -EPERM;
u32 iter;
- unsigned char *buf = NULL;
+ unsigned char *buf;
u32 buf_len = 0;
u32 opt_len;
struct ip_options *opt = NULL;
@@ -1281,17 +1720,40 @@ int cipso_v4_socket_setattr(const struct socket *sock,
if (sk == NULL)
return 0;
+ /* We allocate the maximum CIPSO option size here so we are probably
+ * being a little wasteful, but it makes our life _much_ easier later
+ * on and after all we are only talking about 40 bytes. */
+ buf_len = CIPSO_V4_OPT_LEN_MAX;
+ buf = kmalloc(buf_len, GFP_ATOMIC);
+ if (buf == NULL) {
+ ret_val = -ENOMEM;
+ goto socket_setattr_failure;
+ }
+
/* XXX - This code assumes only one tag per CIPSO option which isn't
* really a good assumption to make but since we only support the MAC
* tags right now it is a safe assumption. */
iter = 0;
do {
+ memset(buf, 0, buf_len);
switch (doi_def->tags[iter]) {
case CIPSO_V4_TAG_RBITMAP:
ret_val = cipso_v4_gentag_rbm(doi_def,
- secattr,
- &buf,
- &buf_len);
+ secattr,
+ &buf[CIPSO_V4_HDR_LEN],
+ buf_len - CIPSO_V4_HDR_LEN);
+ break;
+ case CIPSO_V4_TAG_ENUM:
+ ret_val = cipso_v4_gentag_enum(doi_def,
+ secattr,
+ &buf[CIPSO_V4_HDR_LEN],
+ buf_len - CIPSO_V4_HDR_LEN);
+ break;
+ case CIPSO_V4_TAG_RANGE:
+ ret_val = cipso_v4_gentag_rng(doi_def,
+ secattr,
+ &buf[CIPSO_V4_HDR_LEN],
+ buf_len - CIPSO_V4_HDR_LEN);
break;
default:
ret_val = -EPERM;
@@ -1299,15 +1761,18 @@ int cipso_v4_socket_setattr(const struct socket *sock,
}
iter++;
- } while (ret_val != 0 &&
+ } while (ret_val < 0 &&
iter < CIPSO_V4_TAG_MAXCNT &&
doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
- if (ret_val != 0)
+ if (ret_val < 0)
goto socket_setattr_failure;
+ cipso_v4_gentag_hdr(doi_def, buf, ret_val);
+ buf_len = CIPSO_V4_HDR_LEN + ret_val;
/* We can't use ip_options_get() directly because it makes a call to
* ip_options_get_alloc() which allocates memory with GFP_KERNEL and
- * we can't block here. */
+ * we won't always have CAP_NET_RAW even though we _always_ want to
+ * set the IPOPT_CIPSO option. */
opt_len = (buf_len + 3) & ~3;
opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
if (opt == NULL) {
@@ -1317,11 +1782,9 @@ int cipso_v4_socket_setattr(const struct socket *sock,
memcpy(opt->__data, buf, buf_len);
opt->optlen = opt_len;
opt->is_data = 1;
+ opt->cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
- ret_val = ip_options_compile(opt, NULL);
- if (ret_val != 0)
- goto socket_setattr_failure;
sk_inet = inet_sk(sk);
if (sk_inet->is_icsk) {
@@ -1371,19 +1834,33 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
if (ret_val == 0)
return ret_val;
- doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ doi = ntohl(*(__be32 *)&cipso_ptr[2]);
rcu_read_lock();
- doi_def = cipso_v4_doi_getdef(doi);
+ doi_def = cipso_v4_doi_search(doi);
if (doi_def == NULL) {
rcu_read_unlock();
return -ENOMSG;
}
+
+ /* XXX - This code assumes only one tag per CIPSO option which isn't
+ * really a good assumption to make but since we only support the MAC
+ * tags right now it is a safe assumption. */
switch (cipso_ptr[6]) {
case CIPSO_V4_TAG_RBITMAP:
ret_val = cipso_v4_parsetag_rbm(doi_def,
&cipso_ptr[6],
secattr);
break;
+ case CIPSO_V4_TAG_ENUM:
+ ret_val = cipso_v4_parsetag_enum(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
+ case CIPSO_V4_TAG_RANGE:
+ ret_val = cipso_v4_parsetag_rng(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
}
rcu_read_unlock();
@@ -1431,23 +1908,30 @@ int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
u32 doi;
struct cipso_v4_doi *doi_def;
- if (!CIPSO_V4_OPTEXIST(skb))
- return -ENOMSG;
cipso_ptr = CIPSO_V4_OPTPTR(skb);
if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
return 0;
- doi = ntohl(*(u32 *)&cipso_ptr[2]);
+ doi = ntohl(*(__be32 *)&cipso_ptr[2]);
rcu_read_lock();
- doi_def = cipso_v4_doi_getdef(doi);
+ doi_def = cipso_v4_doi_search(doi);
if (doi_def == NULL)
goto skbuff_getattr_return;
+
+ /* XXX - This code assumes only one tag per CIPSO option which isn't
+ * really a good assumption to make but since we only support the MAC
+ * tags right now it is a safe assumption. */
switch (cipso_ptr[6]) {
case CIPSO_V4_TAG_RBITMAP:
ret_val = cipso_v4_parsetag_rbm(doi_def,
&cipso_ptr[6],
secattr);
break;
+ case CIPSO_V4_TAG_ENUM:
+ ret_val = cipso_v4_parsetag_enum(doi_def,
+ &cipso_ptr[6],
+ secattr);
+ break;
}
skbuff_getattr_return:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7602c79a389..84bed40273a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -577,20 +577,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
* Determine a default network mask, based on the IP address.
*/
-static __inline__ int inet_abc_len(u32 addr)
+static __inline__ int inet_abc_len(__be32 addr)
{
int rc = -1; /* Something else, probably a multicast. */
if (ZERONET(addr))
rc = 0;
else {
- addr = ntohl(addr);
+ __u32 haddr = ntohl(addr);
- if (IN_CLASSA(addr))
+ if (IN_CLASSA(haddr))
rc = 8;
- else if (IN_CLASSB(addr))
+ else if (IN_CLASSB(haddr))
rc = 16;
- else if (IN_CLASSC(addr))
+ else if (IN_CLASSC(haddr))
rc = 24;
}
@@ -1120,6 +1120,16 @@ static struct notifier_block ip_netdev_notifier = {
.notifier_call =inetdev_event,
};
+static inline size_t inet_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(4) /* IFA_ADDRESS */
+ + nla_total_size(4) /* IFA_LOCAL */
+ + nla_total_size(4) /* IFA_BROADCAST */
+ + nla_total_size(4) /* IFA_ANYCAST */
+ + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+}
+
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
u32 pid, u32 seq, int event, unsigned int flags)
{
@@ -1208,15 +1218,13 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
u32 seq = nlh ? nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in inet_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
errout:
@@ -1295,8 +1303,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
int *valp = table->data;
int new;
@@ -1556,12 +1563,12 @@ static void devinet_sysctl_register(struct in_device *in_dev,
{
int i;
struct net_device *dev = in_dev ? in_dev->dev : NULL;
- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+ struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
+ GFP_KERNEL);
char *dev_name = NULL;
if (!t)
return;
- memcpy(t, &devinet_sysctl, sizeof(*t));
for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
t->devinet_vars[i].de = NULL;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b5c205b5766..f2c6776ea0e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -67,7 +67,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh;
- u32 *udpdata32;
+ __be32 *udpdata32;
uh = (struct udphdr *)esph;
uh->source = encap->encap_sport;
@@ -81,7 +81,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esph = (struct ip_esp_hdr *)(uh + 1);
break;
case UDP_ENCAP_ESPINUDP_NON_IKE:
- udpdata32 = (u32 *)(uh + 1);
+ udpdata32 = (__be32 *)(uh + 1);
udpdata32[0] = udpdata32[1] = 0;
esph = (struct ip_esp_hdr *)(udpdata32 + 2);
break;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index af0190d8b6c..d47b72af89e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -768,8 +768,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
{
struct fib_result res;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
- .fwmark = frn->fl_fwmark,
+ struct flowi fl = { .mark = frn->fl_mark,
+ .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
.tos = frn->fl_tos,
.scope = frn->fl_scope } } };
if (tb) {
@@ -811,7 +811,6 @@ static void nl_fib_input(struct sock *sk, int len)
pid = nlh->nlmsg_pid; /*pid of sending process */
NETLINK_CB(skb).pid = 0; /* from kernel */
- NETLINK_CB(skb).dst_pid = pid;
NETLINK_CB(skb).dst_group = 0; /* unicast */
netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 107bb6cbb0b..648f47c1c39 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -45,8 +45,8 @@
#include "fib_lookup.h"
-static kmem_cache_t *fn_hash_kmem __read_mostly;
-static kmem_cache_t *fn_alias_kmem __read_mostly;
+static struct kmem_cache *fn_hash_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __read_mostly;
struct fib_node {
struct hlist_node fn_hash;
@@ -485,13 +485,13 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
goto out;
err = -ENOBUFS;
- new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+ new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
goto out;
new_f = NULL;
if (!f) {
- new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
+ new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL);
if (new_f == NULL)
goto out_free_new_fa;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0852b9cd065..b837c33e040 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -44,10 +44,6 @@ struct fib4_rule
__be32 srcmask;
__be32 dst;
__be32 dstmask;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- u32 fwmark;
- u32 fwmask;
-#endif
#ifdef CONFIG_NET_CLS_ROUTE
u32 tclassid;
#endif
@@ -160,11 +156,6 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl->fl4_tos))
return 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
- return 0;
-#endif
-
return 1;
}
@@ -179,14 +170,10 @@ static struct fib_table *fib_empty_table(void)
}
static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
- [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
- [FRA_PRIORITY] = { .type = NLA_U32 },
+ FRA_GENERIC_POLICY,
[FRA_SRC] = { .type = NLA_U32 },
[FRA_DST] = { .type = NLA_U32 },
- [FRA_FWMARK] = { .type = NLA_U32 },
- [FRA_FWMASK] = { .type = NLA_U32 },
[FRA_FLOW] = { .type = NLA_U32 },
- [FRA_TABLE] = { .type = NLA_U32 },
};
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
@@ -220,20 +207,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_DST])
rule4->dst = nla_get_be32(tb[FRA_DST]);
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if (tb[FRA_FWMARK]) {
- rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
- if (rule4->fwmark)
- /* compatibility: if the mark value is non-zero all bits
- * are compared unless a mask is explicitly specified.
- */
- rule4->fwmask = 0xFFFFFFFF;
- }
-
- if (tb[FRA_FWMASK])
- rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
-#endif
-
#ifdef CONFIG_NET_CLS_ROUTE
if (tb[FRA_FLOW])
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
@@ -264,14 +237,6 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->tos && (rule4->tos != frh->tos))
return 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
- return 0;
-
- if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
- return 0;
-#endif
-
#ifdef CONFIG_NET_CLS_ROUTE
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -296,14 +261,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->src_len = rule4->src_len;
frh->tos = rule4->tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- if (rule4->fwmark)
- NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
-
- if (rule4->fwmask || rule4->fwmark)
- NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
-#endif
-
if (rule4->dst_len)
NLA_PUT_BE32(skb, FRA_DST, rule4->dst);
@@ -342,6 +299,13 @@ static u32 fib4_rule_default_pref(void)
return 0;
}
+static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+{
+ return nla_total_size(4) /* dst */
+ + nla_total_size(4) /* src */
+ + nla_total_size(4); /* flow */
+}
+
static struct fib_rules_ops fib4_rules_ops = {
.family = AF_INET,
.rule_size = sizeof(struct fib4_rule),
@@ -351,6 +315,7 @@ static struct fib_rules_ops fib4_rules_ops = {
.compare = fib4_rule_compare,
.fill = fib4_rule_fill,
.default_pref = fib4_rule_default_pref,
+ .nlmsg_payload = fib4_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV4_RULE,
.policy = fib4_rule_policy,
.rules_list = &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 884d176e008..e63b8a98fb4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -273,25 +273,49 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
return -1;
}
+static inline size_t fib_nlmsg_size(struct fib_info *fi)
+{
+ size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(4) /* RTA_DST */
+ + nla_total_size(4) /* RTA_PRIORITY */
+ + nla_total_size(4); /* RTA_PREFSRC */
+
+ /* space for nested metrics */
+ payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+
+ if (fi->fib_nhs) {
+ /* Also handles the special case fib_nhs == 1 */
+
+ /* each nexthop is packed in an attribute */
+ size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+
+ /* may contain flow and gateway attribute */
+ nhsize += 2 * nla_total_size(4);
+
+ /* all nexthops are packed in a nested attribute */
+ payload += nla_total_size(fi->fib_nhs * nhsize);
+ }
+
+ return payload;
+}
+
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
int dst_len, u32 tb_id, struct nl_info *info)
{
struct sk_buff *skb;
- int payload = sizeof(struct rtmsg) + 256;
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
- skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+ skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = fib_dump_info(skb, info->pid, seq, event, tb_id,
fa->fa_type, fa->fa_scope, key, dst_len,
fa->fa_tos, fa->fa_info, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in fib_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
info->nlh, GFP_KERNEL);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d17990ec724..cfb249cc0a5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -172,7 +172,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
static void tnode_free(struct tnode *tn);
-static kmem_cache_t *fn_alias_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct trie *trie_local = NULL, *trie_main = NULL;
@@ -1187,7 +1187,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
u8 state;
err = -ENOBUFS;
- new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+ new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
goto out;
@@ -1232,7 +1232,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
goto out;
err = -ENOBUFS;
- new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+ new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
goto out;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b39a37a4754..40cf0d0e1b8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -332,7 +332,7 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
struct sk_buff *skb)
{
struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
- unsigned int csum;
+ __wsum csum;
csum = skb_copy_and_csum_bits(icmp_param->skb,
icmp_param->offset + offset,
@@ -356,7 +356,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
ip_flush_pending_frames(icmp_socket->sk);
else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = skb->h.icmph;
- unsigned int csum = 0;
+ __wsum csum = 0;
struct sk_buff *skb1;
skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
@@ -931,7 +931,7 @@ int icmp_rcv(struct sk_buff *skb)
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
- if (!(u16)csum_fold(skb->csum))
+ if (!csum_fold(skb->csum))
break;
/* fall through */
case CHECKSUM_NONE:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6eee71647b7..0017ccb01d6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -932,7 +932,7 @@ int igmp_rcv(struct sk_buff *skb)
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
- if (!(u16)csum_fold(skb->csum))
+ if (!csum_fold(skb->csum))
break;
/* fall through */
case CHECKSUM_NONE:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 96bbe2a0aa1..9d68837888d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -343,7 +343,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
EXPORT_SYMBOL_GPL(inet_csk_route_req);
static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
- const u32 rnd, const u16 synq_hsize)
+ const u32 rnd, const u32 synq_hsize)
{
return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 244c4f445c7..8c79c8a4ea5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -27,11 +27,11 @@
* Allocate and initialize a new local port bind bucket.
* The bindhash mutex for snum's hash chain must be held here.
*/
-struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
+struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
struct inet_bind_hashbucket *head,
const unsigned short snum)
{
- struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
+ struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
if (tb != NULL) {
tb->port = snum;
@@ -45,7 +45,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
-void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb)
+void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
if (hlist_empty(&tb->owners)) {
__hlist_del(&tb->node);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index cdd805344c6..9f414e35c48 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -91,7 +91,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
{
struct inet_timewait_sock *tw =
kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
- SLAB_ATOMIC);
+ GFP_ATOMIC);
if (tw != NULL) {
const struct inet_sock *inet = inet_sk(sk);
@@ -178,7 +178,6 @@ void inet_twdr_hangman(unsigned long data)
need_timer = 0;
if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
twdr->thread_slots |= (1 << twdr->slot);
- mb();
schedule_work(&twdr->twkill_work);
need_timer = 1;
} else {
@@ -197,9 +196,10 @@ EXPORT_SYMBOL_GPL(inet_twdr_hangman);
extern void twkill_slots_invalid(void);
-void inet_twdr_twkill_work(void *data)
+void inet_twdr_twkill_work(struct work_struct *work)
{
- struct inet_timewait_death_row *twdr = data;
+ struct inet_timewait_death_row *twdr =
+ container_of(work, struct inet_timewait_death_row, twkill_work);
int i;
if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index f072f3875af..711eb6d0285 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -73,7 +73,7 @@
/* Exported for inet_getid inline function. */
DEFINE_SPINLOCK(inet_peer_idlock);
-static kmem_cache_t *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __read_mostly;
#define node_height(x) x->avl_height
static struct inet_peer peer_fake_node = {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 74046efdf87..8ce00d3703d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -565,7 +565,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
} else {
struct sk_buff *free_it = next;
- /* Old fragmnet is completely overridden with
+ /* Old fragment is completely overridden with
* new one drop it.
*/
next = next->next;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d5b5dec075b..476cb6084c7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -144,7 +144,7 @@ static struct net_device *ipgre_fb_tunnel_dev;
*/
#define HASH_SIZE 16
-#define HASH(addr) ((addr^(addr>>4))&0xF)
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static struct ip_tunnel *tunnels[4][HASH_SIZE];
@@ -157,7 +157,7 @@ static DEFINE_RWLOCK(ipgre_lock);
/* Given src, dst and key, find appropriate for input tunnel. */
-static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
+static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(key);
@@ -194,9 +194,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
{
- u32 remote = t->parms.iph.daddr;
- u32 local = t->parms.iph.saddr;
- u32 key = t->parms.i_key;
+ __be32 remote = t->parms.iph.daddr;
+ __be32 local = t->parms.iph.saddr;
+ __be32 key = t->parms.i_key;
unsigned h = HASH(key);
int prio = 0;
@@ -236,9 +236,9 @@ static void ipgre_tunnel_unlink(struct ip_tunnel *t)
static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
- u32 remote = parms->iph.daddr;
- u32 local = parms->iph.saddr;
- u32 key = parms->i_key;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ __be32 key = parms->i_key;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
unsigned h = HASH(key);
@@ -319,12 +319,12 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
*/
struct iphdr *iph = (struct iphdr*)skb->data;
- u16 *p = (u16*)(skb->data+(iph->ihl<<2));
+ __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
int grehlen = (iph->ihl<<2) + 4;
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
struct ip_tunnel *t;
- u16 flags;
+ __be16 flags;
flags = p[0];
if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
@@ -370,7 +370,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
}
read_lock(&ipgre_lock);
- t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
+ t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
goto out;
@@ -388,14 +388,14 @@ out:
#else
struct iphdr *iph = (struct iphdr*)dp;
struct iphdr *eiph;
- u16 *p = (u16*)(dp+(iph->ihl<<2));
+ __be16 *p = (__be16*)(dp+(iph->ihl<<2));
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
int rel_type = 0;
int rel_code = 0;
__be32 rel_info = 0;
__u32 n = 0;
- u16 flags;
+ __be16 flags;
int grehlen = (iph->ihl<<2) + 4;
struct sk_buff *skb2;
struct flowi fl;
@@ -556,9 +556,9 @@ static int ipgre_rcv(struct sk_buff *skb)
{
struct iphdr *iph;
u8 *h;
- u16 flags;
- u16 csum = 0;
- u32 key = 0;
+ __be16 flags;
+ __sum16 csum = 0;
+ __be32 key = 0;
u32 seqno = 0;
struct ip_tunnel *tunnel;
int offset = 4;
@@ -568,7 +568,7 @@ static int ipgre_rcv(struct sk_buff *skb)
iph = skb->nh.iph;
h = skb->data;
- flags = *(u16*)h;
+ flags = *(__be16*)h;
if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
/* - Version must be 0.
@@ -580,7 +580,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if (flags&GRE_CSUM) {
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
- csum = (u16)csum_fold(skb->csum);
+ csum = csum_fold(skb->csum);
if (!csum)
break;
/* fall through */
@@ -592,11 +592,11 @@ static int ipgre_rcv(struct sk_buff *skb)
offset += 4;
}
if (flags&GRE_KEY) {
- key = *(u32*)(h + offset);
+ key = *(__be32*)(h + offset);
offset += 4;
}
if (flags&GRE_SEQ) {
- seqno = ntohl(*(u32*)(h + offset));
+ seqno = ntohl(*(__be32*)(h + offset));
offset += 4;
}
}
@@ -605,7 +605,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
secpath_reset(skb);
- skb->protocol = *(u16*)(h + 2);
+ skb->protocol = *(__be16*)(h + 2);
/* WCCP version 1 and 2 protocol decoding.
* - Change protocol to IP
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
@@ -673,13 +673,13 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct iphdr *old_iph = skb->nh.iph;
struct iphdr *tiph;
u8 tos;
- u16 df;
+ __be16 df;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
int gre_hlen;
- u32 dst;
+ __be32 dst;
int mtu;
if (tunnel->recursion++) {
@@ -860,11 +860,11 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
}
- ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
- ((u16*)(iph+1))[1] = skb->protocol;
+ ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
+ ((__be16*)(iph+1))[1] = skb->protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
- u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
+ __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
if (tunnel->parms.o_flags&GRE_SEQ) {
++tunnel->o_seqno;
@@ -877,7 +877,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
if (tunnel->parms.o_flags&GRE_CSUM) {
*ptr = 0;
- *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+ *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
}
}
@@ -1068,7 +1068,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
{
struct ip_tunnel *t = netdev_priv(dev);
struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
- u16 *p = (u16*)(iph+1);
+ __be16 *p = (__be16*)(iph+1);
memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
p[0] = t->parms.o_flags;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 8dabbfc3126..9f02917d6f4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -443,7 +443,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
opt->router_alert = optptr - iph;
break;
case IPOPT_CIPSO:
- if (opt->cipso) {
+ if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
pp_ptr = optptr;
goto error;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index fc195a44fc2..f071f84808f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -53,6 +53,7 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/highmem.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct hh_cache *hh = dst->hh;
struct net_device *dev = dst->dev;
int hh_len = LL_RESERVED_SPACE(dev);
@@ -182,16 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
skb = skb2;
}
- if (hh) {
- int hh_alen;
-
- read_lock_bh(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(hh->hh_len);
- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
- read_unlock_bh(&hh->hh_lock);
- skb_push(skb, hh->hh_len);
- return hh->hh_output(skb);
- } else if (dst->neighbour)
+ if (dst->hh)
+ return neigh_hh_output(dst->hh, skb);
+ else if (dst->neighbour)
return dst->neighbour->output(skb);
if (net_ratelimit())
@@ -288,9 +281,8 @@ int ip_output(struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+int ip_queue_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = inet->opt;
struct rtable *rt;
@@ -342,7 +334,7 @@ packet_routed:
/* OK, we know where to send it, allocate and build IP header. */
iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
- *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+ *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
iph->tot_len = htons(skb->len);
if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
iph->frag_off = htons(IP_DF);
@@ -386,6 +378,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
dst_release(to->dst);
to->dst = dst_clone(from->dst);
to->dev = from->dev;
+ to->mark = from->mark;
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
@@ -394,7 +387,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
- to->nfmark = from->nfmark;
/* Connection association is same as pre-frag packet */
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
@@ -683,7 +675,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
if (memcpy_fromiovecend(to, iov, offset, len) < 0)
return -EFAULT;
} else {
- unsigned int csum = 0;
+ __wsum csum = 0;
if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
return -EFAULT;
skb->csum = csum_block_add(skb->csum, csum, odd);
@@ -691,11 +683,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
return 0;
}
-static inline unsigned int
+static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
char *kaddr;
- unsigned int csum;
+ __wsum csum;
kaddr = kmap(page);
csum = csum_partial(kaddr + offset, copy, 0);
kunmap(page);
@@ -1167,7 +1159,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
}
if (skb->ip_summed == CHECKSUM_NONE) {
- unsigned int csum;
+ __wsum csum;
csum = csum_page(page, offset, len);
skb->csum = csum_block_add(skb->csum, csum, skb->len);
}
@@ -1315,7 +1307,7 @@ void ip_flush_pending_frames(struct sock *sk)
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
int len, int odd, struct sk_buff *skb)
{
- unsigned int csum;
+ __wsum csum;
csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
skb->csum = csum_block_add(skb->csum, csum, odd);
@@ -1385,7 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
- *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+ *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4b132953bcc..57d4bae6f08 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -355,7 +355,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = *(u32*)(skb->nh.raw + serr->addr_offset);
+ sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f8ce8475915..afa60b9a003 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -101,6 +101,7 @@
#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
- '3' from resolv.h */
+#define NONE __constant_htonl(INADDR_NONE)
/*
* Public IP configuration
@@ -129,19 +130,19 @@ int ic_proto_enabled __initdata = 0
static int ic_host_name_set __initdata = 0; /* Host name set by us? */
-u32 ic_myaddr = INADDR_NONE; /* My IP address */
-static u32 ic_netmask = INADDR_NONE; /* Netmask for local subnet */
-u32 ic_gateway = INADDR_NONE; /* Gateway IP address */
+__be32 ic_myaddr = NONE; /* My IP address */
+static __be32 ic_netmask = NONE; /* Netmask for local subnet */
+__be32 ic_gateway = NONE; /* Gateway IP address */
-u32 ic_servaddr = INADDR_NONE; /* Boot server IP address */
+__be32 ic_servaddr = NONE; /* Boot server IP address */
-u32 root_server_addr = INADDR_NONE; /* Address of NFS server */
+__be32 root_server_addr = NONE; /* Address of NFS server */
u8 root_server_path[256] = { 0, }; /* Path to mount as root */
/* Persistent data: */
static int ic_proto_used; /* Protocol used, if any */
-static u32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
+static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
/*
@@ -172,7 +173,7 @@ struct ic_device {
struct net_device *dev;
unsigned short flags;
short able;
- u32 xid;
+ __be32 xid;
};
static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
@@ -223,7 +224,7 @@ static int __init ic_open_devs(void)
d->flags = oflags;
d->able = able;
if (able & IC_BOOTP)
- get_random_bytes(&d->xid, sizeof(u32));
+ get_random_bytes(&d->xid, sizeof(__be32));
else
d->xid = 0;
ic_proto_have_if |= able;
@@ -269,7 +270,7 @@ static void __init ic_close_devs(void)
*/
static inline void
-set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port)
+set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
{
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = addr;
@@ -332,7 +333,7 @@ static int __init ic_setup_routes(void)
{
/* No need to setup device routes, only the default route... */
- if (ic_gateway != INADDR_NONE) {
+ if (ic_gateway != NONE) {
struct rtentry rm;
int err;
@@ -368,10 +369,10 @@ static int __init ic_defaults(void)
if (!ic_host_name_set)
sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
- if (root_server_addr == INADDR_NONE)
+ if (root_server_addr == NONE)
root_server_addr = ic_servaddr;
- if (ic_netmask == INADDR_NONE) {
+ if (ic_netmask == NONE) {
if (IN_CLASSA(ntohl(ic_myaddr)))
ic_netmask = htonl(IN_CLASSA_NET);
else if (IN_CLASSB(ntohl(ic_myaddr)))
@@ -420,7 +421,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
{
struct arphdr *rarp;
unsigned char *rarp_ptr;
- unsigned long sip, tip;
+ __be32 sip, tip;
unsigned char *sha, *tha; /* s for "source", t for "target" */
struct ic_device *d;
@@ -485,12 +486,12 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop_unlock;
/* Discard packets which are not from specified server. */
- if (ic_servaddr != INADDR_NONE && ic_servaddr != sip)
+ if (ic_servaddr != NONE && ic_servaddr != sip)
goto drop_unlock;
/* We have a winner! */
ic_dev = dev;
- if (ic_myaddr == INADDR_NONE)
+ if (ic_myaddr == NONE)
ic_myaddr = tip;
ic_servaddr = sip;
ic_got_reply = IC_RARP;
@@ -530,13 +531,13 @@ struct bootp_pkt { /* BOOTP packet format */
u8 htype; /* HW address type */
u8 hlen; /* HW address length */
u8 hops; /* Used only by gateways */
- u32 xid; /* Transaction ID */
- u16 secs; /* Seconds since we started */
- u16 flags; /* Just what it says */
- u32 client_ip; /* Client's IP address if known */
- u32 your_ip; /* Assigned IP address */
- u32 server_ip; /* (Next, e.g. NFS) Server's IP address */
- u32 relay_ip; /* IP address of BOOTP relay */
+ __be32 xid; /* Transaction ID */
+ __be16 secs; /* Seconds since we started */
+ __be16 flags; /* Just what it says */
+ __be32 client_ip; /* Client's IP address if known */
+ __be32 your_ip; /* Assigned IP address */
+ __be32 server_ip; /* (Next, e.g. NFS) Server's IP address */
+ __be32 relay_ip; /* IP address of BOOTP relay */
u8 hw_addr[16]; /* Client's HW address */
u8 serv_name[64]; /* Server host name */
u8 boot_file[128]; /* Name of boot file */
@@ -576,7 +577,7 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 };
static void __init
ic_dhcp_init_options(u8 *options)
{
- u8 mt = ((ic_servaddr == INADDR_NONE)
+ u8 mt = ((ic_servaddr == NONE)
? DHCPDISCOVER : DHCPREQUEST);
u8 *e = options;
@@ -666,7 +667,7 @@ static inline void ic_bootp_init(void)
int i;
for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
- ic_nameservers[i] = INADDR_NONE;
+ ic_nameservers[i] = NONE;
dev_add_pack(&bootp_packet_type);
}
@@ -708,7 +709,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
h->frag_off = htons(IP_DF);
h->ttl = 64;
h->protocol = IPPROTO_UDP;
- h->daddr = INADDR_BROADCAST;
+ h->daddr = htonl(INADDR_BROADCAST);
h->check = ip_fast_csum((unsigned char *) h, h->ihl);
/* Construct UDP header */
@@ -730,8 +731,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
b->htype = dev->type; /* can cause undefined behavior */
}
b->hlen = dev->addr_len;
- b->your_ip = INADDR_NONE;
- b->server_ip = INADDR_NONE;
+ b->your_ip = NONE;
+ b->server_ip = NONE;
memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
b->secs = htons(jiffies_diff / HZ);
b->xid = d->xid;
@@ -788,11 +789,11 @@ static void __init ic_do_bootp_ext(u8 *ext)
switch (*ext++) {
case 1: /* Subnet mask */
- if (ic_netmask == INADDR_NONE)
+ if (ic_netmask == NONE)
memcpy(&ic_netmask, ext+1, 4);
break;
case 3: /* Default gateway */
- if (ic_gateway == INADDR_NONE)
+ if (ic_gateway == NONE)
memcpy(&ic_gateway, ext+1, 4);
break;
case 6: /* DNS server */
@@ -800,7 +801,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
if (servers > CONF_NAMESERVERS_MAX)
servers = CONF_NAMESERVERS_MAX;
for (i = 0; i < servers; i++) {
- if (ic_nameservers[i] == INADDR_NONE)
+ if (ic_nameservers[i] == NONE)
memcpy(&ic_nameservers[i], ext+1+4*i, 4);
}
break;
@@ -917,7 +918,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
#ifdef IPCONFIG_DHCP
if (ic_proto_enabled & IC_USE_DHCP) {
- u32 server_id = INADDR_NONE;
+ __be32 server_id = NONE;
int mt = 0;
ext = &b->exten[4];
@@ -949,7 +950,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
/* While in the process of accepting one offer,
* ignore all others.
*/
- if (ic_myaddr != INADDR_NONE)
+ if (ic_myaddr != NONE)
goto drop_unlock;
/* Let's accept that offer. */
@@ -965,7 +966,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
* precedence over the bootp header one if
* they are different.
*/
- if ((server_id != INADDR_NONE) &&
+ if ((server_id != NONE) &&
(b->server_ip != server_id))
b->server_ip = ic_servaddr;
break;
@@ -979,8 +980,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
default:
/* Urque. Forget it*/
- ic_myaddr = INADDR_NONE;
- ic_servaddr = INADDR_NONE;
+ ic_myaddr = NONE;
+ ic_servaddr = NONE;
goto drop_unlock;
};
@@ -1004,9 +1005,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
ic_dev = dev;
ic_myaddr = b->your_ip;
ic_servaddr = b->server_ip;
- if (ic_gateway == INADDR_NONE && b->relay_ip)
+ if (ic_gateway == NONE && b->relay_ip)
ic_gateway = b->relay_ip;
- if (ic_nameservers[0] == INADDR_NONE)
+ if (ic_nameservers[0] == NONE)
ic_nameservers[0] = ic_servaddr;
ic_got_reply = IC_BOOTP;
@@ -1150,7 +1151,7 @@ static int __init ic_dynamic(void)
#endif
if (!ic_got_reply) {
- ic_myaddr = INADDR_NONE;
+ ic_myaddr = NONE;
return -1;
}
@@ -1182,12 +1183,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"domain %s\n", ic_domain);
for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
- if (ic_nameservers[i] != INADDR_NONE)
+ if (ic_nameservers[i] != NONE)
seq_printf(seq,
"nameserver %u.%u.%u.%u\n",
NIPQUAD(ic_nameservers[i]));
}
- if (ic_servaddr != INADDR_NONE)
+ if (ic_servaddr != NONE)
seq_printf(seq,
"bootserver %u.%u.%u.%u\n",
NIPQUAD(ic_servaddr));
@@ -1213,9 +1214,9 @@ static struct file_operations pnp_seq_fops = {
* need to have root_server_addr set _before_ IPConfig gets called as it
* can override it.
*/
-u32 __init root_nfs_parse_addr(char *name)
+__be32 __init root_nfs_parse_addr(char *name)
{
- u32 addr;
+ __be32 addr;
int octets = 0;
char *cp, *cq;
@@ -1237,7 +1238,7 @@ u32 __init root_nfs_parse_addr(char *name)
addr = in_aton(name);
memmove(name, cp, strlen(cp) + 1);
} else
- addr = INADDR_NONE;
+ addr = NONE;
return addr;
}
@@ -1248,7 +1249,7 @@ u32 __init root_nfs_parse_addr(char *name)
static int __init ip_auto_config(void)
{
- u32 addr;
+ __be32 addr;
#ifdef CONFIG_PROC_FS
proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops);
@@ -1277,11 +1278,11 @@ static int __init ip_auto_config(void)
* interfaces and no default was set), use BOOTP or RARP to get the
* missing values.
*/
- if (ic_myaddr == INADDR_NONE ||
+ if (ic_myaddr == NONE ||
#ifdef CONFIG_ROOT_NFS
(MAJOR(ROOT_DEV) == UNNAMED_MAJOR
- && root_server_addr == INADDR_NONE
- && ic_servaddr == INADDR_NONE) ||
+ && root_server_addr == NONE
+ && ic_servaddr == NONE) ||
#endif
ic_first_dev->next) {
#ifdef IPCONFIG_DYNAMIC
@@ -1334,7 +1335,7 @@ static int __init ip_auto_config(void)
}
addr = root_nfs_parse_addr(root_server_path);
- if (root_server_addr == INADDR_NONE)
+ if (root_server_addr == NONE)
root_server_addr = addr;
/*
@@ -1461,19 +1462,19 @@ static int __init ip_auto_config_setup(char *addrs)
switch (num) {
case 0:
if ((ic_myaddr = in_aton(ip)) == INADDR_ANY)
- ic_myaddr = INADDR_NONE;
+ ic_myaddr = NONE;
break;
case 1:
if ((ic_servaddr = in_aton(ip)) == INADDR_ANY)
- ic_servaddr = INADDR_NONE;
+ ic_servaddr = NONE;
break;
case 2:
if ((ic_gateway = in_aton(ip)) == INADDR_ANY)
- ic_gateway = INADDR_NONE;
+ ic_gateway = NONE;
break;
case 3:
if ((ic_netmask = in_aton(ip)) == INADDR_ANY)
- ic_netmask = INADDR_NONE;
+ ic_netmask = NONE;
break;
case 4:
if ((dp = strchr(ip, '.'))) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 0c455652922..9d719d664e5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -118,7 +118,7 @@
#include <net/xfrm.h>
#define HASH_SIZE 16
-#define HASH(addr) ((addr^(addr>>4))&0xF)
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
@@ -134,7 +134,7 @@ static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunne
static DEFINE_RWLOCK(ipip_lock);
-static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
+static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
@@ -160,8 +160,8 @@ static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
- u32 remote = t->parms.iph.daddr;
- u32 local = t->parms.iph.saddr;
+ __be32 remote = t->parms.iph.daddr;
+ __be32 local = t->parms.iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -203,8 +203,8 @@ static void ipip_tunnel_link(struct ip_tunnel *t)
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
- u32 remote = parms->iph.daddr;
- u32 local = parms->iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
unsigned h = 0;
@@ -519,13 +519,13 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
- u16 df = tiph->frag_off;
+ __be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = skb->nh.iph;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
- u32 dst = tiph->daddr;
+ __be32 dst = tiph->daddr;
int mtu;
if (tunnel->recursion++) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 97cfa97c8ab..ecb5422ea23 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -105,7 +105,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
In this case data path is free of exclusive locks at all.
*/
-static kmem_cache_t *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __read_mostly;
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
@@ -1493,7 +1493,7 @@ static int pim_rcv(struct sk_buff * skb)
if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
(pim->flags&PIM_NULL_REGISTER) ||
(ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
- (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
+ csum_fold(skb_checksum(skb, 0, skb->len, 0))))
goto drop;
/* check if the inner packet is destined to mcast group */
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index e7752334d29..6c40899aa16 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -80,10 +80,9 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
if (!pp->unregister_app)
return -EOPNOTSUPP;
- inc = kmalloc(sizeof(struct ip_vs_app), GFP_KERNEL);
+ inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
if (!inc)
return -ENOMEM;
- memcpy(inc, app, sizeof(*inc));
INIT_LIST_HEAD(&inc->p_list);
INIT_LIST_HEAD(&inc->incs_list);
inc->app = app;
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 8832eb517d5..8086787a2c5 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -44,7 +44,7 @@
static struct list_head *ip_vs_conn_tab;
/* SLAB cache for IPVS connections */
-static kmem_cache_t *ip_vs_conn_cachep __read_mostly;
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
/* counter for current IPVS connections */
static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1445bb47fea..34257520a3a 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -536,9 +536,9 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
return NF_STOP;
}
-u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
- return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+ return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
}
static inline struct sk_buff *
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f261616e460..9b933381ebb 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -221,10 +221,10 @@ static void update_defense_level(void)
* Timer for checking the defense
*/
#define DEFENSE_TIMER_PERIOD 1*HZ
-static void defense_work_handler(void *data);
-static DECLARE_WORK(defense_work, defense_work_handler, NULL);
+static void defense_work_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
-static void defense_work_handler(void *data)
+static void defense_work_handler(struct work_struct *work)
{
update_defense_level();
if (atomic_read(&ip_vs_dropentry))
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 6d398f10aa9..687c1de1146 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -200,7 +200,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
from = n_cp->vaddr;
port = n_cp->vport;
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
- ntohs(port)&255, (ntohs(port)>>8)&255);
+ (ntohs(port)>>8)&255, ntohs(port)&255);
buf_len = strlen(buf);
/*
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 524751e031d..a4385a2180e 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -45,6 +45,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <linux/jiffies.h>
/* for sysctl */
#include <linux/fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 08990192b6e..fe1af5d079a 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -43,6 +43,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <linux/jiffies.h>
/* for sysctl */
#include <linux/fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index c4528b5c800..e844ddb82b9 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -118,13 +118,7 @@ void ip_vs_protocol_timeout_change(int flags)
int *
ip_vs_create_timeout_table(int *table, int size)
{
- int *t;
-
- t = kmalloc(size, GFP_ATOMIC);
- if (t == NULL)
- return NULL;
- memcpy(t, table, size);
- return t;
+ return kmemdup(table, size, GFP_ATOMIC);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bfe779e7459..16a9ebee2fe 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -84,7 +84,7 @@ tcp_conn_schedule(struct sk_buff *skb,
}
if (th->syn &&
- (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
+ (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
skb->nh.iph->daddr, th->dest))) {
if (ip_vs_todrop()) {
/*
@@ -116,9 +116,9 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
__be16 oldport, __be16 newport)
{
tcph->check =
- ip_vs_check_diff(~oldip, newip,
- ip_vs_check_diff(oldport ^ htonl(0xFFFF),
- newport, tcph->check));
+ csum_fold(ip_vs_check_diff4(oldip, newip,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(tcph->check))));
}
@@ -490,16 +490,18 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);
-static inline __u16 tcp_app_hashkey(__u16 port)
+static inline __u16 tcp_app_hashkey(__be16 port)
{
- return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
+ return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
+ & TCP_APP_TAB_MASK;
}
static int tcp_register_app(struct ip_vs_app *inc)
{
struct ip_vs_app *i;
- __u16 hash, port = inc->port;
+ __u16 hash;
+ __be16 port = inc->port;
int ret = 0;
hash = tcp_app_hashkey(port);
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 54aa7603591..03f0a414cfa 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -89,7 +89,7 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
}
- if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
+ if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
skb->nh.iph->daddr, uh->dest))) {
if (ip_vs_todrop()) {
/*
@@ -121,11 +121,11 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
__be16 oldport, __be16 newport)
{
uhdr->check =
- ip_vs_check_diff(~oldip, newip,
- ip_vs_check_diff(oldport ^ htonl(0xFFFF),
- newport, uhdr->check));
+ csum_fold(ip_vs_check_diff4(oldip, newip,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(uhdr->check))));
if (!uhdr->check)
- uhdr->check = htonl(0xFFFF);
+ uhdr->check = CSUM_MANGLED_0;
}
static int
@@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb,
cp->protocol,
(*pskb)->csum);
if (udph->check == 0)
- udph->check = htonl(0xFFFF);
+ udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, udph->check,
(char*)&(udph->check) - (char*)udph);
@@ -228,7 +228,7 @@ udp_dnat_handler(struct sk_buff **pskb,
cp->protocol,
(*pskb)->csum);
if (udph->check == 0)
- udph->check = 0xFFFF;
+ udph->check = CSUM_MANGLED_0;
(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
@@ -282,16 +282,18 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
static struct list_head udp_apps[UDP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(udp_app_lock);
-static inline __u16 udp_app_hashkey(__u16 port)
+static inline __u16 udp_app_hashkey(__be16 port)
{
- return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
+ return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
+ & UDP_APP_TAB_MASK;
}
static int udp_register_app(struct ip_vs_app *inc)
{
struct ip_vs_app *i;
- __u16 hash, port = inc->port;
+ __u16 hash;
+ __be16 port = inc->port;
int ret = 0;
hash = udp_app_hashkey(port);
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 91a075edd68..7ea2d981a93 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -657,7 +657,7 @@ static void sync_master_loop(void)
if (stop_master_sync)
break;
- ssleep(1);
+ msleep_interruptible(1000);
}
/* clean up the sync_buff queue */
@@ -714,7 +714,7 @@ static void sync_backup_loop(void)
if (stop_backup_sync)
break;
- ssleep(1);
+ msleep_interruptible(1000);
}
/* release the sending multicast socket */
@@ -826,7 +826,7 @@ static int fork_sync_thread(void *startup)
if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
IP_VS_ERR("could not create sync_thread due to %d... "
"retrying.\n", pid);
- ssleep(1);
+ msleep_interruptible(1000);
goto repeat;
}
@@ -849,10 +849,12 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
ip_vs_sync_state |= state;
if (state == IP_VS_STATE_MASTER) {
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, sizeof(ip_vs_master_mcast_ifn));
+ strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_master_mcast_ifn));
ip_vs_master_syncid = syncid;
} else {
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, sizeof(ip_vs_backup_mcast_ifn));
+ strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_backup_mcast_ifn));
ip_vs_backup_syncid = syncid;
}
@@ -860,7 +862,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
IP_VS_ERR("could not create fork_sync_thread due to %d... "
"retrying.\n", pid);
- ssleep(1);
+ msleep_interruptible(1000);
goto repeat;
}
@@ -880,7 +882,8 @@ int stop_sync_thread(int state)
IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
IP_VS_INFO("stopping sync thread %d ...\n",
- (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid);
+ (state == IP_VS_STATE_MASTER) ?
+ sync_master_pid : sync_backup_pid);
__set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&stop_sync_wait, &wait);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index e2005c6810a..a68966059b5 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -27,9 +27,7 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
-#endif
+ fl.mark = (*pskb)->mark;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
@@ -164,17 +162,17 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
return 0;
}
-unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
struct iphdr *iph = skb->nh.iph;
- unsigned int csum = 0;
+ __sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
break;
- if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
+ if ((protocol == 0 && !csum_fold(skb->csum)) ||
!csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - dataoff, protocol,
skb->csum)) {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d88c292f118..f6026d4ac42 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -6,7 +6,7 @@ menu "IP: Netfilter Configuration"
depends on INET && NETFILTER
config NF_CONNTRACK_IPV4
- tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
+ tristate "IPv4 connection tracking support (required for NAT) (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
---help---
Connection tracking keeps a record of what packets have passed
@@ -19,21 +19,18 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
-# connection tracking, helpers and protocols
-config IP_NF_CONNTRACK
- tristate "Connection tracking (required for masq/NAT)"
- ---help---
- Connection tracking keeps a record of what packets have passed
- through your machine, in order to figure out how they are related
- into connections.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
+config NF_CONNTRACK_PROC_COMPAT
+ bool "proc/sysctl compatibility with old connection tracking"
+ depends on NF_CONNTRACK_IPV4
+ default y
+ help
+ This option enables /proc and sysctl compatibility with the old
+ layer 3 dependent connection tracking. This is needed to keep
+ old programs that have not been adapted to the new names working.
- To compile it as a module, choose M here. If unsure, say N.
+ If unsure, say Y.
+# connection tracking, helpers and protocols
config IP_NF_CT_ACCT
bool "Connection tracking flow accounting"
depends on IP_NF_CONNTRACK
@@ -315,20 +312,6 @@ config IP_NF_MATCH_ADDRTYPE
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
-config IP_NF_MATCH_HASHLIMIT
- tristate 'hashlimit match support'
- depends on IP_NF_IPTABLES
- help
- This option adds a new iptables `hashlimit' match.
-
- As opposed to `limit', this match dynamically creates a hash table
- of limit buckets, based on your selection of source/destination
- ip addresses and/or ports.
-
- It enables you to express policies like `10kpps for any given
- destination IP' or `500pps from any given source IP' with a single
- IPtables rule.
-
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
@@ -404,7 +387,7 @@ config IP_NF_TARGET_TCPMSS
To compile it as a module, choose M here. If unsure, say N.
-# NAT + specific targets
+# NAT + specific targets: ip_conntrack
config IP_NF_NAT
tristate "Full NAT"
depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
@@ -415,14 +398,30 @@ config IP_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
+# NAT + specific targets: nf_conntrack
+config NF_NAT
+ tristate "Full NAT"
+ depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4
+ help
+ The Full NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in iptables: see the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_NF_NAT_NEEDED
bool
- depends on IP_NF_NAT != n
+ depends on IP_NF_NAT
+ default y
+
+config NF_NAT_NEEDED
+ bool
+ depends on NF_NAT
default y
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on IP_NF_NAT
+ depends on (NF_NAT || IP_NF_NAT)
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -434,7 +433,7 @@ config IP_NF_TARGET_MASQUERADE
config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
- depends on IP_NF_NAT
+ depends on (NF_NAT || IP_NF_NAT)
help
REDIRECT is a special case of NAT: all incoming connections are
mapped onto the incoming interface's address, causing the packets to
@@ -445,7 +444,7 @@ config IP_NF_TARGET_REDIRECT
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on IP_NF_NAT
+ depends on (NF_NAT || IP_NF_NAT)
help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
@@ -456,7 +455,7 @@ config IP_NF_TARGET_NETMAP
config IP_NF_TARGET_SAME
tristate "SAME target support"
- depends on IP_NF_NAT
+ depends on (NF_NAT || IP_NF_NAT)
help
This option adds a `SAME' target, which works like the standard SNAT
target, but attempts to give clients the same IP for all connections.
@@ -478,19 +477,52 @@ config IP_NF_NAT_SNMP_BASIC
To compile it as a module, choose M here. If unsure, say N.
+config NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_NAT
+ ---help---
+
+ This module implements an Application Layer Gateway (ALG) for
+ SNMP payloads. In conjunction with NAT, it allows a network
+ management system to access multiple private networks with
+ conflicting addresses. It works by modifying IP addresses
+ inside SNMP payloads to match IP-layer NAT mapping.
+
+ This is the "basic" form of SNMP-ALG, as described in RFC 2962.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
+# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.
+# From kconfig-language.txt:
+#
+# <expr> '&&' <expr> (6)
+#
+# (6) Returns the result of min(/expr/, /expr/).
+config NF_NAT_PROTO_GRE
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_GRE
+
+config IP_NF_NAT_FTP
+ tristate
+ depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
+ default IP_NF_NAT && IP_NF_FTP
+
+config NF_NAT_FTP
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_FTP
+
config IP_NF_NAT_IRC
tristate
depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
default IP_NF_NAT if IP_NF_IRC=y
default m if IP_NF_IRC=m
-# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
-# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh.
-config IP_NF_NAT_FTP
+config NF_NAT_IRC
tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_FTP=y
- default m if IP_NF_FTP=m
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_IRC
config IP_NF_NAT_TFTP
tristate
@@ -498,30 +530,56 @@ config IP_NF_NAT_TFTP
default IP_NF_NAT if IP_NF_TFTP=y
default m if IP_NF_TFTP=m
+config NF_NAT_TFTP
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_TFTP
+
config IP_NF_NAT_AMANDA
tristate
depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
default IP_NF_NAT if IP_NF_AMANDA=y
default m if IP_NF_AMANDA=m
+config NF_NAT_AMANDA
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_AMANDA
+
config IP_NF_NAT_PPTP
tristate
depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
default IP_NF_NAT if IP_NF_PPTP=y
default m if IP_NF_PPTP=m
+config NF_NAT_PPTP
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_PPTP
+ select NF_NAT_PROTO_GRE
+
config IP_NF_NAT_H323
tristate
depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
default IP_NF_NAT if IP_NF_H323=y
default m if IP_NF_H323=m
+config NF_NAT_H323
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_H323
+
config IP_NF_NAT_SIP
tristate
depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
default IP_NF_NAT if IP_NF_SIP=y
default m if IP_NF_SIP=m
+config NF_NAT_SIP
+ tristate
+ depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_SIP
+
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 09aaed1a806..15e741aeb29 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -5,17 +5,23 @@
# objects for the standalone - connection tracking / NAT
ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
+ifneq ($(CONFIG_NF_NAT),)
+iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
+else
iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
+endif
ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
-ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ip_conntrack_helper_h323_asn1.o
+ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
ip_nat_h323-objs := ip_nat_helper_h323.o
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
+obj-$(CONFIG_NF_NAT) += nf_nat.o
# conntrack netlink interface
obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -34,7 +40,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
-# NAT helpers
+# NAT helpers (ip_conntrack)
obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
@@ -43,6 +49,19 @@ obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
+# NAT helpers (nf_conntrack)
+obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
+obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
+obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
+obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
+obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
+obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
+
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -50,10 +69,10 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
-obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
@@ -89,6 +108,11 @@ obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
# objects for l3 independent conntrack
nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
+ifeq ($(CONFIG_PROC_FS),y)
+nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
+endif
+endif
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0849f1cced1..9aa22398b3d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -358,6 +358,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
for (;;) {
struct arpt_standard_target *t
= (void *)arpt_get_target(e);
+ int visited = e->comefrom & (1 << hook);
if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
printk("arptables: loop hook %u pos %u %08X.\n",
@@ -368,13 +369,20 @@ static int mark_source_chains(struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
/* Unconditional return/END. */
- if (e->target_offset == sizeof(struct arpt_entry)
+ if ((e->target_offset == sizeof(struct arpt_entry)
&& (strcmp(t->target.u.user.name,
ARPT_STANDARD_TARGET) == 0)
&& t->verdict < 0
- && unconditional(&e->arp)) {
+ && unconditional(&e->arp)) || visited) {
unsigned int oldpos, size;
+ if (t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
/* Return: backtrack through the last
* big jump.
*/
@@ -404,6 +412,14 @@ static int mark_source_chains(struct xt_table_info *newinfo,
if (strcmp(t->target.u.user.name,
ARPT_STANDARD_TARGET) == 0
&& newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct arpt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -426,8 +442,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
static inline int standard_check(const struct arpt_entry_target *t,
unsigned int max_offset)
{
- struct arpt_standard_target *targ = (void *)t;
-
/* Check standard info. */
if (t->u.target_size
!= ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
@@ -437,18 +451,6 @@ static inline int standard_check(const struct arpt_entry_target *t,
return 0;
}
- if (targ->verdict >= 0
- && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
- duprintf("arpt_standard_check: bad verdict (%i)\n",
- targ->verdict);
- return 0;
- }
-
- if (targ->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("arpt_standard_check: bad negative verdict (%i)\n",
- targ->verdict);
- return 0;
- }
return 1;
}
@@ -466,7 +468,13 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
return -EINVAL;
}
+ if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset)
+ return -EINVAL;
+
t = arpt_get_target(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
t->u.user.revision),
"arpt_%s", t->u.user.name);
@@ -633,7 +641,7 @@ static int translate_table(const char *name,
if (ret != 0) {
ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ cleanup_entry, &i);
return ret;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 6c7383a8e42..ad246ba7790 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -92,6 +92,7 @@ static int help(struct sk_buff **pskb,
char pbuf[sizeof("65535")], *tmp;
u_int16_t port, len;
int ret = NF_ACCEPT;
+ typeof(ip_nat_amanda_hook) ip_nat_amanda;
/* Only look at packets from the Amanda server */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
@@ -161,9 +162,11 @@ static int help(struct sk_buff **pskb,
exp->mask.dst.protonum = 0xFF;
exp->mask.dst.u.tcp.port = htons(0xFFFF);
- if (ip_nat_amanda_hook)
- ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff,
- len, exp);
+ /* RCU read locked by nf_hook_slow */
+ ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
+ if (ip_nat_amanda)
+ ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
+ len, exp);
else if (ip_conntrack_expect_related(exp) != 0)
ret = NF_DROP;
ip_conntrack_expect_put(exp);
@@ -180,7 +183,7 @@ static struct ip_conntrack_helper amanda_helper = {
.help = help,
.name = "amanda",
- .tuple = { .src = { .u = { __constant_htons(10080) } },
+ .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
.dst = { .protonum = IPPROTO_UDP },
},
.mask = { .src = { .u = { 0xFFFF } },
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 143c4668538..8556a4f4f60 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -40,9 +40,6 @@
/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -68,8 +65,8 @@ static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size __read_mostly = 0;
int ip_conntrack_max __read_mostly;
struct list_head *ip_conntrack_hash __read_mostly;
-static kmem_cache_t *ip_conntrack_cachep __read_mostly;
-static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
+static struct kmem_cache *ip_conntrack_cachep __read_mostly;
+static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
@@ -201,7 +198,6 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
/* ip_conntrack_expect helper functions */
void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
{
- ASSERT_WRITE_LOCK(&ip_conntrack_lock);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
list_del(&exp->list);
CONNTRACK_STAT_INC(expect_delete);
@@ -225,22 +221,22 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
struct ip_conntrack_expect *i;
list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
- atomic_inc(&i->use);
+ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i;
- }
}
return NULL;
}
/* Just find a expectation corresponding to a tuple. */
struct ip_conntrack_expect *
-ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_expect *i;
read_lock_bh(&ip_conntrack_lock);
i = __ip_conntrack_expect_find(tuple);
+ if (i)
+ atomic_inc(&i->use);
read_unlock_bh(&ip_conntrack_lock);
return i;
@@ -294,7 +290,6 @@ static void
clean_from_lists(struct ip_conntrack *ct)
{
DEBUGP("clean_from_lists(%p)\n", ct);
- ASSERT_WRITE_LOCK(&ip_conntrack_lock);
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
@@ -373,7 +368,6 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
- ASSERT_READ_LOCK(&ip_conntrack_lock);
list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
if (tuplehash_to_ctrack(h) != ignored_conntrack &&
ip_ct_tuple_equal(tuple, &h->tuple)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 93dcf960662..0410c99caca 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -310,6 +310,7 @@ static int help(struct sk_buff **pskb,
struct ip_conntrack_expect *exp;
unsigned int i;
int found = 0, ends_in_nl;
+ typeof(ip_nat_ftp_hook) ip_nat_ftp;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED
@@ -433,9 +434,10 @@ static int help(struct sk_buff **pskb,
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
- if (ip_nat_ftp_hook)
- ret = ip_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype,
- matchoff, matchlen, exp, &seq);
+ ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
+ if (ip_nat_ftp)
+ ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+ matchoff, matchlen, exp, &seq);
else {
/* Can't expect this? Best to drop packet now. */
if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 7b7441202bf..aabfe1c0690 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -237,6 +237,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
u_int16_t rtp_port;
struct ip_conntrack_expect *rtp_exp;
struct ip_conntrack_expect *rtcp_exp;
+ typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
/* Read RTP or RTCP address */
if (!get_h245_addr(*data, addr, &ip, &port) ||
@@ -279,11 +280,11 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
rtcp_exp->flags = 0;
if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip && nat_rtp_rtcp_hook) {
+ ct->tuplehash[!dir].tuple.dst.ip &&
+ (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
/* NAT needed */
- ret = nat_rtp_rtcp_hook(pskb, ct, ctinfo, data, dataoff,
- addr, port, rtp_port, rtp_exp,
- rtcp_exp);
+ ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ addr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
rtp_exp->expectfn = NULL;
rtcp_exp->expectfn = NULL;
@@ -328,6 +329,7 @@ static int expect_t120(struct sk_buff **pskb,
__be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
+ typeof(nat_t120_hook) nat_t120;
/* Read T.120 address */
if (!get_h245_addr(*data, addr, &ip, &port) ||
@@ -350,10 +352,11 @@ static int expect_t120(struct sk_buff **pskb,
exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */
if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip && nat_t120_hook) {
+ ct->tuplehash[!dir].tuple.dst.ip &&
+ (nat_t120 = rcu_dereference(nat_t120_hook))) {
/* NAT needed */
- ret = nat_t120_hook(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
+ ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
+ port, exp);
} else { /* Conntrack only */
exp->expectfn = NULL;
if (ip_conntrack_expect_related(exp) == 0) {
@@ -651,6 +654,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
__be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
+ typeof(nat_h245_hook) nat_h245;
/* Read h245Address */
if (!get_h225_addr(*data, addr, &ip, &port) ||
@@ -673,10 +677,11 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->flags = 0;
if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip && nat_h245_hook) {
+ ct->tuplehash[!dir].tuple.dst.ip &&
+ (nat_h245 = rcu_dereference(nat_h245_hook))) {
/* NAT needed */
- ret = nat_h245_hook(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
+ ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
+ port, exp);
} else { /* Conntrack only */
exp->expectfn = ip_conntrack_h245_expect;
@@ -712,6 +717,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
__be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp = NULL;
+ typeof(nat_callforwarding_hook) nat_callforwarding;
/* Read alternativeAddress */
if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
@@ -759,10 +765,11 @@ static int expect_callforwarding(struct sk_buff **pskb,
exp->flags = 0;
if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip && nat_callforwarding_hook) {
+ ct->tuplehash[!dir].tuple.dst.ip &&
+ (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
/* Need NAT */
- ret = nat_callforwarding_hook(pskb, ct, ctinfo, data, dataoff,
- addr, port, exp);
+ ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+ addr, port, exp);
} else { /* Conntrack only */
exp->expectfn = ip_conntrack_q931_expect;
@@ -793,6 +800,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
int i;
__be32 ip;
u_int16_t port;
+ typeof(set_h225_addr_hook) set_h225_addr;
DEBUGP("ip_ct_q931: Setup\n");
@@ -803,8 +811,10 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
return -1;
}
+ set_h225_addr = rcu_dereference(set_h225_addr_hook);
+
if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
- (set_h225_addr_hook) &&
+ (set_h225_addr) &&
get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
ip != ct->tuplehash[!dir].tuple.src.ip) {
DEBUGP("ip_ct_q931: set destCallSignalAddress "
@@ -812,17 +822,17 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
NIPQUAD(ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr_hook(pskb, data, dataoff,
- &setup->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.src.ip,
- ntohs(ct->tuplehash[!dir].tuple.src.
- u.tcp.port));
+ ret = set_h225_addr(pskb, data, dataoff,
+ &setup->destCallSignalAddress,
+ ct->tuplehash[!dir].tuple.src.ip,
+ ntohs(ct->tuplehash[!dir].tuple.src.
+ u.tcp.port));
if (ret < 0)
return -1;
}
if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
- (set_h225_addr_hook) &&
+ (set_h225_addr) &&
get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
&& ip != ct->tuplehash[!dir].tuple.dst.ip) {
DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
@@ -830,11 +840,11 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
NIPQUAD(ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr_hook(pskb, data, dataoff,
- &setup->sourceCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- ntohs(ct->tuplehash[!dir].tuple.dst.
- u.tcp.port));
+ ret = set_h225_addr(pskb, data, dataoff,
+ &setup->sourceCallSignalAddress,
+ ct->tuplehash[!dir].tuple.dst.ip,
+ ntohs(ct->tuplehash[!dir].tuple.dst.
+ u.tcp.port));
if (ret < 0)
return -1;
}
@@ -1153,7 +1163,7 @@ static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
.me = THIS_MODULE,
.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ ,
.timeout = 240,
- .tuple = {.src = {.u = {__constant_htons(Q931_PORT)}},
+ .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
.dst = {.protonum = IPPROTO_TCP}},
.mask = {.src = {.u = {0xFFFF}},
.dst = {.protonum = 0xFF}},
@@ -1231,6 +1241,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
__be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp;
+ typeof(nat_q931_hook) nat_q931;
/* Look for the first related address */
for (i = 0; i < count; i++) {
@@ -1258,9 +1269,9 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
exp->mask.dst.protonum = 0xFF;
exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */
- if (nat_q931_hook) { /* Need NAT */
- ret = nat_q931_hook(pskb, ct, ctinfo, data, addr, i,
- port, exp);
+ nat_q931 = rcu_dereference(nat_q931_hook);
+ if (nat_q931) { /* Need NAT */
+ ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
} else { /* Conntrack only */
exp->expectfn = ip_conntrack_q931_expect;
@@ -1288,11 +1299,14 @@ static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, GatekeeperRequest * grq)
{
+ typeof(set_ras_addr_hook) set_ras_addr;
+
DEBUGP("ip_ct_ras: GRQ\n");
- if (set_ras_addr_hook) /* NATed */
- return set_ras_addr_hook(pskb, ct, ctinfo, data,
- &grq->rasAddress, 1);
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr) /* NATed */
+ return set_ras_addr(pskb, ct, ctinfo, data,
+ &grq->rasAddress, 1);
return 0;
}
@@ -1362,6 +1376,7 @@ static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
{
struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
int ret;
+ typeof(set_ras_addr_hook) set_ras_addr;
DEBUGP("ip_ct_ras: RRQ\n");
@@ -1371,10 +1386,11 @@ static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
if (ret < 0)
return -1;
- if (set_ras_addr_hook) {
- ret = set_ras_addr_hook(pskb, ct, ctinfo, data,
- rrq->rasAddress.item,
- rrq->rasAddress.count);
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr) {
+ ret = set_ras_addr(pskb, ct, ctinfo, data,
+ rrq->rasAddress.item,
+ rrq->rasAddress.count);
if (ret < 0)
return -1;
}
@@ -1397,13 +1413,15 @@ static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
int ret;
struct ip_conntrack_expect *exp;
+ typeof(set_sig_addr_hook) set_sig_addr;
DEBUGP("ip_ct_ras: RCF\n");
- if (set_sig_addr_hook) {
- ret = set_sig_addr_hook(pskb, ct, ctinfo, data,
- rcf->callSignalAddress.item,
- rcf->callSignalAddress.count);
+ set_sig_addr = rcu_dereference(set_sig_addr_hook);
+ if (set_sig_addr) {
+ ret = set_sig_addr(pskb, ct, ctinfo, data,
+ rcf->callSignalAddress.item,
+ rcf->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1417,7 +1435,7 @@ static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
DEBUGP
("ip_ct_ras: set RAS connection timeout to %u seconds\n",
info->timeout);
- ip_ct_refresh_acct(ct, ctinfo, NULL, info->timeout * HZ);
+ ip_ct_refresh(ct, *pskb, info->timeout * HZ);
/* Set expect timeout */
read_lock_bh(&ip_conntrack_lock);
@@ -1448,13 +1466,15 @@ static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
int dir = CTINFO2DIR(ctinfo);
int ret;
+ typeof(set_sig_addr_hook) set_sig_addr;
DEBUGP("ip_ct_ras: URQ\n");
- if (set_sig_addr_hook) {
- ret = set_sig_addr_hook(pskb, ct, ctinfo, data,
- urq->callSignalAddress.item,
- urq->callSignalAddress.count);
+ set_sig_addr = rcu_dereference(set_sig_addr_hook);
+ if (set_sig_addr) {
+ ret = set_sig_addr(pskb, ct, ctinfo, data,
+ urq->callSignalAddress.item,
+ urq->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1465,7 +1485,7 @@ static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
info->sig_port[!dir] = 0;
/* Give it 30 seconds for UCF or URJ */
- ip_ct_refresh_acct(ct, ctinfo, NULL, 30 * HZ);
+ ip_ct_refresh(ct, *pskb, 30 * HZ);
return 0;
}
@@ -1479,28 +1499,30 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
__be32 ip;
u_int16_t port;
+ typeof(set_h225_addr_hook) set_h225_addr;
DEBUGP("ip_ct_ras: ARQ\n");
+ set_h225_addr = rcu_dereference(set_h225_addr_hook);
if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
ip == ct->tuplehash[dir].tuple.src.ip &&
- port == info->sig_port[dir] && set_h225_addr_hook) {
+ port == info->sig_port[dir] && set_h225_addr) {
/* Answering ARQ */
- return set_h225_addr_hook(pskb, data, 0,
- &arq->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- info->sig_port[!dir]);
+ return set_h225_addr(pskb, data, 0,
+ &arq->destCallSignalAddress,
+ ct->tuplehash[!dir].tuple.dst.ip,
+ info->sig_port[!dir]);
}
if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr_hook) {
+ ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
/* Calling ARQ */
- return set_h225_addr_hook(pskb, data, 0,
- &arq->srcCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- port);
+ return set_h225_addr(pskb, data, 0,
+ &arq->srcCallSignalAddress,
+ ct->tuplehash[!dir].tuple.dst.ip,
+ port);
}
return 0;
@@ -1516,6 +1538,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
__be32 ip;
u_int16_t port;
struct ip_conntrack_expect *exp;
+ typeof(set_sig_addr_hook) set_sig_addr;
DEBUGP("ip_ct_ras: ACF\n");
@@ -1523,10 +1546,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
return 0;
if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */
- if (set_sig_addr_hook)
- return set_sig_addr_hook(pskb, ct, ctinfo, data,
- &acf->destCallSignalAddress,
- 1);
+ set_sig_addr = rcu_dereference(set_sig_addr_hook);
+ if (set_sig_addr)
+ return set_sig_addr(pskb, ct, ctinfo, data,
+ &acf->destCallSignalAddress, 1);
return 0;
}
@@ -1566,11 +1589,14 @@ static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, LocationRequest * lrq)
{
+ typeof(set_ras_addr_hook) set_ras_addr;
+
DEBUGP("ip_ct_ras: LRQ\n");
- if (set_ras_addr_hook)
- return set_ras_addr_hook(pskb, ct, ctinfo, data,
- &lrq->replyAddress, 1);
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr)
+ return set_ras_addr(pskb, ct, ctinfo, data,
+ &lrq->replyAddress, 1);
return 0;
}
@@ -1629,20 +1655,24 @@ static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
unsigned char **data, InfoRequestResponse * irr)
{
int ret;
+ typeof(set_ras_addr_hook) set_ras_addr;
+ typeof(set_sig_addr_hook) set_sig_addr;
DEBUGP("ip_ct_ras: IRR\n");
- if (set_ras_addr_hook) {
- ret = set_ras_addr_hook(pskb, ct, ctinfo, data,
- &irr->rasAddress, 1);
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr) {
+ ret = set_ras_addr(pskb, ct, ctinfo, data,
+ &irr->rasAddress, 1);
if (ret < 0)
return -1;
}
- if (set_sig_addr_hook) {
- ret = set_sig_addr_hook(pskb, ct, ctinfo, data,
- irr->callSignalAddress.item,
- irr->callSignalAddress.count);
+ set_sig_addr = rcu_dereference(set_sig_addr_hook);
+ if (set_sig_addr) {
+ ret = set_sig_addr(pskb, ct, ctinfo, data,
+ irr->callSignalAddress.item,
+ irr->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1746,7 +1776,7 @@ static struct ip_conntrack_helper ip_conntrack_helper_ras = {
.me = THIS_MODULE,
.max_expected = 32,
.timeout = 240,
- .tuple = {.src = {.u = {__constant_htons(RAS_PORT)}},
+ .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
.dst = {.protonum = IPPROTO_UDP}},
.mask = {.src = {.u = {0xFFFE}},
.dst = {.protonum = 0xFF}},
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index a2af5e0c7f9..4d19373bbf0 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -124,6 +124,8 @@ EXPORT_SYMBOL(pptp_msg_name);
static void pptp_expectfn(struct ip_conntrack *ct,
struct ip_conntrack_expect *exp)
{
+ typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
+
DEBUGP("increasing timeouts\n");
/* increase timeout of GRE data channel conntrack entry */
@@ -133,7 +135,9 @@ static void pptp_expectfn(struct ip_conntrack *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- if (!ip_nat_pptp_hook_expectfn) {
+ rcu_read_lock();
+ ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
+ if (!ip_nat_pptp_expectfn) {
struct ip_conntrack_tuple inv_t;
struct ip_conntrack_expect *exp_other;
@@ -142,7 +146,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
DEBUGP("trying to unexpect other dir: ");
DUMP_TUPLE(&inv_t);
- exp_other = ip_conntrack_expect_find(&inv_t);
+ exp_other = ip_conntrack_expect_find_get(&inv_t);
if (exp_other) {
/* delete other expectation. */
DEBUGP("found\n");
@@ -153,8 +157,9 @@ static void pptp_expectfn(struct ip_conntrack *ct,
}
} else {
/* we need more than simple inversion */
- ip_nat_pptp_hook_expectfn(ct, exp);
+ ip_nat_pptp_expectfn(ct, exp);
}
+ rcu_read_unlock();
}
static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
@@ -176,7 +181,7 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
ip_conntrack_put(sibling);
return 1;
} else {
- exp = ip_conntrack_expect_find(t);
+ exp = ip_conntrack_expect_find_get(t);
if (exp) {
DEBUGP("unexpect_related of expect %p\n", exp);
ip_conntrack_unexpect_related(exp);
@@ -226,6 +231,7 @@ exp_gre(struct ip_conntrack *ct,
{
struct ip_conntrack_expect *exp_orig, *exp_reply;
int ret = 1;
+ typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
exp_orig = ip_conntrack_expect_alloc(ct);
if (exp_orig == NULL)
@@ -262,8 +268,9 @@ exp_gre(struct ip_conntrack *ct,
exp_reply->tuple.dst.u.gre.key = peer_callid;
exp_reply->tuple.dst.protonum = IPPROTO_GRE;
- if (ip_nat_pptp_hook_exp_gre)
- ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+ ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
+ if (ip_nat_pptp_exp_gre)
+ ip_nat_pptp_exp_gre(exp_orig, exp_reply);
if (ip_conntrack_expect_related(exp_orig) != 0)
goto out_put_both;
if (ip_conntrack_expect_related(exp_reply) != 0)
@@ -303,6 +310,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
+ typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
msg = ntohs(ctlh->messageType);
DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
@@ -402,9 +410,9 @@ pptp_inbound_pkt(struct sk_buff **pskb,
goto invalid;
}
- if (ip_nat_pptp_hook_inbound)
- return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
- pptpReq);
+ ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
+ if (ip_nat_pptp_inbound)
+ return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -427,6 +435,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
+ typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
msg = ntohs(ctlh->messageType);
DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -492,9 +501,9 @@ pptp_outbound_pkt(struct sk_buff **pskb,
goto invalid;
}
- if (ip_nat_pptp_hook_outbound)
- return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
- pptpReq);
+ ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
+ if (ip_nat_pptp_outbound)
+ return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 75f7c3db161..91832eca410 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -114,6 +114,7 @@ static int help(struct sk_buff **pskb,
u_int16_t dcc_port;
int i, ret = NF_ACCEPT;
char *addr_beg_p, *addr_end_p;
+ typeof(ip_nat_irc_hook) ip_nat_irc;
DEBUGP("entered\n");
@@ -222,11 +223,12 @@ static int help(struct sk_buff **pskb,
{ .tcp = { htons(0xFFFF) } }, 0xFF }});
exp->expectfn = NULL;
exp->flags = 0;
- if (ip_nat_irc_hook)
- ret = ip_nat_irc_hook(pskb, ctinfo,
- addr_beg_p - ib_ptr,
- addr_end_p - addr_beg_p,
- exp);
+ ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
+ if (ip_nat_irc)
+ ret = ip_nat_irc(pskb, ctinfo,
+ addr_beg_p - ib_ptr,
+ addr_end_p - addr_beg_p,
+ exp);
else if (ip_conntrack_expect_related(exp) != 0)
ret = NF_DROP;
ip_conntrack_expect_put(exp);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 262d0d44ec1..5fcf91d617c 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -153,6 +153,7 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
return ret;
nfattr_failure:
+ ip_conntrack_proto_put(proto);
return -1;
}
@@ -319,8 +320,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
} else if (events & (IPCT_NEW | IPCT_RELATED)) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
- /* dump everything */
- events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
type = IPCTNL_MSG_CT_NEW;
@@ -355,28 +354,35 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nfattr_failure;
NFA_NEST_END(skb, nest_parms);
-
- /* NAT stuff is now a status flag */
- if ((events & IPCT_STATUS || events & IPCT_NATINFO)
- && ctnetlink_dump_status(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_REFRESH
- && ctnetlink_dump_timeout(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_PROTOINFO
- && ctnetlink_dump_protoinfo(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_HELPINFO
- && ctnetlink_dump_helpinfo(skb, ct) < 0)
- goto nfattr_failure;
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
- goto nfattr_failure;
+ if (events & IPCT_DESTROY) {
+ if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+ goto nfattr_failure;
+ } else {
+ if (ctnetlink_dump_status(skb, ct) < 0)
+ goto nfattr_failure;
- if (events & IPCT_MARK
- && ctnetlink_dump_mark(skb, ct) < 0)
- goto nfattr_failure;
+ if (ctnetlink_dump_timeout(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if (events & IPCT_PROTOINFO
+ && ctnetlink_dump_protoinfo(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if ((events & IPCT_HELPER || ct->helper)
+ && ctnetlink_dump_helpinfo(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if ((events & IPCT_MARK || ct->mark)
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if (events & IPCT_COUNTER_FILLING &&
+ (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
+ goto nfattr_failure;
+ }
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
@@ -742,7 +748,6 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
ip_conntrack_put(ct);
return -ENOMEM;
}
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, 1, ct);
@@ -945,9 +950,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
ct->status |= IPS_CONFIRMED;
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- goto err;
+ if (cda[CTA_STATUS-1]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0)
+ goto err;
+ }
if (cda[CTA_PROTOINFO-1]) {
err = ctnetlink_change_protoinfo(ct, cda);
@@ -1256,7 +1263,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = ip_conntrack_expect_find(&tuple);
+ exp = ip_conntrack_expect_find_get(&tuple);
if (!exp)
return -ENOENT;
@@ -1272,8 +1279,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
goto out;
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
-
+
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
@@ -1310,7 +1316,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
/* bump usage count to 2 */
- exp = ip_conntrack_expect_find(&tuple);
+ exp = ip_conntrack_expect_find_get(&tuple);
if (!exp)
return -ENOENT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 5fe026f467d..ac1c49ef36a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -34,8 +34,6 @@
#include <linux/interrupt.h>
static DEFINE_RWLOCK(ip_ct_gre_lock);
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index f4f75995a9e..3a26d63eed8 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -52,20 +52,56 @@ unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
const char *dptr);
EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
-int ct_sip_get_info(const char *dptr, size_t dlen,
- unsigned int *matchoff,
- unsigned int *matchlen,
- struct sip_header_nfo *hnfo);
-EXPORT_SYMBOL_GPL(ct_sip_get_info);
-
-
static int digits_len(const char *dptr, const char *limit, int *shift);
static int epaddr_len(const char *dptr, const char *limit, int *shift);
static int skp_digits_len(const char *dptr, const char *limit, int *shift);
static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
-struct sip_header_nfo ct_sip_hdrs[] = {
- { /* Via header */
+struct sip_header_nfo {
+ const char *lname;
+ const char *sname;
+ const char *ln_str;
+ size_t lnlen;
+ size_t snlen;
+ size_t ln_strlen;
+ int case_sensitive;
+ int (*match_len)(const char *, const char *, int *);
+};
+
+static struct sip_header_nfo ct_sip_hdrs[] = {
+ [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
+ .lname = "sip:",
+ .lnlen = sizeof("sip:") - 1,
+ .ln_str = ":",
+ .ln_strlen = sizeof(":") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_REQ_URI] = { /* SIP request URI */
+ .lname = "sip:",
+ .lnlen = sizeof("sip:") - 1,
+ .ln_str = "@",
+ .ln_strlen = sizeof("@") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_FROM] = { /* SIP From header */
+ .lname = "From:",
+ .lnlen = sizeof("From:") - 1,
+ .sname = "\r\nf:",
+ .snlen = sizeof("\r\nf:") - 1,
+ .ln_str = "sip:",
+ .ln_strlen = sizeof("sip:") - 1,
+ .match_len = skp_epaddr_len,
+ },
+ [POS_TO] = { /* SIP To header */
+ .lname = "To:",
+ .lnlen = sizeof("To:") - 1,
+ .sname = "\r\nt:",
+ .snlen = sizeof("\r\nt:") - 1,
+ .ln_str = "sip:",
+ .ln_strlen = sizeof("sip:") - 1,
+ .match_len = skp_epaddr_len,
+ },
+ [POS_VIA] = { /* SIP Via header */
.lname = "Via:",
.lnlen = sizeof("Via:") - 1,
.sname = "\r\nv:",
@@ -74,7 +110,7 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof("UDP ") - 1,
.match_len = epaddr_len,
},
- { /* Contact header */
+ [POS_CONTACT] = { /* SIP Contact header */
.lname = "Contact:",
.lnlen = sizeof("Contact:") - 1,
.sname = "\r\nm:",
@@ -83,7 +119,7 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof("sip:") - 1,
.match_len = skp_epaddr_len
},
- { /* Content length header */
+ [POS_CONTENT] = { /* SIP Content length header */
.lname = "Content-Length:",
.lnlen = sizeof("Content-Length:") - 1,
.sname = "\r\nl:",
@@ -92,7 +128,8 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof(":") - 1,
.match_len = skp_digits_len
},
- { /* SDP media info */
+ [POS_MEDIA] = { /* SDP media info */
+ .case_sensitive = 1,
.lname = "\nm=",
.lnlen = sizeof("\nm=") - 1,
.sname = "\rm=",
@@ -101,7 +138,8 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof("audio ") - 1,
.match_len = digits_len
},
- { /* SDP owner address*/
+ [POS_OWNER] = { /* SDP owner address*/
+ .case_sensitive = 1,
.lname = "\no=",
.lnlen = sizeof("\no=") - 1,
.sname = "\ro=",
@@ -110,7 +148,8 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof("IN IP4 ") - 1,
.match_len = epaddr_len
},
- { /* SDP connection info */
+ [POS_CONNECTION] = { /* SDP connection info */
+ .case_sensitive = 1,
.lname = "\nc=",
.lnlen = sizeof("\nc=") - 1,
.sname = "\rc=",
@@ -119,16 +158,8 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.ln_strlen = sizeof("IN IP4 ") - 1,
.match_len = epaddr_len
},
- { /* Requests headers */
- .lname = "sip:",
- .lnlen = sizeof("sip:") - 1,
- .sname = "sip:",
- .snlen = sizeof("sip:") - 1, /* yes, i know.. ;) */
- .ln_str = "@",
- .ln_strlen = sizeof("@") - 1,
- .match_len = epaddr_len
- },
- { /* SDP version header */
+ [POS_SDP_HEADER] = { /* SDP version header */
+ .case_sensitive = 1,
.lname = "\nv=",
.lnlen = sizeof("\nv=") - 1,
.sname = "\rv=",
@@ -138,7 +169,6 @@ struct sip_header_nfo ct_sip_hdrs[] = {
.match_len = digits_len
}
};
-EXPORT_SYMBOL_GPL(ct_sip_hdrs);
/* get line lenght until first CR or LF seen. */
int ct_sip_lnlen(const char *line, const char *limit)
@@ -159,13 +189,19 @@ EXPORT_SYMBOL_GPL(ct_sip_lnlen);
/* Linear string search, case sensitive. */
const char *ct_sip_search(const char *needle, const char *haystack,
- size_t needle_len, size_t haystack_len)
+ size_t needle_len, size_t haystack_len,
+ int case_sensitive)
{
const char *limit = haystack + (haystack_len - needle_len);
while (haystack <= limit) {
- if (memcmp(haystack, needle, needle_len) == 0)
- return haystack;
+ if (case_sensitive) {
+ if (strncmp(haystack, needle, needle_len) == 0)
+ return haystack;
+ } else {
+ if (strnicmp(haystack, needle, needle_len) == 0)
+ return haystack;
+ }
haystack++;
}
return NULL;
@@ -263,8 +299,9 @@ static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
int ct_sip_get_info(const char *dptr, size_t dlen,
unsigned int *matchoff,
unsigned int *matchlen,
- struct sip_header_nfo *hnfo)
+ enum sip_header_pos pos)
{
+ struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
const char *limit, *aux, *k = dptr;
int shift = 0;
@@ -272,12 +309,14 @@ int ct_sip_get_info(const char *dptr, size_t dlen,
while (dptr <= limit) {
if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
- (strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
+ (hnfo->sname == NULL ||
+ strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
dptr++;
continue;
}
aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
- ct_sip_lnlen(dptr, limit));
+ ct_sip_lnlen(dptr, limit),
+ hnfo->case_sensitive);
if (!aux) {
DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
hnfo->lname);
@@ -298,6 +337,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen,
DEBUGP("%s header not found.\n", hnfo->lname);
return 0;
}
+EXPORT_SYMBOL_GPL(ct_sip_get_info);
static int set_expected_rtp(struct sk_buff **pskb,
struct ip_conntrack *ct,
@@ -308,6 +348,7 @@ static int set_expected_rtp(struct sk_buff **pskb,
struct ip_conntrack_expect *exp;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
int ret;
+ typeof(ip_nat_sdp_hook) ip_nat_sdp;
exp = ip_conntrack_expect_alloc(ct);
if (exp == NULL)
@@ -328,8 +369,9 @@ static int set_expected_rtp(struct sk_buff **pskb,
exp->expectfn = NULL;
exp->flags = 0;
- if (ip_nat_sdp_hook)
- ret = ip_nat_sdp_hook(pskb, ctinfo, exp, dptr);
+ ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
+ if (ip_nat_sdp)
+ ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
else {
if (ip_conntrack_expect_related(exp) != 0)
ret = NF_DROP;
@@ -351,6 +393,7 @@ static int sip_help(struct sk_buff **pskb,
int matchoff, matchlen;
__be32 ipaddr;
u_int16_t port;
+ typeof(ip_nat_sip_hook) ip_nat_sip;
/* No Data ? */
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
@@ -368,8 +411,9 @@ static int sip_help(struct sk_buff **pskb,
goto out;
}
- if (ip_nat_sip_hook) {
- if (!ip_nat_sip_hook(pskb, ctinfo, ct, &dptr)) {
+ ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
+ if (ip_nat_sip) {
+ if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
ret = NF_DROP;
goto out;
}
@@ -389,7 +433,7 @@ static int sip_help(struct sk_buff **pskb,
}
/* Get ip and port address from SDP packet. */
if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- &ct_sip_hdrs[POS_CONNECTION]) > 0) {
+ POS_CONNECTION) > 0) {
/* We'll drop only if there are parse problems. */
if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
@@ -398,7 +442,7 @@ static int sip_help(struct sk_buff **pskb,
goto out;
}
if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- &ct_sip_hdrs[POS_MEDIA]) > 0) {
+ POS_MEDIA) > 0) {
port = simple_strtoul(dptr + matchoff, NULL, 10);
if (port < 1024) {
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 02135756562..86efb544967 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -28,9 +28,6 @@
#include <net/ip.h>
#include <net/route.h>
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -139,7 +136,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;
- ASSERT_READ_LOCK(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
@@ -926,7 +922,7 @@ EXPORT_SYMBOL(__ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_put);
EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index fe0b634dd37..ef56de2eff0 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -50,6 +50,7 @@ static int tftp_help(struct sk_buff **pskb,
struct tftphdr _tftph, *tfh;
struct ip_conntrack_expect *exp;
unsigned int ret = NF_ACCEPT;
+ typeof(ip_nat_tftp_hook) ip_nat_tftp;
tfh = skb_header_pointer(*pskb,
(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
@@ -81,8 +82,9 @@ static int tftp_help(struct sk_buff **pskb,
DEBUGP("expect: ");
DUMP_TUPLE(&exp->tuple);
DUMP_TUPLE(&exp->mask);
- if (ip_nat_tftp_hook)
- ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
+ ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
+ if (ip_nat_tftp)
+ ret = ip_nat_tftp(pskb, ctinfo, exp);
else if (ip_conntrack_expect_related(exp) != 0)
ret = NF_DROP;
ip_conntrack_expect_put(exp);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
index 3a888715bbf..85df1a9aed3 100644
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ b/net/ipv4/netfilter/ip_nat_amanda.c
@@ -70,15 +70,14 @@ static unsigned int help(struct sk_buff **pskb,
static void __exit ip_nat_amanda_fini(void)
{
- ip_nat_amanda_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
+ rcu_assign_pointer(ip_nat_amanda_hook, NULL);
+ synchronize_rcu();
}
static int __init ip_nat_amanda_init(void)
{
- BUG_ON(ip_nat_amanda_hook);
- ip_nat_amanda_hook = help;
+ BUG_ON(rcu_dereference(ip_nat_amanda_hook));
+ rcu_assign_pointer(ip_nat_amanda_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 4b6260a9740..9d1a5175dcd 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -362,12 +362,10 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = nf_csum_update(~iph->saddr, target->src.ip,
- iph->check);
+ nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
iph->saddr = target->src.ip;
} else {
- iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
- iph->check);
+ nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
iph->daddr = target->dst.ip;
}
return 1;
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index a71c233d811..913960e1380 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -156,15 +156,14 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
static void __exit ip_nat_ftp_fini(void)
{
- ip_nat_ftp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
+ rcu_assign_pointer(ip_nat_ftp_hook, NULL);
+ synchronize_rcu();
}
static int __init ip_nat_ftp_init(void)
{
- BUG_ON(ip_nat_ftp_hook);
- ip_nat_ftp_hook = ip_nat_ftp;
+ BUG_ON(rcu_dereference(ip_nat_ftp_hook));
+ rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 3bf85848055..ee80feb4b2a 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -188,10 +188,8 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
csum_partial((char *)tcph,
datalen, 0));
} else
- tcph->check = nf_proto_csum_update(*pskb,
- htons(oldlen) ^ htons(0xFFFF),
- htons(datalen),
- tcph->check, 1);
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ htons(oldlen), htons(datalen), 1);
if (rep_len != match_len) {
set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -264,12 +262,10 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
csum_partial((char *)udph,
datalen, 0));
if (!udph->check)
- udph->check = -1;
+ udph->check = CSUM_MANGLED_0;
} else
- udph->check = nf_proto_csum_update(*pskb,
- htons(oldlen) ^ htons(0xFFFF),
- htons(datalen),
- udph->check, 1);
+ nf_proto_csum_replace2(&udph->check, *pskb,
+ htons(oldlen), htons(datalen), 1);
return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -307,14 +303,10 @@ sack_adjust(struct sk_buff *skb,
ntohl(sack->start_seq), new_start_seq,
ntohl(sack->end_seq), new_end_seq);
- tcph->check = nf_proto_csum_update(skb,
- ~sack->start_seq,
- new_start_seq,
- tcph->check, 0);
- tcph->check = nf_proto_csum_update(skb,
- ~sack->end_seq,
- new_end_seq,
- tcph->check, 0);
+ nf_proto_csum_replace4(&tcph->check, skb,
+ sack->start_seq, new_start_seq, 0);
+ nf_proto_csum_replace4(&tcph->check, skb,
+ sack->end_seq, new_end_seq, 0);
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
@@ -397,10 +389,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
else
newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
- tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
- tcph->check, 0);
- tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
- tcph->check, 0);
+ nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
+ nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
index 4a7d34466ee..bdc99ef6159 100644
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ b/net/ipv4/netfilter/ip_nat_helper_h323.c
@@ -563,25 +563,25 @@ static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
/****************************************************************************/
static int __init init(void)
{
- BUG_ON(set_h245_addr_hook != NULL);
- BUG_ON(set_h225_addr_hook != NULL);
- BUG_ON(set_sig_addr_hook != NULL);
- BUG_ON(set_ras_addr_hook != NULL);
- BUG_ON(nat_rtp_rtcp_hook != NULL);
- BUG_ON(nat_t120_hook != NULL);
- BUG_ON(nat_h245_hook != NULL);
- BUG_ON(nat_callforwarding_hook != NULL);
- BUG_ON(nat_q931_hook != NULL);
-
- set_h245_addr_hook = set_h245_addr;
- set_h225_addr_hook = set_h225_addr;
- set_sig_addr_hook = set_sig_addr;
- set_ras_addr_hook = set_ras_addr;
- nat_rtp_rtcp_hook = nat_rtp_rtcp;
- nat_t120_hook = nat_t120;
- nat_h245_hook = nat_h245;
- nat_callforwarding_hook = nat_callforwarding;
- nat_q931_hook = nat_q931;
+ BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
+ BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
+ BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
+ BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
+ BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
+ BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
+ BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
+ BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
+ BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
+
+ rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
+ rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
+ rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
+ rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
+ rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ rcu_assign_pointer(nat_t120_hook, nat_t120);
+ rcu_assign_pointer(nat_h245_hook, nat_h245);
+ rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
+ rcu_assign_pointer(nat_q931_hook, nat_q931);
DEBUGP("ip_nat_h323: init success\n");
return 0;
@@ -590,16 +590,16 @@ static int __init init(void)
/****************************************************************************/
static void __exit fini(void)
{
- set_h245_addr_hook = NULL;
- set_h225_addr_hook = NULL;
- set_sig_addr_hook = NULL;
- set_ras_addr_hook = NULL;
- nat_rtp_rtcp_hook = NULL;
- nat_t120_hook = NULL;
- nat_h245_hook = NULL;
- nat_callforwarding_hook = NULL;
- nat_q931_hook = NULL;
- synchronize_net();
+ rcu_assign_pointer(set_h245_addr_hook, NULL);
+ rcu_assign_pointer(set_h225_addr_hook, NULL);
+ rcu_assign_pointer(set_sig_addr_hook, NULL);
+ rcu_assign_pointer(set_ras_addr_hook, NULL);
+ rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
+ rcu_assign_pointer(nat_t120_hook, NULL);
+ rcu_assign_pointer(nat_h245_hook, NULL);
+ rcu_assign_pointer(nat_callforwarding_hook, NULL);
+ rcu_assign_pointer(nat_q931_hook, NULL);
+ synchronize_rcu();
}
/****************************************************************************/
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 329fdcd7d70..ec957bbb536 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -101,7 +101,7 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
DEBUGP("trying to unexpect other dir: ");
DUMP_TUPLE(&t);
- other_exp = ip_conntrack_expect_find(&t);
+ other_exp = ip_conntrack_expect_find_get(&t);
if (other_exp) {
ip_conntrack_unexpect_related(other_exp);
ip_conntrack_expect_put(other_exp);
@@ -315,17 +315,17 @@ static int __init ip_nat_helper_pptp_init(void)
if (ret < 0)
return ret;
- BUG_ON(ip_nat_pptp_hook_outbound);
- ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
+ BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
+ rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
- BUG_ON(ip_nat_pptp_hook_inbound);
- ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
+ BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
+ rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
- BUG_ON(ip_nat_pptp_hook_exp_gre);
- ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
+ BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
+ rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
- BUG_ON(ip_nat_pptp_hook_expectfn);
- ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
+ BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
+ rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
return 0;
@@ -335,14 +335,13 @@ static void __exit ip_nat_helper_pptp_fini(void)
{
DEBUGP("cleanup_module\n" );
- ip_nat_pptp_hook_expectfn = NULL;
- ip_nat_pptp_hook_exp_gre = NULL;
- ip_nat_pptp_hook_inbound = NULL;
- ip_nat_pptp_hook_outbound = NULL;
+ rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
+ rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
+ rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
+ rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
+ synchronize_rcu();
ip_nat_proto_gre_fini();
- /* Make sure noone calls it, meanwhile */
- synchronize_net();
printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index a767123e082..feb26b48f1d 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -98,15 +98,14 @@ static unsigned int help(struct sk_buff **pskb,
static void __exit ip_nat_irc_fini(void)
{
- ip_nat_irc_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
+ rcu_assign_pointer(ip_nat_irc_hook, NULL);
+ synchronize_rcu();
}
static int __init ip_nat_irc_init(void)
{
- BUG_ON(ip_nat_irc_hook);
- ip_nat_irc_hook = help;
+ BUG_ON(rcu_dereference(ip_nat_irc_hook));
+ rcu_assign_pointer(ip_nat_irc_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index bf91f9312b3..95810202d84 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -129,11 +129,9 @@ gre_manip_pkt(struct sk_buff **pskb,
}
if (greh->csum) {
/* FIXME: Never tested this code... */
- *(gre_csum(greh)) =
- nf_proto_csum_update(*pskb,
- ~*(gre_key(greh)),
- tuple->dst.u.gre.key,
- *(gre_csum(greh)), 0);
+ nf_proto_csum_replace4(gre_csum(greh), *pskb,
+ *(gre_key(greh)),
+ tuple->dst.u.gre.key, 0);
}
*(gre_key(greh)) = tuple->dst.u.gre.key;
break;
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 3f6efc13ac7..fb716edd5bc 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -24,8 +24,8 @@ icmp_in_range(const struct ip_conntrack_tuple *tuple,
const union ip_conntrack_manip_proto *min,
const union ip_conntrack_manip_proto *max)
{
- return (tuple->src.u.icmp.id >= min->icmp.id
- && tuple->src.u.icmp.id <= max->icmp.id);
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
static int
@@ -66,10 +66,8 @@ icmp_manip_pkt(struct sk_buff **pskb,
return 0;
hdr = (struct icmphdr *)((*pskb)->data + hdroff);
- hdr->checksum = nf_proto_csum_update(*pskb,
- hdr->un.echo.id ^ htons(0xFFFF),
- tuple->src.u.icmp.id,
- hdr->checksum, 0);
+ nf_proto_csum_replace2(&hdr->checksum, *pskb,
+ hdr->un.echo.id, tuple->src.u.icmp.id, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 12deb13b93b..b586d18b3fb 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,9 +129,8 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (hdrsize < sizeof(*hdr))
return 1;
- hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
- hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport,
- hdr->check, 0);
+ nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index 4bbec7730d1..5ced0877b32 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -115,13 +115,10 @@ udp_manip_pkt(struct sk_buff **pskb,
}
if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
- hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
- hdr->check, 1);
- hdr->check = nf_proto_csum_update(*pskb,
- *portptr ^ htons(0xFFFF), newport,
- hdr->check, 0);
+ nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
if (!hdr->check)
- hdr->check = -1;
+ hdr->check = CSUM_MANGLED_0;
}
*portptr = newport;
return 1;
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
index 71fc2730a00..6223abc924f 100644
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ b/net/ipv4/netfilter/ip_nat_sip.c
@@ -29,27 +29,70 @@ MODULE_DESCRIPTION("SIP NAT helper");
#define DEBUGP(format, args...)
#endif
-extern struct sip_header_nfo ct_sip_hdrs[];
+struct addr_map {
+ struct {
+ char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ unsigned int srclen, srciplen;
+ unsigned int dstlen, dstiplen;
+ } addr[IP_CT_DIR_MAX];
+};
+
+static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
+{
+ struct ip_conntrack_tuple *t;
+ enum ip_conntrack_dir dir;
+ unsigned int n;
+
+ for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
+ t = &ct->tuplehash[dir].tuple;
+
+ n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
+ NIPQUAD(t->src.ip));
+ map->addr[dir].srciplen = n;
+ n += sprintf(map->addr[dir].src + n, ":%u",
+ ntohs(t->src.u.udp.port));
+ map->addr[dir].srclen = n;
+
+ n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
+ NIPQUAD(t->dst.ip));
+ map->addr[dir].dstiplen = n;
+ n += sprintf(map->addr[dir].dst + n, ":%u",
+ ntohs(t->dst.u.udp.port));
+ map->addr[dir].dstlen = n;
+ }
+}
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr, size_t dlen,
- char *buffer, int bufflen,
- struct sip_header_nfo *hnfo)
+static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+ struct ip_conntrack *ct, const char **dptr, size_t dlen,
+ enum sip_header_pos pos, struct addr_map *map)
{
- unsigned int matchlen, matchoff;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int matchlen, matchoff, addrlen;
+ char *addr;
- if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, hnfo) <= 0)
- return 0;
+ if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
+ return 1;
+
+ if ((matchlen == map->addr[dir].srciplen ||
+ matchlen == map->addr[dir].srclen) &&
+ memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
+ addr = map->addr[!dir].dst;
+ addrlen = map->addr[!dir].dstlen;
+ } else if ((matchlen == map->addr[dir].dstiplen ||
+ matchlen == map->addr[dir].dstlen) &&
+ memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
+ addr = map->addr[!dir].src;
+ addrlen = map->addr[!dir].srclen;
+ } else
+ return 1;
if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen, buffer, bufflen))
+ matchoff, matchlen, addr, addrlen))
return 0;
-
- /* We need to reload this. Thanks Patrick. */
*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
return 1;
+
}
static unsigned int ip_nat_sip(struct sk_buff **pskb,
@@ -57,70 +100,61 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
struct ip_conntrack *ct,
const char **dptr)
{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned int bufflen, dataoff;
- __be32 ip;
- __be16 port;
+ enum sip_header_pos pos;
+ struct addr_map map;
+ int dataoff, datalen;
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ datalen = (*pskb)->len - dataoff;
+ if (datalen < sizeof("SIP/2.0") - 1)
+ return NF_DROP;
+
+ addr_map_init(ct, &map);
+
+ /* Basic rules: requests and responses. */
+ if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
+ /* 10.2: Constructing the REGISTER Request:
+ *
+ * The "userinfo" and "@" components of the SIP URI MUST NOT
+ * be present.
+ */
+ if (datalen >= sizeof("REGISTER") - 1 &&
+ strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
+ pos = POS_REG_REQ_URI;
+ else
+ pos = POS_REQ_URI;
+
+ if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+ return NF_DROP;
+ }
+
+ if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
+ !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
+ !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
+ !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+ return NF_DROP;
+ return NF_ACCEPT;
+}
- ip = ct->tuplehash[!dir].tuple.dst.ip;
- port = ct->tuplehash[!dir].tuple.dst.u.udp.port;
- bufflen = sprintf(buffer, "%u.%u.%u.%u:%u", NIPQUAD(ip), ntohs(port));
+static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack *ct,
+ const char **dptr, size_t dlen,
+ char *buffer, int bufflen,
+ enum sip_header_pos pos)
+{
+ unsigned int matchlen, matchoff;
- /* short packet ? */
- if (((*pskb)->len - dataoff) < (sizeof("SIP/2.0") - 1))
+ if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
return 0;
- /* Basic rules: requests and responses. */
- if (memcmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) == 0) {
- const char *aux;
-
- if ((ctinfo) < IP_CT_IS_REPLY) {
- mangle_sip_packet(pskb, ctinfo, ct, dptr,
- (*pskb)->len - dataoff,
- buffer, bufflen,
- &ct_sip_hdrs[POS_CONTACT]);
- return 1;
- }
+ if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ matchoff, matchlen, buffer, bufflen))
+ return 0;
- if (!mangle_sip_packet(pskb, ctinfo, ct, dptr,
- (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_VIA]))
- return 0;
-
- /* This search should ignore case, but later.. */
- aux = ct_sip_search("CSeq:", *dptr, sizeof("CSeq:") - 1,
- (*pskb)->len - dataoff);
- if (!aux)
- return 0;
-
- if (!ct_sip_search("REGISTER", aux, sizeof("REGISTER"),
- ct_sip_lnlen(aux, *dptr + (*pskb)->len - dataoff)))
- return 1;
-
- return mangle_sip_packet(pskb, ctinfo, ct, dptr,
- (*pskb)->len - dataoff,
- buffer, bufflen,
- &ct_sip_hdrs[POS_CONTACT]);
- }
- if ((ctinfo) < IP_CT_IS_REPLY) {
- if (!mangle_sip_packet(pskb, ctinfo, ct, dptr,
- (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_VIA]))
- return 0;
-
- /* Mangle Contact if exists only. - watch udp_nat_mangle()! */
- mangle_sip_packet(pskb, ctinfo, ct, dptr, (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_CONTACT]);
- return 1;
- }
- /* This mangle requests headers. */
- return mangle_sip_packet(pskb, ctinfo, ct, dptr,
- ct_sip_lnlen(*dptr,
- *dptr + (*pskb)->len - dataoff),
- buffer, bufflen, &ct_sip_hdrs[POS_REQ_HEADER]);
+ /* We need to reload this. Thanks Patrick. */
+ *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ return 1;
}
static int mangle_content_len(struct sk_buff **pskb,
@@ -136,7 +170,7 @@ static int mangle_content_len(struct sk_buff **pskb,
/* Get actual SDP lenght */
if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, &ct_sip_hdrs[POS_SDP_HEADER]) > 0) {
+ &matchlen, POS_SDP_HEADER) > 0) {
/* since ct_sip_get_info() give us a pointer passing 'v='
we need to add 2 bytes in this count. */
@@ -144,7 +178,7 @@ static int mangle_content_len(struct sk_buff **pskb,
/* Now, update SDP lenght */
if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, &ct_sip_hdrs[POS_CONTENT]) > 0) {
+ &matchlen, POS_CONTENT) > 0) {
bufflen = sprintf(buffer, "%u", c_len);
@@ -170,17 +204,17 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
/* Mangle owner and contact info. */
bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_OWNER]))
+ buffer, bufflen, POS_OWNER))
return 0;
if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_CONNECTION]))
+ buffer, bufflen, POS_CONNECTION))
return 0;
/* Mangle media port. */
bufflen = sprintf(buffer, "%u", port);
if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, &ct_sip_hdrs[POS_MEDIA]))
+ buffer, bufflen, POS_MEDIA))
return 0;
return mangle_content_len(pskb, ctinfo, ct, dptr);
@@ -230,18 +264,17 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
static void __exit fini(void)
{
- ip_nat_sip_hook = NULL;
- ip_nat_sdp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
+ rcu_assign_pointer(ip_nat_sip_hook, NULL);
+ rcu_assign_pointer(ip_nat_sdp_hook, NULL);
+ synchronize_rcu();
}
static int __init init(void)
{
- BUG_ON(ip_nat_sip_hook);
- BUG_ON(ip_nat_sdp_hook);
- ip_nat_sip_hook = ip_nat_sip;
- ip_nat_sdp_hook = ip_nat_sdp;
+ BUG_ON(rcu_dereference(ip_nat_sip_hook));
+ BUG_ON(rcu_dereference(ip_nat_sdp_hook));
+ rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
+ rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 168f45fa189..c3d9f3b090c 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -64,7 +64,7 @@ MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
#define SNMP_PORT 161
#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (u_int8_t )((n) & 0xff)
+#define NOCT1(n) (*(u8 *)n)
static int debug;
static DEFINE_SPINLOCK(snmp_lock);
@@ -613,7 +613,7 @@ struct snmp_v1_trap
static inline void mangle_address(unsigned char *begin,
unsigned char *addr,
const struct oct1_map *map,
- u_int16_t *check);
+ __sum16 *check);
struct snmp_cnv
{
unsigned int class;
@@ -873,38 +873,24 @@ static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
* Fast checksum update for possibly oddly-aligned UDP byte, from the
* code example in the draft.
*/
-static void fast_csum(unsigned char *csum,
+static void fast_csum(__sum16 *csum,
const unsigned char *optr,
const unsigned char *nptr,
- int odd)
+ int offset)
{
- long x, old, new;
-
- x = csum[0] * 256 + csum[1];
-
- x =~ x & 0xFFFF;
-
- if (odd) old = optr[0] * 256;
- else old = optr[0];
-
- x -= old & 0xFFFF;
- if (x <= 0) {
- x--;
- x &= 0xFFFF;
- }
-
- if (odd) new = nptr[0] * 256;
- else new = nptr[0];
-
- x += new & 0xFFFF;
- if (x & 0x10000) {
- x++;
- x &= 0xFFFF;
+ unsigned char s[4];
+
+ if (offset & 1) {
+ s[0] = s[2] = 0;
+ s[1] = ~*optr;
+ s[3] = *nptr;
+ } else {
+ s[1] = s[3] = 0;
+ s[0] = ~*optr;
+ s[2] = *nptr;
}
-
- x =~ x & 0xFFFF;
- csum[0] = x / 256;
- csum[1] = x & 0xFF;
+
+ *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
}
/*
@@ -915,9 +901,9 @@ static void fast_csum(unsigned char *csum,
static inline void mangle_address(unsigned char *begin,
unsigned char *addr,
const struct oct1_map *map,
- u_int16_t *check)
+ __sum16 *check)
{
- if (map->from == NOCT1(*addr)) {
+ if (map->from == NOCT1(addr)) {
u_int32_t old;
if (debug)
@@ -927,11 +913,8 @@ static inline void mangle_address(unsigned char *begin,
/* Update UDP checksum if being used */
if (*check) {
- unsigned char odd = !((addr - begin) % 2);
-
- fast_csum((unsigned char *)check,
- &map->from, &map->to, odd);
-
+ fast_csum(check,
+ &map->from, &map->to, addr - begin);
}
if (debug)
@@ -943,7 +926,7 @@ static inline void mangle_address(unsigned char *begin,
static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
struct snmp_v1_trap *trap,
const struct oct1_map *map,
- u_int16_t *check)
+ __sum16 *check)
{
unsigned int cls, con, tag, len;
unsigned char *end;
@@ -1037,7 +1020,7 @@ static void hex_dump(unsigned char *buf, size_t len)
static int snmp_parse_mangle(unsigned char *msg,
u_int16_t len,
const struct oct1_map *map,
- u_int16_t *check)
+ __sum16 *check)
{
unsigned char *eoc, *end;
unsigned int cls, con, tag, vers, pdutype;
@@ -1223,12 +1206,12 @@ static int snmp_translate(struct ip_conntrack *ct,
*/
if (dir == IP_CT_DIR_ORIGINAL) {
/* SNAT traps */
- map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
- map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
+ map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
+ map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
+ map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+ map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
}
if (map.from == map.to)
@@ -1294,11 +1277,11 @@ static struct ip_conntrack_helper snmp_helper = {
.help = help,
.name = "snmp",
- .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } },
- .dst = { .protonum = IPPROTO_UDP },
+ .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
+ .dst = {.protonum = IPPROTO_UDP},
},
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
+ .mask = {.src = {.u = {0xFFFF}},
+ .dst = {.protonum = 0xFF},
},
};
@@ -1309,11 +1292,11 @@ static struct ip_conntrack_helper snmp_trap_helper = {
.help = help,
.name = "snmp_trap",
- .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } },
- .dst = { .protonum = IPPROTO_UDP },
+ .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
+ .dst = {.protonum = IPPROTO_UDP},
},
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
+ .mask = {.src = {.u = {0xFFFF}},
+ .dst = {.protonum = 0xFF},
},
};
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index d85d2de5044..ad66328baa5 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -44,12 +44,6 @@
#define DEBUGP(format, args...)
#endif
-#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
- : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
- : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
- : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
- : "*ERROR*")))
-
#ifdef CONFIG_XFRM
static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 94a78015451..604793536fc 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -55,15 +55,14 @@ static unsigned int help(struct sk_buff **pskb,
static void __exit ip_nat_tftp_fini(void)
{
- ip_nat_tftp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
+ rcu_assign_pointer(ip_nat_tftp_hook, NULL);
+ synchronize_rcu();
}
static int __init ip_nat_tftp_init(void)
{
- BUG_ON(ip_nat_tftp_hook);
- ip_nat_tftp_hook = help;
+ BUG_ON(rcu_dereference(ip_nat_tftp_hook));
+ rcu_assign_pointer(ip_nat_tftp_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 7edad790478..cd520df4dcf 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -243,7 +243,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
pmsg->data_len = data_len;
pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
- pmsg->mark = entry->skb->nfmark;
+ pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -351,9 +351,10 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
if (v->data_len < sizeof(*user_iph))
return 0;
diff = v->data_len - e->skb->len;
- if (diff < 0)
- skb_trim(e->skb, v->data_len);
- else if (diff > 0) {
+ if (diff < 0) {
+ if (pskb_trim(e->skb, v->data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4b90927619b..09696f16aa9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -384,6 +384,7 @@ mark_source_chains(struct xt_table_info *newinfo,
for (;;) {
struct ipt_standard_target *t
= (void *)ipt_get_target(e);
+ int visited = e->comefrom & (1 << hook);
if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
printk("iptables: loop hook %u pos %u %08X.\n",
@@ -394,13 +395,20 @@ mark_source_chains(struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
/* Unconditional return/END. */
- if (e->target_offset == sizeof(struct ipt_entry)
+ if ((e->target_offset == sizeof(struct ipt_entry)
&& (strcmp(t->target.u.user.name,
IPT_STANDARD_TARGET) == 0)
&& t->verdict < 0
- && unconditional(&e->ip)) {
+ && unconditional(&e->ip)) || visited) {
unsigned int oldpos, size;
+ if (t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
/* Return: backtrack through the last
big jump. */
do {
@@ -438,6 +446,13 @@ mark_source_chains(struct xt_table_info *newinfo,
if (strcmp(t->target.u.user.name,
IPT_STANDARD_TARGET) == 0
&& newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct ipt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -470,28 +485,47 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
}
static inline int
-standard_check(const struct ipt_entry_target *t,
- unsigned int max_offset)
+check_entry(struct ipt_entry *e, const char *name)
{
- struct ipt_standard_target *targ = (void *)t;
+ struct ipt_entry_target *t;
- /* Check standard info. */
- if (targ->verdict >= 0
- && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
- duprintf("ipt_standard_check: bad verdict (%i)\n",
- targ->verdict);
- return 0;
+ if (!ip_checkentry(&e->ip)) {
+ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ return -EINVAL;
}
- if (targ->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("ipt_standard_check: bad negative verdict (%i)\n",
- targ->verdict);
- return 0;
+
+ if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset)
+ return -EINVAL;
+
+ t = ipt_get_target(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int check_match(struct ipt_entry_match *m, const char *name,
+ const struct ipt_ip *ip, unsigned int hookmask)
+{
+ struct ipt_match *match;
+ int ret;
+
+ match = m->u.kernel.match;
+ ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
+ name, hookmask, ip->proto,
+ ip->invflags & IPT_INV_PROTO);
+ if (!ret && m->u.kernel.match->checkentry
+ && !m->u.kernel.match->checkentry(name, ip, match, m->data,
+ hookmask)) {
+ duprintf("ip_tables: check failed for `%s'.\n",
+ m->u.kernel.match->name);
+ ret = -EINVAL;
}
- return 1;
+ return ret;
}
static inline int
-check_match(struct ipt_entry_match *m,
+find_check_match(struct ipt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
@@ -504,26 +538,15 @@ check_match(struct ipt_entry_match *m,
m->u.user.revision),
"ipt_%s", m->u.user.name);
if (IS_ERR(match) || !match) {
- duprintf("check_match: `%s' not found\n", m->u.user.name);
+ duprintf("find_check_match: `%s' not found\n", m->u.user.name);
return match ? PTR_ERR(match) : -ENOENT;
}
m->u.kernel.match = match;
- ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
- name, hookmask, ip->proto,
- ip->invflags & IPT_INV_PROTO);
+ ret = check_match(m, name, ip, hookmask);
if (ret)
goto err;
- if (m->u.kernel.match->checkentry
- && !m->u.kernel.match->checkentry(name, ip, match, m->data,
- hookmask)) {
- duprintf("ip_tables: check failed for `%s'.\n",
- m->u.kernel.match->name);
- ret = -EINVAL;
- goto err;
- }
-
(*i)++;
return 0;
err:
@@ -531,10 +554,29 @@ err:
return ret;
}
-static struct ipt_target ipt_standard_target;
+static inline int check_target(struct ipt_entry *e, const char *name)
+{
+ struct ipt_entry_target *t;
+ struct ipt_target *target;
+ int ret;
+
+ t = ipt_get_target(e);
+ target = t->u.kernel.target;
+ ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
+ name, e->comefrom, e->ip.proto,
+ e->ip.invflags & IPT_INV_PROTO);
+ if (!ret && t->u.kernel.target->checkentry
+ && !t->u.kernel.target->checkentry(name, e, target,
+ t->data, e->comefrom)) {
+ duprintf("ip_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ ret = -EINVAL;
+ }
+ return ret;
+}
static inline int
-check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
unsigned int *i)
{
struct ipt_entry_target *t;
@@ -542,13 +584,13 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
int ret;
unsigned int j;
- if (!ip_checkentry(&e->ip)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
j = 0;
- ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+ ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
+ e->comefrom, &j);
if (ret != 0)
goto cleanup_matches;
@@ -558,32 +600,16 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
t->u.user.revision),
"ipt_%s", t->u.user.name);
if (IS_ERR(target) || !target) {
- duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = target ? PTR_ERR(target) : -ENOENT;
goto cleanup_matches;
}
t->u.kernel.target = target;
- ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
- name, e->comefrom, e->ip.proto,
- e->ip.invflags & IPT_INV_PROTO);
+ ret = check_target(e, name);
if (ret)
goto err;
- if (t->u.kernel.target == &ipt_standard_target) {
- if (!standard_check(t, size)) {
- ret = -EINVAL;
- goto err;
- }
- } else if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(name, e, target, t->data,
- e->comefrom)) {
- duprintf("ip_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- ret = -EINVAL;
- goto err;
- }
-
(*i)++;
return 0;
err:
@@ -718,11 +744,11 @@ translate_table(const char *name,
/* Finally, each sanity check must pass */
i = 0;
ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry, name, size, &i);
+ find_check_entry, name, size, &i);
if (ret != 0) {
IPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ cleanup_entry, &i);
return ret;
}
@@ -1458,10 +1484,9 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
return -EINVAL;
}
- if (!ip_checkentry(&e->ip)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
off = 0;
entry_offset = (void *)e - (void *)base;
@@ -1477,7 +1502,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
t->u.user.revision),
"ipt_%s", t->u.user.name);
if (IS_ERR(target) || !target) {
- duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
ret = target ? PTR_ERR(target) : -ENOENT;
goto cleanup_matches;
}
@@ -1513,36 +1539,10 @@ cleanup_matches:
static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
void **dstptr, compat_uint_t *size, const char *name,
- const struct ipt_ip *ip, unsigned int hookmask, int *i)
+ const struct ipt_ip *ip, unsigned int hookmask)
{
- struct ipt_entry_match *dm;
- struct ipt_match *match;
- int ret;
-
- dm = (struct ipt_entry_match *)*dstptr;
- match = m->u.kernel.match;
xt_compat_match_from_user(m, dstptr, size);
-
- ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
- name, hookmask, ip->proto,
- ip->invflags & IPT_INV_PROTO);
- if (ret)
- goto err;
-
- if (m->u.kernel.match->checkentry
- && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
- hookmask)) {
- duprintf("ip_tables: check failed for `%s'.\n",
- m->u.kernel.match->name);
- ret = -EINVAL;
- goto err;
- }
- (*i)++;
return 0;
-
-err:
- module_put(m->u.kernel.match->me);
- return ret;
}
static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1553,19 +1553,18 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
struct ipt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h, j;
+ int ret, h;
ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
- j = 0;
*dstptr += sizeof(struct compat_ipt_entry);
ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
- name, &de->ip, de->comefrom, &j);
+ name, &de->ip, de->comefrom);
if (ret)
- goto cleanup_matches;
+ return ret;
de->target_offset = e->target_offset - (origsize - *size);
t = ipt_get_target(e);
target = t->u.kernel.target;
@@ -1578,34 +1577,18 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
+ return ret;
+}
- t = ipt_get_target(de);
- target = t->u.kernel.target;
- ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
- name, e->comefrom, e->ip.proto,
- e->ip.invflags & IPT_INV_PROTO);
- if (ret)
- goto err;
+static inline int compat_check_entry(struct ipt_entry *e, const char *name)
+{
+ int ret;
- ret = -EINVAL;
- if (t->u.kernel.target == &ipt_standard_target) {
- if (!standard_check(t, *size))
- goto err;
- } else if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(name, de, target,
- t->data, de->comefrom)) {
- duprintf("ip_tables: compat: check failed for `%s'.\n",
- t->u.kernel.target->name);
- goto err;
- }
- ret = 0;
- return ret;
+ ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom);
+ if (ret)
+ return ret;
-err:
- module_put(t->u.kernel.target->me);
-cleanup_matches:
- IPT_MATCH_ITERATE(e, cleanup_match, &j);
- return ret;
+ return check_target(e, name);
}
static int
@@ -1618,7 +1601,7 @@ translate_compat_table(const char *name,
unsigned int *hook_entries,
unsigned int *underflows)
{
- unsigned int i;
+ unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
unsigned int size;
@@ -1636,21 +1619,21 @@ translate_compat_table(const char *name,
}
duprintf("translate_compat_table: size %u\n", info->size);
- i = 0;
+ j = 0;
xt_compat_lock(AF_INET);
/* Walk through entries, checking offsets. */
ret = IPT_ENTRY_ITERATE(entry0, total_size,
check_compat_entry_size_and_hooks,
info, &size, entry0,
entry0 + total_size,
- hook_entries, underflows, &i, name);
+ hook_entries, underflows, &j, name);
if (ret != 0)
goto out_unlock;
ret = -EINVAL;
- if (i != number) {
+ if (j != number) {
duprintf("translate_compat_table: %u not %u entries\n",
- i, number);
+ j, number);
goto out_unlock;
}
@@ -1696,6 +1679,11 @@ translate_compat_table(const char *name,
if (!mark_source_chains(newinfo, valid_hooks, entry1))
goto free_newinfo;
+ ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
+ name);
+ if (ret)
+ goto free_newinfo;
+
/* And one copy for every other CPU */
for_each_possible_cpu(i)
if (newinfo->entries[i] && newinfo->entries[i] != entry1)
@@ -1709,8 +1697,10 @@ translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
out:
+ IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j);
return ret;
out_unlock:
+ compat_flush_offsets();
xt_compat_unlock(AF_INET);
goto out;
}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7a29d6e7baa..b1c11160b9d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -40,8 +40,6 @@
#define DEBUGP
#endif
-#define ASSERT_READ_LOCK(x)
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("iptables target for CLUSTERIP");
@@ -123,7 +121,6 @@ __clusterip_config_find(__be32 clusterip)
{
struct list_head *pos;
- ASSERT_READ_LOCK(&clusterip_lock);
list_for_each(pos, &clusterip_configs) {
struct clusterip_config *c = list_entry(pos,
struct clusterip_config, list);
@@ -170,7 +167,6 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
- char buffer[16];
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
@@ -187,12 +183,17 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
atomic_set(&c->entries, 1);
#ifdef CONFIG_PROC_FS
- /* create proc dir entry */
- sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
- c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir);
- if (!c->pde) {
- kfree(c);
- return NULL;
+ {
+ char buffer[16];
+
+ /* create proc dir entry */
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR,
+ clusterip_procdir);
+ if (!c->pde) {
+ kfree(c);
+ return NULL;
+ }
}
c->pde->proc_fops = &clusterip_proc_fops;
c->pde->data = c;
@@ -205,6 +206,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
return c;
}
+#ifdef CONFIG_PROC_FS
static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
@@ -232,6 +234,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
return 1;
}
+#endif
static inline u_int32_t
clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
@@ -444,6 +447,12 @@ checkentry(const char *tablename,
cipinfo->config = config;
}
+ if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+ printk(KERN_WARNING "can't load conntrack support for "
+ "proto=%d\n", target->family);
+ return 0;
+ }
+
return 1;
}
@@ -457,6 +466,8 @@ static void destroy(const struct xt_target *target, void *targinfo)
clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
+
+ nf_ct_l3proto_module_put(target->family);
}
static struct ipt_target clusterip_tgt = {
@@ -680,7 +691,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
{
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
- struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode);
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
struct clusterip_config *c = pde->data;
unsigned long nodenum;
@@ -737,8 +748,10 @@ static int __init ipt_clusterip_init(void)
CLUSTERIP_VERSION);
return 0;
+#ifdef CONFIG_PROC_FS
cleanup_hook:
nf_unregister_hook(&cip_arp_ops);
+#endif /* CONFIG_PROC_FS */
cleanup_target:
ipt_unregister_target(&clusterip_tgt);
return ret;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 1aa4517fbcd..b55d670a24d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -28,17 +28,16 @@ static inline int
set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
struct iphdr *iph = (*pskb)->nh.iph;
- u_int16_t oldtos;
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
+ __u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return 0;
iph = (*pskb)->nh.iph;
oldtos = iph->tos;
iph->tos &= ~IPT_ECN_IP_MASK;
iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
- iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF),
- htons(iph->tos), iph->check);
+ nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
}
return 1;
}
@@ -72,10 +71,8 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- tcph->check = nf_proto_csum_update((*pskb),
- oldval ^ htons(0xFFFF),
- ((__be16 *)tcph)[6],
- tcph->check, 0);
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ oldval, ((__be16 *)tcph)[6], 0);
return 1;
}
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 7dc820df8bc..c96de16fefa 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -171,11 +171,15 @@ static void dump_packet(const struct nf_loginfo *info,
}
break;
}
- case IPPROTO_UDP: {
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE: {
struct udphdr _udph, *uh;
- /* Max length: 10 "PROTO=UDP " */
- printk("PROTO=UDP ");
+ if (ih->protocol == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ printk("PROTO=UDP " );
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ printk("PROTO=UDPLITE ");
if (ntohs(ih->frag_off) & IP_OFFSET)
break;
@@ -341,6 +345,7 @@ static void dump_packet(const struct nf_loginfo *info,
/* IP: 40+46+6+11+127 = 230 */
/* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
/* UDP: 10+max(25,20) = 35 */
+ /* UDPLITE: 14+max(25,20) = 39 */
/* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
/* ESP: 10+max(25)+15 = 50 */
/* AH: 9+max(25)+15 = 49 */
@@ -425,13 +430,8 @@ ipt_log_target(struct sk_buff **pskb,
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- if (loginfo->logflags & IPT_LOG_NFLOG)
- nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
- "%s", loginfo->prefix);
- else
- ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
- loginfo->prefix);
-
+ ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+ loginfo->prefix);
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3dbfcfac8a8..28b9233956b 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -2,7 +2,7 @@
(depending on route). */
/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -20,7 +20,11 @@
#include <net/checksum.h>
#include <net/route.h>
#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_rule.h>
+#else
#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#endif
#include <linux/netfilter_ipv4/ip_tables.h>
MODULE_LICENSE("GPL");
@@ -65,23 +69,33 @@ masquerade_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
+#ifdef CONFIG_NF_NAT_NEEDED
+ struct nf_conn_nat *nat;
+#endif
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr;
struct ip_nat_range newrange;
+ const struct ip_nat_multi_range_compat *mr;
struct rtable *rt;
__be32 newsrc;
IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
ct = ip_conntrack_get(*pskb, &ctinfo);
+#ifdef CONFIG_NF_NAT_NEEDED
+ nat = nfct_nat(ct);
+#endif
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
*/
+#ifdef CONFIG_NF_NAT_NEEDED
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+#else
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
+#endif
return NF_ACCEPT;
mr = targinfo;
@@ -93,7 +107,11 @@ masquerade_target(struct sk_buff **pskb,
}
write_lock_bh(&masq_lock);
+#ifdef CONFIG_NF_NAT_NEEDED
+ nat->masq_index = out->ifindex;
+#else
ct->nat.masq_index = out->ifindex;
+#endif
write_unlock_bh(&masq_lock);
/* Transfer from original range. */
@@ -109,10 +127,17 @@ masquerade_target(struct sk_buff **pskb,
static inline int
device_cmp(struct ip_conntrack *i, void *ifindex)
{
+#ifdef CONFIG_NF_NAT_NEEDED
+ struct nf_conn_nat *nat = nfct_nat(i);
+#endif
int ret;
read_lock_bh(&masq_lock);
+#ifdef CONFIG_NF_NAT_NEEDED
+ ret = (nat->masq_index == (int)(long)ifindex);
+#else
ret = (i->nat.masq_index == (int)(long)ifindex);
+#endif
read_unlock_bh(&masq_lock);
return ret;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 58a88f22710..9390e90f2b2 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -15,7 +15,11 @@
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_rule.h>
+#else
#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#endif
#define MODULENAME "NETMAP"
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c0dcfe9d610..462eceb3a1b 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -1,6 +1,6 @@
/* Redirect. Simple mapping which alters dst to a local IP address. */
/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -18,7 +18,11 @@
#include <net/protocol.h>
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_rule.h>
+#else
#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index ad0312d0e4f..f0319e5ee43 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -76,7 +76,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
/* This packet will not be the same as the other: clear nf fields */
nf_reset(nskb);
- nskb->nfmark = 0;
+ nskb->mark = 0;
skb_init_secmark(nskb);
tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
@@ -114,6 +114,14 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->window = 0;
tcph->urg_ptr = 0;
+ /* Adjust TCP checksum */
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
+ nskb->nh.iph->saddr,
+ nskb->nh.iph->daddr,
+ csum_partial((char *)tcph,
+ sizeof(struct tcphdr), 0));
+
/* Set DF, id = 0 */
nskb->nh.iph->frag_off = htons(IP_DF);
nskb->nh.iph->id = 0;
@@ -129,14 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (ip_route_me_harder(&nskb, addr_type))
goto free_nskb;
- /* Adjust TCP checksum */
nskb->ip_summed = CHECKSUM_NONE;
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
- nskb->nh.iph->saddr,
- nskb->nh.iph->daddr,
- csum_partial((char *)tcph,
- sizeof(struct tcphdr), 0));
+
/* Adjust IP TTL */
nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index b38b13328d7..3dcf2941133 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -34,7 +34,11 @@
#include <net/protocol.h>
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_rule.h>
+#else
#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#endif
#include <linux/netfilter_ipv4/ipt_SAME.h>
MODULE_LICENSE("GPL");
@@ -152,11 +156,17 @@ same_target(struct sk_buff **pskb,
Here we calculate the index in same->iparray which
holds the ipaddress we should use */
+#ifdef CONFIG_NF_NAT_NEEDED
+ tmpip = ntohl(t->src.u3.ip);
+
+ if (!(same->info & IPT_SAME_NODST))
+ tmpip += ntohl(t->dst.u3.ip);
+#else
tmpip = ntohl(t->src.ip);
if (!(same->info & IPT_SAME_NODST))
tmpip += ntohl(t->dst.ip);
-
+#endif
aindex = tmpip % same->ipnum;
new_ip = htonl(same->iparray[aindex]);
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 108b6b76311..93eb5c3c188 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -97,10 +97,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = (newmss & 0x00ff);
- tcph->check = nf_proto_csum_update(*pskb,
- htons(oldmss)^htons(0xFFFF),
- htons(newmss),
- tcph->check, 0);
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ htons(oldmss), htons(newmss), 0);
return IPT_CONTINUE;
}
}
@@ -126,28 +124,22 @@ ipt_tcpmss_target(struct sk_buff **pskb,
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
- tcph->check = nf_proto_csum_update(*pskb,
- htons(tcplen) ^ htons(0xFFFF),
- htons(tcplen + TCPOLEN_MSS),
- tcph->check, 1);
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = (newmss & 0x00ff);
- tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt),
- tcph->check, 0);
+ nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
- tcph->check = nf_proto_csum_update(*pskb,
- oldval ^ htons(0xFFFF),
- ((__be16 *)tcph)[6],
- tcph->check, 0);
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ oldval, ((__be16 *)tcph)[6], 0);
newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
- iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF),
- newtotlen, iph->check);
+ nf_csum_replace2(&iph->check, iph->tot_len, newtotlen);
iph->tot_len = newtotlen;
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 83b80b3a5d2..18e74ac4d42 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -30,16 +30,15 @@ target(struct sk_buff **pskb,
{
const struct ipt_tos_target_info *tosinfo = targinfo;
struct iphdr *iph = (*pskb)->nh.iph;
- u_int16_t oldtos;
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
+ __u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
iph = (*pskb)->nh.iph;
oldtos = iph->tos;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
- iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF),
- htons(iph->tos), iph->check);
+ nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
}
return IPT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index ac9517d62af..fffe5ca82e9 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -54,9 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb,
}
if (new_ttl != iph->ttl) {
- iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF),
- htons(new_ttl << 8),
- iph->check);
+ nf_csum_replace2(&iph->check, htons(iph->ttl << 8),
+ htons(new_ttl << 8));
iph->ttl = new_ttl;
}
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2b104ea54f4..dbd34783a64 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -239,7 +239,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
pm->data_len = copy_len;
pm->timestamp_sec = skb->tstamp.off_sec;
pm->timestamp_usec = skb->tstamp.off_usec;
- pm->mark = skb->nfmark;
+ pm->mark = skb->mark;
pm->hook = hooknum;
if (prefix != NULL)
strncpy(pm->prefix, prefix, sizeof(pm->prefix));
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 126db44e71a..4db0e73c56f 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -401,7 +401,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
static ssize_t recent_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode);
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
struct recent_table *t = pde->data;
struct recent_entry *e;
char buf[sizeof("+255.255.255.255")], *c = buf;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index b91f3582359..af293988944 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -132,7 +132,7 @@ ipt_local_hook(unsigned int hook,
unsigned int ret;
u_int8_t tos;
__be32 saddr, daddr;
- unsigned long nfmark;
+ u_int32_t mark;
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
@@ -143,7 +143,7 @@ ipt_local_hook(unsigned int hook,
}
/* Save things which could affect route */
- nfmark = (*pskb)->nfmark;
+ mark = (*pskb)->mark;
saddr = (*pskb)->nh.iph->saddr;
daddr = (*pskb)->nh.iph->daddr;
tos = (*pskb)->nh.iph->tos;
@@ -153,9 +153,7 @@ ipt_local_hook(unsigned int hook,
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
&& ((*pskb)->nh.iph->saddr != saddr
|| (*pskb)->nh.iph->daddr != daddr
-#ifdef CONFIG_IP_ROUTE_FWMARK
- || (*pskb)->nfmark != nfmark
-#endif
+ || (*pskb)->mark != mark
|| (*pskb)->nh.iph->tos != tos))
if (ip_route_me_harder(pskb, RTN_UNSPEC))
ret = NF_DROP;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0af803df82b..471b638cede 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -27,7 +27,7 @@
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
@@ -38,12 +38,10 @@
#define DEBUGP(format, args...)
#endif
-DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
-
static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
- u_int32_t _addrs[2], *ap;
+ __be32 _addrs[2], *ap;
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
@@ -113,10 +111,12 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
return NF_ACCEPT;
}
-int nat_module_is_loaded = 0;
+int nf_nat_module_is_loaded = 0;
+EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded);
+
static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
{
- if (nat_module_is_loaded)
+ if (nf_nat_module_is_loaded)
return NF_CT_F_NAT;
return NF_CT_F_BASIC;
@@ -268,43 +268,59 @@ static struct nf_hook_ops ipv4_conntrack_ops[] = {
},
};
-#ifdef CONFIG_SYSCTL
-/* From nf_conntrack_proto_icmp.c */
-extern unsigned int nf_ct_icmp_timeout;
-static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
-static ctl_table nf_ct_sysctl_table[] = {
+static ctl_table ip_ct_sysctl_table[] = {
{
- .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT,
- .procname = "nf_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
- .maxlen = sizeof(unsigned int),
+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
+ .procname = "ip_conntrack_max",
+ .data = &nf_conntrack_max,
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
+ .proc_handler = &proc_dointvec,
},
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_netfilter_table[] = {
{
- .ctl_name = NET_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = nf_ct_sysctl_table,
+ .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
+ .procname = "ip_conntrack_count",
+ .data = &nf_conntrack_count,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
+ .procname = "ip_conntrack_buckets",
+ .data = &nf_conntrack_htable_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
+ .procname = "ip_conntrack_checksum",
+ .data = &nf_conntrack_checksum,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
},
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_net_table[] = {
{
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = nf_ct_netfilter_table,
+ .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+ .procname = "ip_conntrack_log_invalid",
+ .data = &nf_ct_log_invalid,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
},
- { .ctl_name = 0 }
+ {
+ .ctl_name = 0
+ }
};
-#endif
+#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
/* Fast function for those who don't want to parse /proc (and I don't
blame them). */
@@ -396,10 +412,8 @@ static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
return -EINVAL;
- t->src.u3.ip =
- *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
- t->dst.u3.ip =
- *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+ t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+ t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
return 0;
}
@@ -426,14 +440,15 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.tuple_to_nfattr = ipv4_tuple_to_nfattr,
.nfattr_to_tuple = ipv4_nfattr_to_tuple,
#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
+ .ctl_table = ip_ct_sysctl_table,
+#endif
.me = THIS_MODULE,
};
-extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
-
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
static int __init nf_conntrack_l3proto_ipv4_init(void)
@@ -448,19 +463,19 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
return ret;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register tcp.\n");
goto cleanup_sockopt;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register udp.\n");
goto cleanup_tcp;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
if (ret < 0) {
printk("nf_conntrack_ipv4: can't register icmp.\n");
goto cleanup_udp;
@@ -478,28 +493,24 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
printk("nf_conntrack_ipv4: can't register hooks.\n");
goto cleanup_ipv4;
}
-#ifdef CONFIG_SYSCTL
- nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
- if (nf_ct_ipv4_sysctl_header == NULL) {
- printk("nf_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ ret = nf_conntrack_ipv4_compat_init();
+ if (ret < 0)
goto cleanup_hooks;
- }
#endif
return ret;
-
-#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
cleanup_hooks:
- nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
#endif
cleanup_ipv4:
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
cleanup_icmp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
cleanup_udp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
cleanup_tcp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
@@ -508,18 +519,16 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
synchronize_net();
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ nf_conntrack_ipv4_compat_fini();
#endif
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
nf_unregister_sockopt(&so_getorigdst);
}
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
new file mode 100644
index 00000000000..3b31bc64960
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -0,0 +1,412 @@
+/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#ifdef CONFIG_NF_CT_ACCT
+static unsigned int
+seq_print_counters(struct seq_file *s,
+ const struct ip_conntrack_counter *counter)
+{
+ return seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)counter->packets,
+ (unsigned long long)counter->bytes);
+}
+#else
+#define seq_print_counters(x, y) 0
+#endif
+
+struct ct_iter_state {
+ unsigned int bucket;
+};
+
+static struct list_head *ct_get_first(struct seq_file *seq)
+{
+ struct ct_iter_state *st = seq->private;
+
+ for (st->bucket = 0;
+ st->bucket < nf_conntrack_htable_size;
+ st->bucket++) {
+ if (!list_empty(&nf_conntrack_hash[st->bucket]))
+ return nf_conntrack_hash[st->bucket].next;
+ }
+ return NULL;
+}
+
+static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+{
+ struct ct_iter_state *st = seq->private;
+
+ head = head->next;
+ while (head == &nf_conntrack_hash[st->bucket]) {
+ if (++st->bucket >= nf_conntrack_htable_size)
+ return NULL;
+ head = nf_conntrack_hash[st->bucket].next;
+ }
+ return head;
+}
+
+static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct list_head *head = ct_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ read_lock_bh(&nf_conntrack_lock);
+ return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
+ read_unlock_bh(&nf_conntrack_lock);
+}
+
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ const struct nf_conntrack_tuple_hash *hash = v;
+ const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ struct nf_conntrack_l3proto *l3proto;
+ struct nf_conntrack_l4proto *l4proto;
+
+ NF_CT_ASSERT(ct);
+
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ return 0;
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET)
+ return 0;
+
+ l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.src.l3num);
+ NF_CT_ASSERT(l3proto);
+ l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.src.l3num,
+ ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.protonum);
+ NF_CT_ASSERT(l4proto);
+
+ if (seq_printf(s, "%-8s %u %ld ",
+ l4proto->name,
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
+ return -ENOSPC;
+
+ if (l3proto->print_conntrack(s, ct))
+ return -ENOSPC;
+
+ if (l4proto->print_conntrack(s, ct))
+ return -ENOSPC;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto))
+ return -ENOSPC;
+
+ if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL]))
+ return -ENOSPC;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
+ if (seq_printf(s, "[UNREPLIED] "))
+ return -ENOSPC;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto))
+ return -ENOSPC;
+
+ if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY]))
+ return -ENOSPC;
+
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (seq_printf(s, "[ASSURED] "))
+ return -ENOSPC;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (seq_printf(s, "mark=%u ", ct->mark))
+ return -ENOSPC;
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (seq_printf(s, "secmark=%u ", ct->secmark))
+ return -ENOSPC;
+#endif
+
+ if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ return -ENOSPC;
+
+ return 0;
+}
+
+static struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+static int ct_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ struct ct_iter_state *st;
+ int ret;
+
+ st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
+ if (st == NULL)
+ return -ENOMEM;
+ ret = seq_open(file, &ct_seq_ops);
+ if (ret)
+ goto out_free;
+ seq = file->private_data;
+ seq->private = st;
+ memset(st, 0, sizeof(struct ct_iter_state));
+ return ret;
+out_free:
+ kfree(st);
+ return ret;
+}
+
+static struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/* expects */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct list_head *e = &nf_conntrack_expect_list;
+ loff_t i;
+
+ /* strange seq_file api calls stop even if we fail,
+ * thus we need to grab lock since stop unlocks */
+ read_lock_bh(&nf_conntrack_lock);
+
+ if (list_empty(e))
+ return NULL;
+
+ for (i = 0; i <= *pos; i++) {
+ e = e->next;
+ if (e == &nf_conntrack_expect_list)
+ return NULL;
+ }
+ return e;
+}
+
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct list_head *e = v;
+
+ ++*pos;
+ e = e->next;
+
+ if (e == &nf_conntrack_expect_list)
+ return NULL;
+
+ return e;
+}
+
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
+ read_unlock_bh(&nf_conntrack_lock);
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *exp = v;
+
+ if (exp->tuple.src.l3num != AF_INET)
+ return 0;
+
+ if (exp->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&exp->timeout)
+ ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+
+ seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
+
+ print_tuple(s, &exp->tuple,
+ __nf_ct_l3proto_find(exp->tuple.src.l3num),
+ __nf_ct_l4proto_find(exp->tuple.src.l3num,
+ exp->tuple.dst.protonum));
+ return seq_putc(s, '\n');
+}
+
+static struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &exp_seq_ops);
+}
+
+static struct file_operations ip_exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return &per_cpu(nf_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int cpu;
+
+ for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return &per_cpu(nf_conntrack_stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
+ struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete
+ );
+ return 0;
+}
+
+static struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ct_cpu_seq_ops);
+}
+
+static struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+
+ proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
+ if (!proc)
+ goto err1;
+
+ proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+ &ip_exp_file_ops);
+ if (!proc_exp)
+ goto err2;
+
+ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+ if (!proc_stat)
+ goto err3;
+
+ proc_stat->proc_fops = &ct_cpu_seq_fops;
+ proc_stat->owner = THIS_MODULE;
+
+ return 0;
+
+err3:
+ proc_net_remove("ip_conntrack_expect");
+err2:
+ proc_net_remove("ip_conntrack");
+err1:
+ return -ENOMEM;
+}
+
+void __exit nf_conntrack_ipv4_compat_fini(void)
+{
+ remove_proc_entry("ip_conntrack", proc_net_stat);
+ proc_net_remove("ip_conntrack_expect");
+ proc_net_remove("ip_conntrack");
+}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 790f00d500c..db9e7c45d3b 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,10 +22,10 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
+static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
@@ -152,7 +152,7 @@ icmp_error_message(struct sk_buff *skb,
struct icmphdr icmp;
struct iphdr ip;
} _in, *inside;
- struct nf_conntrack_protocol *innerproto;
+ struct nf_conntrack_l4proto *innerproto;
struct nf_conntrack_tuple_hash *h;
int dataoff;
@@ -170,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
- innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
+ innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -311,7 +311,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
tuple->dst.u.icmp.code =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
tuple->src.u.icmp.id =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
if (tuple->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[tuple->dst.u.icmp.type])
@@ -321,11 +321,42 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
}
#endif
-struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *icmp_sysctl_header;
+static struct ctl_table icmp_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT,
+ .procname = "nf_conntrack_icmp_timeout",
+ .data = &nf_ct_icmp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table icmp_compat_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
+ .procname = "ip_conntrack_icmp_timeout",
+ .data = &nf_ct_icmp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
- .list = { NULL, NULL },
.l3proto = PF_INET,
- .proto = IPPROTO_ICMP,
+ .l4proto = IPPROTO_ICMP,
.name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
@@ -341,6 +372,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
.tuple_to_nfattr = icmp_tuple_to_nfattr,
.nfattr_to_tuple = icmp_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_header = &icmp_sysctl_header,
+ .ctl_table = icmp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = icmp_compat_sysctl_table,
+#endif
+#endif
};
-
-EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_icmp);
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
new file mode 100644
index 00000000000..0f17098917b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -0,0 +1,78 @@
+/* Amanda extension for TCP NAT alteration.
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on a copy of HW's ip_nat_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_amanda");
+
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+{
+ char buffer[sizeof("65535")];
+ u_int16_t port;
+ unsigned int ret;
+
+ /* Connection comes from client. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_ORIGINAL;
+
+ /* When you see the packet, we need to NAT it the same as
+ * this one (ie. same IP: it will be TCP and master is UDP). */
+ exp->expectfn = nf_nat_follow_master;
+
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (nf_conntrack_expect_related(exp) == 0)
+ break;
+ }
+
+ if (port == 0)
+ return NF_DROP;
+
+ sprintf(buffer, "%u", port);
+ ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+ matchoff, matchlen,
+ buffer, strlen(buffer));
+ if (ret != NF_ACCEPT)
+ nf_conntrack_unexpect_related(exp);
+ return ret;
+}
+
+static void __exit nf_nat_amanda_fini(void)
+{
+ rcu_assign_pointer(nf_nat_amanda_hook, NULL);
+ synchronize_rcu();
+}
+
+static int __init nf_nat_amanda_init(void)
+{
+ BUG_ON(rcu_dereference(nf_nat_amanda_hook));
+ rcu_assign_pointer(nf_nat_amanda_hook, help);
+ return 0;
+}
+
+module_init(nf_nat_amanda_init);
+module_exit(nf_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
new file mode 100644
index 00000000000..86a92272b05
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -0,0 +1,647 @@
+/* NAT for netfilter; shared with compatibility layer. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h> /* For tcp_prot in getorigdst */
+#include <linux/icmp.h>
+#include <linux/udp.h>
+#include <linux/jhash.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static DEFINE_RWLOCK(nf_nat_lock);
+
+static struct nf_conntrack_l3proto *l3proto = NULL;
+
+/* Calculated at init based on memory size */
+static unsigned int nf_nat_htable_size;
+
+static struct list_head *bysource;
+
+#define MAX_IP_NAT_PROTO 256
+static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
+
+/* Return the nf_nat_protos[] entry for protonum.  Plain table read: takes
+ * neither nf_nat_lock nor a module reference. */
+static inline struct nf_nat_protocol *
+__nf_nat_proto_find(u_int8_t protonum)
+{
+ return nf_nat_protos[protonum];
+}
+
+/* Look up the NAT protocol handler for protonum and take a reference on
+ * its owning module (p->me).  If try_module_get() fails,
+ * &nf_nat_unknown_protocol is returned instead.  Callers release the
+ * result with nf_nat_proto_put(). */
+struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+ struct nf_nat_protocol *p;
+
+ /* we need to disable preemption to make sure 'p' doesn't get
+ * removed until we've grabbed the reference */
+ preempt_disable();
+ p = __nf_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &nf_nat_unknown_protocol;
+ preempt_enable();
+
+ return p;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
+
+/* Release a handler obtained from nf_nat_proto_find_get() by dropping the
+ * reference on its owning module (p->me). */
+void
+nf_nat_proto_put(struct nf_nat_protocol *p)
+{
+ module_put(p->me);
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_put);
+
+/* Bucket index into bysource[], the extra per-conntrack hash used for
+ * fast lookup by original source. */
+static inline unsigned int
+hash_by_src(const struct nf_conntrack_tuple *tuple)
+{
+	u32 mixed;
+
+	/* Keyed on source address, source proto part and protocol number so
+	 * that one original source keeps landing in the same bucket. */
+	mixed = jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all,
+			     tuple->dst.protonum, 0);
+	return mixed % nf_nat_htable_size;
+}
+
+/* No one is using the conntrack by the time this is called.
+ * Installed as nf_conntrack_destroyed by nf_nat_init(); unlinks the
+ * conntrack's NAT info from the bysource hash under nf_nat_lock. */
+static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
+{
+ struct nf_conn_nat *nat;
+ /* Nothing to unlink unless one of the NAT "done" bits is set;
+ * nf_nat_setup_info() sets them after hashing the entry. */
+ if (!(conn->status & IPS_NAT_DONE_MASK))
+ return;
+
+ nat = nfct_nat(conn);
+ write_lock_bh(&nf_nat_lock);
+ list_del(&nat->info.bysource);
+ write_unlock_bh(&nf_nat_lock);
+}
+
+/* Returns the result of nf_conntrack_tuple_taken() for the inverse of
+ * @tuple, i.e. whether the tuple is already in use by a conntrack other
+ * than @ignored_conntrack. */
+int
+nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
+		  const struct nf_conn *ignored_conntrack)
+{
+	struct nf_conntrack_tuple inverse;
+
+	/* Conntrack only keeps track of incoming tuples, and with NAT the
+	 * outgoing ones have no fixed mapping.  So invert the tuple and
+	 * look for the incoming reply instead.  (A separate hash could be
+	 * kept if this proves too slow.) */
+	nf_ct_invert_tuplepr(&inverse, tuple);
+	return nf_conntrack_tuple_taken(&inverse, ignored_conntrack);
+}
+EXPORT_SYMBOL(nf_nat_used_tuple);
+
+/* Would source-mapping to @tuple satisfy the constraints of @range?
+ * Returns 1 if so, 0 otherwise. */
+static int
+in_range(const struct nf_conntrack_tuple *tuple,
+	 const struct nf_nat_range *range)
+{
+	struct nf_nat_protocol *proto;
+
+	/* When IPs are being mapped the source must lie inside the range;
+	 * otherwise let the caller move on to a new source IP. */
+	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+		u_int32_t src = ntohl(tuple->src.u3.ip);
+
+		if (src < ntohl(range->min_ip) || src > ntohl(range->max_ip))
+			return 0;
+	}
+
+	/* No per-protocol constraint given: the IP check was all there is. */
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+		return 1;
+
+	proto = __nf_nat_proto_find(tuple->dst.protonum);
+	return proto->in_range(tuple, IP_NAT_MANIP_SRC,
+			       &range->min, &range->max) ? 1 : 0;
+}
+
+/* True when the original-direction tuple of @ct has the same protocol
+ * number, source address and source proto part as @tuple. */
+static inline int
+same_src(const struct nf_conn *ct,
+	 const struct nf_conntrack_tuple *tuple)
+{
+	const struct nf_conntrack_tuple *orig =
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	if (orig->dst.protonum != tuple->dst.protonum)
+		return 0;
+	if (orig->src.u3.ip != tuple->src.u3.ip)
+		return 0;
+	return orig->src.u.all == tuple->src.u.all;
+}
+
+/* Only called for SRC manip.
+ * Walks the bysource bucket of @tuple under nf_nat_lock looking for a
+ * conntrack with the same original source whose existing mapping fits
+ * @range.  On success the mapping is left in @result and 1 is returned;
+ * otherwise 0 (and @result may have been scribbled on). */
+static int
+find_appropriate_src(const struct nf_conntrack_tuple *tuple,
+		     struct nf_conntrack_tuple *result,
+		     const struct nf_nat_range *range)
+{
+	unsigned int bucket = hash_by_src(tuple);
+	struct nf_conn_nat *nat;
+	struct nf_conn *ct;
+	int found = 0;
+
+	read_lock_bh(&nf_nat_lock);
+	list_for_each_entry(nat, &bysource[bucket], info.bysource) {
+		ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data));
+		if (!same_src(ct, tuple))
+			continue;
+
+		/* Take the source part from the reply tuple, keep our own
+		 * destination. */
+		nf_ct_invert_tuplepr(result,
+				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+		result->dst = tuple->dst;
+
+		if (in_range(result, range)) {
+			found = 1;
+			break;
+		}
+	}
+	read_unlock_bh(&nf_nat_lock);
+
+	return found;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
+   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+   1-65535, we don't do pro-rata allocation based on ports; we choose
+   the ip with the lowest src-ip/dst-ip/proto usage.
+
+   Picks the IP to map to and stores it in the source (SRC manip) or
+   destination (DST manip) address of @tuple.  Does nothing unless
+   @range has IP_NAT_RANGE_MAP_IPS set.  @ct is currently unused.
+*/
+static void
+find_best_ips_proto(struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    const struct nf_conn *ct,
+		    enum nf_nat_manip_type maniptype)
+{
+	__be32 *var_ipp;
+	/* Host order */
+	u_int32_t minip, maxip, span, j;
+
+	/* No IP mapping?  Do nothing. */
+	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+		return;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		var_ipp = &tuple->src.u3.ip;
+	else
+		var_ipp = &tuple->dst.u3.ip;
+
+	/* Fast path: only one choice. */
+	if (range->min_ip == range->max_ip) {
+		*var_ipp = range->min_ip;
+		return;
+	}
+
+	/* Hashing source and destination IPs gives a fairly even
+	 * spread in practice (if there are a small number of IPs
+	 * involved, there usually aren't that many connections
+	 * anyway).  The consistency means that servers see the same
+	 * client coming from the same IP (some Internet Banking sites
+	 * like this), even across reboots. */
+	minip = ntohl(range->min_ip);
+	maxip = ntohl(range->max_ip);
+	j = jhash_2words((__force u32)tuple->src.u3.ip,
+			 (__force u32)tuple->dst.u3.ip, 0);
+
+	/* The span wraps to 0 when the range covers the whole 32-bit space
+	 * (e.g. 0.0.0.0-255.255.255.255); every address is then eligible,
+	 * so use the hash directly instead of dividing by zero. */
+	span = maxip - minip + 1;
+	if (span == 0)
+		*var_ipp = htonl(minip + j);
+	else
+		*var_ipp = htonl(minip + j % span);
+}
+
+/* Build in @tuple a mapping of @orig_tuple that lies inside @range.
+ * For NF_IP_POST_ROUTING the source is changed; for NF_IP_PRE_ROUTING
+ * and NF_IP_LOCAL_OUT the destination is.  Uniqueness is attempted but
+ * not guaranteed: at worst (or on a race) a duplicate is caught in
+ * __ip_conntrack_confirm and the packet is dropped. */
+static void
+get_unique_tuple(struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_tuple *orig_tuple,
+		 const struct nf_nat_range *range,
+		 struct nf_conn *ct,
+		 enum nf_nat_manip_type maniptype)
+{
+	struct nf_nat_protocol *proto;
+
+	/* Step 1: if this srcip/proto/src-proto-part already has a mapping
+	 * that is in range and still unique, reuse it.  Only source
+	 * (NAT/masq) mappings need this; local source mappings are not
+	 * done so far, so multiple manips are not an issue. */
+	if (maniptype == IP_NAT_MANIP_SRC &&
+	    find_appropriate_src(orig_tuple, tuple, range)) {
+		DEBUGP("get_unique_tuple: Found current src map\n");
+		if (!nf_nat_used_tuple(tuple, ct))
+			return;
+	}
+
+	/* Step 2: start over from the original and pick the least-used
+	 * IP/proto combination in the range. */
+	*tuple = *orig_tuple;
+	find_best_ips_proto(tuple, range, ct, maniptype);
+
+	/* Step 3: let the per-protocol part map into the range so the
+	 * tuple becomes unique. */
+	proto = nf_nat_proto_find_get(orig_tuple->dst.protonum);
+
+	/* Remapping is only needed when the tuple is out of range or
+	 * already taken; then it is the protocol's last chance to come up
+	 * with a unique tuple. */
+	if (((range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) &&
+	     !proto->in_range(tuple, maniptype, &range->min, &range->max)) ||
+	    nf_nat_used_tuple(tuple, ct))
+		proto->unique_tuple(tuple, range, maniptype, ct);
+
+	nf_nat_proto_put(proto);
+}
+
+/* Set up the NAT mapping of @ct for the manip type implied by @hooknum,
+ * constrained by @range: choose a tuple via get_unique_tuple(), alter the
+ * conntrack's reply tuple if the chosen tuple differs, link the conntrack
+ * into the bysource hash the first time round, and set the matching
+ * "done" bit.  Always returns NF_ACCEPT.  BUG()s if this manip type was
+ * already initialized for @ct. */
+unsigned int
+nf_nat_setup_info(struct nf_conn *ct,
+ const struct nf_nat_range *range,
+ unsigned int hooknum)
+{
+ struct nf_conntrack_tuple curr_tuple, new_tuple;
+ struct nf_conn_nat *nat = nfct_nat(ct);
+ struct nf_nat_info *info = &nat->info;
+ /* Sampled before any status bit is touched below. */
+ int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+ hooknum == NF_IP_POST_ROUTING ||
+ hooknum == NF_IP_LOCAL_IN ||
+ hooknum == NF_IP_LOCAL_OUT);
+ BUG_ON(nf_nat_initialized(ct, maniptype));
+
+ /* What we've got will look like inverse of reply. Normally
+ this is what is in the conntrack, except for prior
+ manipulations (future optimization: if num_manips == 0,
+ orig_tp =
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+ nf_ct_invert_tuplepr(&curr_tuple,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
+
+ if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+ struct nf_conntrack_tuple reply;
+
+ /* Alter conntrack table so will recognize replies. */
+ nf_ct_invert_tuplepr(&reply, &new_tuple);
+ nf_conntrack_alter_reply(ct, &reply);
+
+ /* Non-atomic: we own this at the moment. */
+ if (maniptype == IP_NAT_MANIP_SRC)
+ ct->status |= IPS_SRC_NAT;
+ else
+ ct->status |= IPS_DST_NAT;
+ }
+
+ /* Place in source hash if this is the first time. */
+ if (have_to_hash) {
+ unsigned int srchash;
+
+ srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ write_lock_bh(&nf_nat_lock);
+ list_add(&info->bysource, &bysource[srchash]);
+ write_unlock_bh(&nf_nat_lock);
+ }
+
+ /* It's done. */
+ if (maniptype == IP_NAT_MANIP_DST)
+ set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+ else
+ set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL(nf_nat_setup_info);
+
+/* Rewrite the packet whose IP header sits @iphdroff bytes into the skb
+ * so that it matches @target for the given manip type: first the
+ * per-protocol part through the protocol handler for @proto, then the IP
+ * source or destination address together with the header checksum.
+ * Returns true if succeeded. */
+static int
+manip_pkt(u_int16_t proto,
+	  struct sk_buff **pskb,
+	  unsigned int iphdroff,
+	  const struct nf_conntrack_tuple *target,
+	  enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph;
+	struct nf_nat_protocol *handler;
+	int handled;
+
+	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
+		return 0;
+
+	/* Manipulate protocol part. */
+	handler = nf_nat_proto_find_get(proto);
+	handled = (handler->manip_pkt(pskb, iphdroff, target, maniptype) != 0);
+	nf_nat_proto_put(handler);
+	if (!handled)
+		return 0;
+
+	/* Only look up the header now, after the handler had its go at
+	 * *pskb. */
+	iph = (void *)(*pskb)->data + iphdroff;
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+		iph->saddr = target->src.u3.ip;
+	} else {
+		nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+		iph->daddr = target->dst.u3.ip;
+	}
+	return 1;
+}
+
+/* Apply to the packet the manipulation that nf_nat_setup_info() recorded
+ * for @ct, as far as it concerns this hook and direction.
+ * Returns NF_DROP if the packet could not be rewritten, else NF_ACCEPT. */
+unsigned int nf_nat_packet(struct nf_conn *ct,
+			   enum ip_conntrack_info ctinfo,
+			   unsigned int hooknum,
+			   struct sk_buff **pskb)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
+	struct nf_conntrack_tuple target;
+	unsigned long wanted;
+
+	if (mtype == IP_NAT_MANIP_SRC)
+		wanted = IPS_SRC_NAT;
+	else
+		wanted = IPS_DST_NAT;
+
+	/* In the reply direction the roles are swapped. */
+	if (dir == IP_CT_DIR_REPLY)
+		wanted ^= IPS_NAT_MASK;
+
+	/* Non-atomic read is fine: these bits don't change. */
+	if (!(ct->status & wanted))
+		return NF_ACCEPT;
+
+	/* The packet should end up looking like the inverse of the other
+	 * direction. */
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+	if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_nat_packet);
+
+/* Translate an ICMP error packet belonging to @ct: the packet embedded in
+ * the ICMP payload is manipulated first, then (if NAT applies for this
+ * hook/direction) the outer IP header.
+ * Dir is direction ICMP is coming from (opposite to packet it contains).
+ * Returns 1 on success and 0 on any failure. */
+int nf_nat_icmp_reply_translation(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
+{
+ /* Layout expected right after the outer IP header. */
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ struct nf_conntrack_tuple inner, target;
+ int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned long statusbit;
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+
+ if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
+ return 0;
+
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+
+ /* We're actually going to mangle it beyond trivial checksum
+ adjustment, so make sure the current checksum is correct. */
+ if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ return 0;
+
+ /* Must be RELATED */
+ NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
+ (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+
+ /* Redirects on non-null nats must be dropped, else they'll
+ start talking to each other without our translation, and be
+ confused... --RR */
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ /* If NAT isn't finished, assume it and drop. */
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
+ *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
+
+ /* Extract the tuple of the embedded packet into "inner". */
+ if (!nf_ct_get_tuple(*pskb,
+ (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
+ (*pskb)->nh.iph->ihl*4 +
+ sizeof(struct icmphdr) + inside->ip.ihl*4,
+ (u_int16_t)AF_INET,
+ inside->ip.protocol,
+ &inner,
+ l3proto,
+ __nf_ct_l4proto_find((u_int16_t)PF_INET,
+ inside->ip.protocol)))
+ return 0;
+
+ /* Change inner back to look like incoming packet. We do the
+ opposite manip on this hook to normal, because it might not
+ pass all hooks (locally-generated ICMP). Consider incoming
+ packet: PREROUTING (DST manip), routing produces ICMP, goes
+ through POSTROUTING (which must correct the DST manip). */
+ if (!manip_pkt(inside->ip.protocol, pskb,
+ (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple,
+ !manip))
+ return 0;
+
+ /* Recompute the ICMP checksum over everything after the outer IP
+ * header, unless the skb is marked CHECKSUM_PARTIAL. */
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt inner. */
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen, 0));
+ }
+
+ /* Change outer to look the reply to an incoming packet
+ * (proto 0 means don't invert per-proto part). */
+ if (manip == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, pskb, 0, &target, manip))
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+/* Protocol registration: claim the nf_nat_protos[] slot for
+ * proto->protonum.  Returns 0 on success, or -EBUSY when the slot holds
+ * anything other than the unknown-protocol placeholder. */
+int nf_nat_protocol_register(struct nf_nat_protocol *proto)
+{
+	int err = -EBUSY;
+
+	write_lock_bh(&nf_nat_lock);
+	if (nf_nat_protos[proto->protonum] == &nf_nat_unknown_protocol) {
+		nf_nat_protos[proto->protonum] = proto;
+		err = 0;
+	}
+	write_unlock_bh(&nf_nat_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(nf_nat_protocol_register);
+
+/* No one stores the protocol anywhere; simply delete it.
+ * The slot for proto->protonum is reset to the unknown-protocol
+ * placeholder under nf_nat_lock, then synchronize_net() is called before
+ * returning. */
+void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
+{
+ write_lock_bh(&nf_nat_lock);
+ nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol;
+ write_unlock_bh(&nf_nat_lock);
+
+ /* Someone could be still looking at the proto in a bh. */
+ synchronize_net();
+}
+EXPORT_SYMBOL(nf_nat_protocol_unregister);
+
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+/* Emit the minimum and maximum port of @range into @skb as
+ * CTA_PROTONAT_PORT_MIN / CTA_PROTONAT_PORT_MAX attributes.
+ * Returns 0 on success, -1 when NFA_PUT bails out to nfattr_failure. */
+int
+nf_nat_port_range_to_nfattr(struct sk_buff *skb,
+			    const struct nf_nat_range *range)
+{
+	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
+		&range->min.tcp.port);
+	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
+		&range->max.tcp.port);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+/* Each export sits directly under the function it names. */
+EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);
+
+/* Fill the port bounds of @range from the parsed attribute table @tb.
+ * If only the minimum is present, the maximum is set equal to it.
+ * Returns 1 if at least one port attribute was parsed, 0 otherwise. */
+int
+nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
+{
+	int ret = 0;
+
+	/* we have to return whether we actually parsed something or not */
+
+	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
+		ret = 1;
+		range->min.tcp.port =
+			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
+	}
+
+	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
+		if (ret)
+			range->max.tcp.port = range->min.tcp.port;
+	} else {
+		ret = 1;
+		range->max.tcp.port =
+			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range);
+#endif
+
+/* Module init: size and allocate the bysource hash, fill the protocol
+ * table with the built-in TCP/UDP/ICMP handlers, hook conntrack
+ * destruction, and take a reference on the AF_INET l3proto.
+ * Returns 0 on success or -ENOMEM if the hash cannot be allocated. */
+static int __init nf_nat_init(void)
+{
+ size_t i;
+
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ /* A single vmalloc for the bysource hash table */
+ bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
+ if (!bysource)
+ return -ENOMEM;
+
+ /* Sew in builtin protocols. */
+ write_lock_bh(&nf_nat_lock);
+ for (i = 0; i < MAX_IP_NAT_PROTO; i++)
+ nf_nat_protos[i] = &nf_nat_unknown_protocol;
+ nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp;
+ nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp;
+ nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp;
+ write_unlock_bh(&nf_nat_lock);
+
+ for (i = 0; i < nf_nat_htable_size; i++) {
+ INIT_LIST_HEAD(&bysource[i]);
+ }
+
+ /* FIXME: Man, this is a hack. <SIGH> */
+ NF_CT_ASSERT(nf_conntrack_destroyed == NULL);
+ nf_conntrack_destroyed = &nf_nat_cleanup_conntrack;
+
+ /* Initialize fake conntrack so that NAT will skip it */
+ nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+
+ /* Reference is dropped again in nf_nat_cleanup(). */
+ l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
+ return 0;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again.
+ * Passed to nf_ct_iterate_cleanup() by nf_nat_cleanup(): zeroes the NAT
+ * data of conntrack @i (if it has any) and clears its NAT-related status
+ * bits.  @data is unused.  Always returns 0. */
+static int clean_nat(struct nf_conn *i, void *data)
+{
+	struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (!nat)
+		return 0;
+	/* Wipe the whole structure; sizeof(nat) would only be the size of
+	 * the pointer. */
+	memset(nat, 0, sizeof(*nat));
+	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
+	return 0;
+}
+
+/* Module exit: run clean_nat() over the conntracks, unhook the destroy
+ * callback, free the bysource hash and drop the l3proto reference taken
+ * in nf_nat_init(). */
+static void __exit nf_nat_cleanup(void)
+{
+ nf_ct_iterate_cleanup(&clean_nat, NULL);
+ nf_conntrack_destroyed = NULL;
+ vfree(bysource);
+ nf_ct_l3proto_put(l3proto);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(nf_nat_init);
+module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
new file mode 100644
index 00000000000..751b5980175
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -0,0 +1,179 @@
+/* FTP extension for TCP NAT alteration. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_ftp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp NAT helper");
+MODULE_ALIAS("ip_nat_ftp");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* FIXME: Time out? --RR */
+
+/* Replace the matched text with "a,b,c,d,p1,p2": the four bytes of
+ * @newip followed by the high and low byte of @port.  *@seq is advanced
+ * by the change in length.  Returns the result of
+ * nf_nat_mangle_tcp_packet(). */
+static int
+mangle_rfc959_packet(struct sk_buff **pskb,
+		     __be32 newip,
+		     u_int16_t port,
+		     unsigned int matchoff,
+		     unsigned int matchlen,
+		     struct nf_conn *ct,
+		     enum ip_conntrack_info ctinfo,
+		     u32 *seq)
+{
+	char text[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
+	unsigned int textlen;
+
+	textlen = sprintf(text, "%u,%u,%u,%u,%u,%u",
+			  NIPQUAD(newip), port>>8, port&0xFF);
+
+	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+
+	*seq += textlen - matchlen;
+	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+					matchlen, text, textlen);
+}
+
+/* Replace the matched text with the EPRT form, e.g. |1|132.235.1.2|6275|.
+ * *@seq is advanced by the change in length.  Returns the result of
+ * nf_nat_mangle_tcp_packet(). */
+static int
+mangle_eprt_packet(struct sk_buff **pskb,
+		   __be32 newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo,
+		   u32 *seq)
+{
+	char text[sizeof("|1|255.255.255.255|65535|")];
+	unsigned int textlen;
+
+	textlen = sprintf(text, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
+
+	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+
+	*seq += textlen - matchlen;
+	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+					matchlen, text, textlen);
+}
+
+/* Replace the matched text with the EPSV form, e.g. |||6275| (port only;
+ * @newip is not used).  *@seq is advanced by the change in length.
+ * Returns the result of nf_nat_mangle_tcp_packet(). */
+static int
+mangle_epsv_packet(struct sk_buff **pskb,
+		   __be32 newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo,
+		   u32 *seq)
+{
+	char text[sizeof("|||65535|")];
+	unsigned int textlen;
+
+	textlen = sprintf(text, "|||%u|", port);
+
+	DEBUGP("calling nf_nat_mangle_tcp_packet\n");
+
+	*seq += textlen - matchlen;
+	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+					matchlen, text, textlen);
+}
+
+/* Dispatch table indexed by enum nf_ct_ftp_type: the payload rewriter to
+ * use for each kind of FTP command/response.  PORT and PASV share the
+ * same comma-separated formatter. */
+static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
+ unsigned int, unsigned int, struct nf_conn *,
+ enum ip_conntrack_info, u32 *seq)
+= {
+ [NF_CT_FTP_PORT] = mangle_rfc959_packet,
+ [NF_CT_FTP_PASV] = mangle_rfc959_packet,
+ [NF_CT_FTP_EPRT] = mangle_eprt_packet,
+ [NF_CT_FTP_EPSV] = mangle_epsv_packet
+};
+
+/* Called once the connection tracking code has matched an address/port
+ * in an FTP packet: register the expectation @exp on a usable port and
+ * rewrite the packet payload to advertise the NATed address and that
+ * port.  Returns NF_ACCEPT on success; NF_DROP if no port could be
+ * registered or the payload could not be rewritten. */
+static unsigned int nf_nat_ftp(struct sk_buff **pskb,
+			       enum ip_conntrack_info ctinfo,
+			       enum nf_ct_ftp_type type,
+			       unsigned int matchoff,
+			       unsigned int matchlen,
+			       struct nf_conntrack_expect *exp,
+			       u32 *seq)
+{
+	struct nf_conn *ct = exp->master;
+	int dir = CTINFO2DIR(ctinfo);
+	__be32 newip;
+	u_int16_t try_port;
+
+	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+
+	/* The new connection will come from wherever this packet is
+	 * going, hence the !dir. */
+	newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+	exp->dir = !dir;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+
+	/* The expected packet must be NATed the same way as this one. */
+	exp->expectfn = nf_nat_follow_master;
+
+	/* Prefer the original port; otherwise walk upwards.  Wrapping
+	 * round to 0 means every candidate was refused. */
+	try_port = ntohs(exp->saved_proto.tcp.port);
+	while (try_port != 0) {
+		exp->tuple.dst.u.tcp.port = htons(try_port);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+		try_port++;
+	}
+	if (try_port == 0)
+		return NF_DROP;
+
+	if (mangle[type](pskb, newip, try_port, matchoff, matchlen, ct,
+			 ctinfo, seq))
+		return NF_ACCEPT;
+
+	/* Payload rewrite failed: withdraw the expectation again. */
+	nf_conntrack_unexpect_related(exp);
+	return NF_DROP;
+}
+
+/* Module unload: clear nf_nat_ftp_hook, then block in synchronize_rcu()
+ * before returning. */
+static void __exit nf_nat_ftp_fini(void)
+{
+ rcu_assign_pointer(nf_nat_ftp_hook, NULL);
+ synchronize_rcu();
+}
+
+/* Module load: install nf_nat_ftp() as nf_nat_ftp_hook.  Trips BUG_ON()
+ * if a hook is already installed.  Always returns 0. */
+static int __init nf_nat_ftp_init(void)
+{
+ BUG_ON(rcu_dereference(nf_nat_ftp_hook));
+ rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
+ return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users.
+ * Setter for the obsolete "ports" parameter: the value is ignored, a
+ * notice is logged, and 0 is returned so setting it does not fail. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+ printk(KERN_INFO KBUILD_MODNAME
+ ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(nf_nat_ftp_init);
+module_exit(nf_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
new file mode 100644
index 00000000000..fb9ab0114c2
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -0,0 +1,596 @@
+/*
+ * H.323 extension for NAT alteration.
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 NAT module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/****************************************************************************/
+/* Overwrite the 6-byte IPv4 address + port field located at
+ * dataoff + addroff with @ip and @port, using the TCP or UDP mangling
+ * helper depending on the packet's IP protocol, and then re-point *data
+ * into the (possibly reallocated) skb.
+ * Returns 0 on success, -1 on failure. */
+static int set_addr(struct sk_buff **pskb,
+ unsigned char **data, int dataoff,
+ unsigned int addroff, __be32 ip, __be16 port)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+ /* Replacement bytes, packed so sizeof(buf) is exactly ip + port. */
+ struct {
+ __be32 ip;
+ __be16 port;
+ } __attribute__ ((__packed__)) buf;
+ struct tcphdr _tcph, *th;
+
+ buf.ip = ip;
+ buf.port = port;
+ addroff += dataoff;
+
+ if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+ if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ if (net_ratelimit())
+ printk("nf_nat_h323: nf_nat_mangle_tcp_packet"
+ " error\n");
+ return -1;
+ }
+
+ /* Relocate data pointer */
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return -1;
+ *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+ th->doff * 4 + dataoff;
+ } else {
+ if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ if (net_ratelimit())
+ printk("nf_nat_h323: nf_nat_mangle_udp_packet"
+ " error\n");
+ return -1;
+ }
+ /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+ * or pull everything in a linear buffer, so we can safely
+ * use the skb pointers now */
+ /* NOTE(review): unlike the TCP branch, dataoff is not added
+ * here - presumably UDP callers always pass 0; confirm. */
+ *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+ sizeof(struct udphdr);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+/* Thin wrapper around set_addr() for an H.225 TransportAddress:
+ * taddr->ipAddress.ip is passed as the address offset, addr->ip as the
+ * new IPv4 address.  Returns set_addr()'s result. */
+static int set_h225_addr(struct sk_buff **pskb,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 port)
+{
+ return set_addr(pskb, data, dataoff, taddr->ipAddress.ip,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+/* Thin wrapper around set_addr() for an H.245 transport address:
+ * taddr->unicastAddress.iPAddress.network is passed as the address
+ * offset, addr->ip as the new IPv4 address.  Returns set_addr()'s
+ * result. */
+static int set_h245_addr(struct sk_buff **pskb,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 port)
+{
+ return set_addr(pskb, data, dataoff,
+ taddr->unicastAddress.iPAddress.network,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+/* Scan the @count transport addresses in @taddr for the signalling
+ * address of this connection (address matching the source or destination
+ * of the current direction, port equal to info->sig_port[dir]) and
+ * rewrite the first hit to the corresponding address/port of the other
+ * direction.
+ * Returns 0 if nothing matched, otherwise the result of set_h225_addr(). */
+static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data,
+			TransportAddress *taddr, int count)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	int i;
+	__be16 port;
+	union nf_conntrack_address addr;
+
+	for (i = 0; i < count; i++) {
+		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
+			if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+			    port == info->sig_port[dir]) {
+				/* GW->GK */
+
+				/* Fix for Gnomemeeting: if the first entry
+				 * is a 127.x.x.x address, rewrite that one
+				 * instead. */
+				if (i > 0 &&
+				    get_h225_addr(ct, *data, &taddr[0],
+						  &addr, &port) &&
+				    (ntohl(addr.ip) & 0xff000000) == 0x7f000000)
+					i = 0;
+
+				/* Debug arguments use addr.ip and u3.ip so
+				 * they compile when DEBUGP is enabled. */
+				DEBUGP("nf_nat_ras: set signal address "
+				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+				       NIPQUAD(addr.ip), ntohs(port),
+				       NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+				       ntohs(info->sig_port[!dir]));
+				return set_h225_addr(pskb, data, 0, &taddr[i],
+						     &ct->tuplehash[!dir].
+						     tuple.dst.u3,
+						     info->sig_port[!dir]);
+			} else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+				   port == info->sig_port[dir]) {
+				/* GK->GW */
+				DEBUGP("nf_nat_ras: set signal address "
+				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+				       NIPQUAD(addr.ip), ntohs(port),
+				       NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
+				       ntohs(info->sig_port[!dir]));
+				return set_h225_addr(pskb, data, 0, &taddr[i],
+						     &ct->tuplehash[!dir].
+						     tuple.src.u3,
+						     info->sig_port[!dir]);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/* Scan the @count transport addresses in @taddr for one equal to the
+ * source address/UDP port of the current direction and rewrite the first
+ * hit to the destination address/port of the other direction.
+ * Returns 0 if nothing matched, otherwise the result of set_h225_addr(). */
+static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data,
+			TransportAddress *taddr, int count)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	int i;
+	__be16 port;
+	union nf_conntrack_address addr;
+
+	for (i = 0; i < count; i++) {
+		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
+		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+		    port == ct->tuplehash[dir].tuple.src.u.udp.port) {
+			/* Debug arguments use addr.ip (there is no local
+			 * named "ip") so they compile when DEBUGP is
+			 * enabled. */
+			DEBUGP("nf_nat_ras: set rasAddress "
+			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+			       NIPQUAD(addr.ip), ntohs(port),
+			       NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
+			       ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
+				     port));
+			return set_h225_addr(pskb, data, 0, &taddr[i],
+					     &ct->tuplehash[!dir].tuple.dst.u3,
+					     ct->tuplehash[!dir].tuple.
+					     dst.u.udp.port);
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/* Set up NAT for an RTP/RTCP channel pair announced in an H.245 message.
+ *
+ * Registers @rtp_exp and @rtcp_exp on a pair of consecutive ports
+ * (reusing a pair already recorded for @rtp_port when there is one),
+ * rewrites the address at @taddr in the packet to the NATed address and
+ * port, and records the port mapping in the connection's H.323 info.
+ *
+ * Returns 0 on success, and also when no channel slot or no port pair is
+ * available (the packet is then left untouched); returns -1 if the
+ * packet could not be rewritten. */
+static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned char **data, int dataoff,
+			H245_TransportAddress *taddr,
+			__be16 port, __be16 rtp_port,
+			struct nf_conntrack_expect *rtp_exp,
+			struct nf_conntrack_expect *rtcp_exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	int i;
+	u_int16_t nated_port;
+
+	/* Set expectations for NAT */
+	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+	rtp_exp->expectfn = nf_nat_follow_master;
+	rtp_exp->dir = !dir;
+	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+	rtcp_exp->expectfn = nf_nat_follow_master;
+	rtcp_exp->dir = !dir;
+
+	/* Lookup existing expects */
+	for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
+		if (info->rtp_port[i][dir] == rtp_port) {
+			/* Expected */
+
+			/* Use allocated ports first. This will refresh
+			 * the expects */
+			rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir];
+			rtcp_exp->tuple.dst.u.udp.port =
+				htons(ntohs(info->rtp_port[i][dir]) + 1);
+			break;
+		} else if (info->rtp_port[i][dir] == 0) {
+			/* Not expected */
+			break;
+		}
+	}
+
+	/* Run out of expectations */
+	if (i >= H323_RTP_CHANNEL_MAX) {
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of expectations\n");
+		return 0;
+	}
+
+	/* Try to get a pair of ports. */
+	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+	     nated_port != 0; nated_port += 2) {
+		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
+		if (nf_conntrack_expect_related(rtp_exp) == 0) {
+			rtcp_exp->tuple.dst.u.udp.port =
+				htons(nated_port + 1);
+			if (nf_conntrack_expect_related(rtcp_exp) == 0)
+				break;
+			/* RTCP port refused: give the RTP one back and
+			 * try the next pair. */
+			nf_conntrack_unexpect_related(rtp_exp);
+		}
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of RTP ports\n");
+		return 0;
+	}
+
+	/* Modify signal: an odd @port (the RTCP one) maps to the odd port
+	 * of the new pair. */
+	if (set_h245_addr(pskb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons((port & htons(1)) ? nated_port + 1 :
+						    nated_port)) == 0) {
+		/* Save ports */
+		info->rtp_port[i][dir] = rtp_port;
+		info->rtp_port[i][!dir] = htons(nated_port);
+	} else {
+		nf_conntrack_unexpect_related(rtp_exp);
+		nf_conntrack_unexpect_related(rtcp_exp);
+		return -1;
+	}
+
+	/* Success.  Debug arguments use the u3.ip members so they compile
+	 * when DEBUGP is enabled. */
+	DEBUGP("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(rtp_exp->tuple.src.u3.ip),
+	       ntohs(rtp_exp->tuple.src.u.udp.port),
+	       NIPQUAD(rtp_exp->tuple.dst.u3.ip),
+	       ntohs(rtp_exp->tuple.dst.u.udp.port));
+	DEBUGP("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(rtcp_exp->tuple.src.u3.ip),
+	       ntohs(rtcp_exp->tuple.src.u.udp.port),
+	       NIPQUAD(rtcp_exp->tuple.dst.u3.ip),
+	       ntohs(rtcp_exp->tuple.dst.u.udp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Register the NAT expectation for a T.120 channel and rewrite the
+ * address carried in the H.245 message accordingly.
+ *
+ * Ports are tried upwards from the announced one until
+ * nf_conntrack_expect_related() accepts the expectation.  Returns 0 on
+ * success and also when no port could be reserved (that case is only
+ * logged); returns -1 when set_h245_addr() fails, after withdrawing the
+ * expectation again.
+ */
+static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, int dataoff,
+		    H245_TransportAddress *taddr, __be16 port,
+		    struct nf_conntrack_expect *exp)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t candidate;
+
+	/* Remember the original port and let the expected connection
+	 * be NATed by nf_nat_follow_master() */
+	exp->dir = !dir;
+	exp->expectfn = nf_nat_follow_master;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+
+	/* Start with the announced port; the 16-bit counter wrapping
+	 * around to 0 terminates the search. */
+	candidate = ntohs(port);
+	while (candidate != 0) {
+		exp->tuple.dst.u.tcp.port = htons(candidate);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+		candidate++;
+	}
+
+	/* Nothing could be reserved */
+	if (candidate == 0) {
+		if (net_ratelimit())
+			printk("nf_nat_h323: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Patch the address inside the signalling message */
+	if (set_h245_addr(pskb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(candidate)) < 0) {
+		nf_conntrack_unexpect_related(exp);
+		return -1;
+	}
+
+	DEBUGP("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
+	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Register the NAT expectation for an H.245 channel and rewrite the
+ * H.225 transport address that announces it.
+ *
+ * If the announced port was already seen for this direction, the port
+ * previously chosen for the other direction is tried first.  Returns 0
+ * on success and when no port could be reserved (only logged); returns
+ * -1 when set_h225_addr() fails, after withdrawing the expectation.
+ */
+static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, int dataoff,
+		    TransportAddress *taddr, __be16 port,
+		    struct nf_conntrack_expect *exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t candidate;
+
+	/* Remember the original port and let the expected connection
+	 * be NATed by nf_nat_follow_master() */
+	exp->dir = !dir;
+	exp->expectfn = nf_nat_follow_master;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+
+	/* Reuse the earlier mapping when this port is already known */
+	if (info->sig_port[dir] == port)
+		candidate = ntohs(info->sig_port[!dir]);
+	else
+		candidate = ntohs(port);
+
+	/* Walk upwards until a port is accepted; wrapping to 0 ends
+	 * the search. */
+	while (candidate != 0) {
+		exp->tuple.dst.u.tcp.port = htons(candidate);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+		candidate++;
+	}
+
+	/* Nothing could be reserved */
+	if (candidate == 0) {
+		if (net_ratelimit())
+			printk("nf_nat_q931: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Patch the address inside the signalling message */
+	if (set_h225_addr(pskb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(candidate)) != 0) {
+		nf_conntrack_unexpect_related(exp);
+		return -1;
+	}
+
+	/* Record the mapping for later messages */
+	info->sig_port[dir] = port;
+	info->sig_port[!dir] = htons(candidate);
+
+	DEBUGP("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
+	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************
+ * This conntrack expect function replaces nf_conntrack_q931_expect()
+ * which was set by nf_conntrack_h323.c.
+ *
+ * An expectation with a non-zero source address is handed to
+ * nf_nat_follow_master().  Otherwise the new connection gets a source
+ * mapping taken from its own tuple and a destination mapping (address
+ * of the master's tuple, port from this->saved_proto).
+ ****************************************************************************/
+static void ip_nat_q931_expect(struct nf_conn *new,
+			       struct nf_conntrack_expect *this)
+{
+	/* Same range type as the other expect callbacks in this file */
+	struct nf_nat_range range;
+
+	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
+		nf_nat_follow_master(new, this);
+		return;
+	}
+
+	/* This must be a fresh one. */
+	BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+
+	/* hook doesn't matter, but it has to do source manip */
+	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.min = range.max = this->saved_proto;
+	range.min_ip = range.max_ip =
+	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
+
+	/* hook doesn't matter, but it has to do destination manip */
+	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+}
+
+/****************************************************************************/
+/*
+ * Register the NAT expectation for a Q.931 channel announced in a RAS
+ * message and rewrite taddr[idx] to the NATed address and port.
+ *
+ * If the announced port was already seen for this direction, the port
+ * previously chosen for the other direction is tried first.  Returns 0
+ * on success and when no port could be reserved (only logged); returns
+ * -1 when set_h225_addr() fails, after withdrawing the expectation.
+ */
+static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo,
+		    unsigned char **data, TransportAddress *taddr, int idx,
+		    __be16 port, struct nf_conntrack_expect *exp)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t nated_port = ntohs(port);
+	union nf_conntrack_address addr;
+
+	/* Set expectations for NAT */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = ip_nat_q931_expect;
+	exp->dir = !dir;
+
+	/* Check existing expects */
+	if (info->sig_port[dir] == port)
+		nated_port = ntohs(info->sig_port[!dir]);
+
+	/* Try to get same port: if not, try to change it.  The 16-bit
+	 * counter wrapping to 0 ends the search. */
+	for (; nated_port != 0; nated_port++) {
+		exp->tuple.dst.u.tcp.port = htons(nated_port);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+	}
+
+	if (nated_port == 0) {	/* No port available */
+		if (net_ratelimit())
+			printk("nf_nat_ras: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Modify signal */
+	if (set_h225_addr(pskb, data, 0, &taddr[idx],
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port)) == 0) {
+		/* Save ports */
+		info->sig_port[dir] = port;
+		info->sig_port[!dir] = htons(nated_port);
+
+		/* Fix for Gnomemeeting: when the first address is a
+		 * 127.x.x.x one, rewrite it as well.  Note that
+		 * get_h225_addr() overwrites 'port' here, which is why
+		 * the ports were saved above.
+		 *
+		 * Call set_h225_addr() directly rather than through
+		 * set_h225_addr_hook: the hook is set to NULL on module
+		 * exit and must not be called without checking it. */
+		if (idx > 0 &&
+		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+			set_h225_addr(pskb, data, 0, &taddr[0],
+				      &ct->tuplehash[!dir].tuple.dst.u3,
+				      info->sig_port[!dir]);
+		}
+	} else {
+		nf_conntrack_unexpect_related(exp);
+		return -1;
+	}
+
+	/* Success */
+	DEBUGP("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
+	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Expect callback for forwarded calls: gives the new connection a
+ * source mapping taken from its own tuple and a destination mapping
+ * built from the address and port saved in the expectation.
+ */
+static void ip_nat_callforwarding_expect(struct nf_conn *new,
+					 struct nf_conntrack_expect *this)
+{
+	struct nf_nat_range range;
+
+	/* NAT must not have been set up for this connection yet */
+	BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+	/* Source manip: address only.  The hook merely has to be one
+	 * that performs source manipulation. */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+	range.min_ip = range.max_ip;
+	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+
+	/* Destination manip: saved address plus saved port.  The hook
+	 * merely has to be one that performs destination manipulation. */
+	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.max = this->saved_proto;
+	range.min = range.max;
+	range.max_ip = this->saved_ip;
+	range.min_ip = range.max_ip;
+	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+}
+
+/****************************************************************************/
+/*
+ * Register the NAT expectation for a forwarded call and rewrite the
+ * H.225 address announcing it.
+ *
+ * The original destination address and port are saved in the
+ * expectation, and the expected destination address is replaced by the
+ * one from the opposite direction's tuple.  Returns 0 on success and
+ * when no port could be reserved (only logged); returns -1 when
+ * set_h225_addr() reports a non-zero result, after withdrawing the
+ * expectation.
+ */
+static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
+			      enum ip_conntrack_info ctinfo,
+			      unsigned char **data, int dataoff,
+			      TransportAddress *taddr, __be16 port,
+			      struct nf_conntrack_expect *exp)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	u_int16_t candidate = ntohs(port);
+
+	/* Save the real destination, then expect the NATed one */
+	exp->saved_ip = exp->tuple.dst.u3.ip;
+	exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->expectfn = ip_nat_callforwarding_expect;
+	exp->dir = !dir;
+
+	/* Walk upwards from the announced port; wrapping to 0 ends
+	 * the search. */
+	while (candidate != 0) {
+		exp->tuple.dst.u.tcp.port = htons(candidate);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+		candidate++;
+	}
+
+	/* Nothing could be reserved */
+	if (candidate == 0) {
+		if (net_ratelimit())
+			printk("nf_nat_q931: out of TCP ports\n");
+		return 0;
+	}
+
+	/* Patch the address inside the signalling message */
+	if (set_h225_addr(pskb, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(candidate)) != 0) {
+		nf_conntrack_unexpect_related(exp);
+		return -1;
+	}
+
+	DEBUGP("nf_nat_q931: expect Call Forwarding "
+	       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
+	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
+	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Module load: check that no H.323 NAT hook is set yet, then publish
+ * this module's implementations through the hook pointers.
+ */
+static int __init init(void)
+{
+	/* Every hook is expected to be unset at this point */
+	BUG_ON(rcu_dereference(set_h245_addr_hook));
+	BUG_ON(rcu_dereference(set_h225_addr_hook));
+	BUG_ON(rcu_dereference(set_sig_addr_hook));
+	BUG_ON(rcu_dereference(set_ras_addr_hook));
+	BUG_ON(rcu_dereference(nat_rtp_rtcp_hook));
+	BUG_ON(rcu_dereference(nat_t120_hook));
+	BUG_ON(rcu_dereference(nat_h245_hook));
+	BUG_ON(rcu_dereference(nat_callforwarding_hook));
+	BUG_ON(rcu_dereference(nat_q931_hook));
+
+	/* Address rewriting helpers */
+	rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
+	rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
+	rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
+	rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
+
+	/* Expectation setup helpers */
+	rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+	rcu_assign_pointer(nat_t120_hook, nat_t120);
+	rcu_assign_pointer(nat_h245_hook, nat_h245);
+	rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
+	rcu_assign_pointer(nat_q931_hook, nat_q931);
+
+	DEBUGP("nf_nat_h323: init success\n");
+	return 0;
+}
+
+/****************************************************************************/
+/* Module unload: reset every hook published by init() to NULL, then
+ * call synchronize_rcu() before returning (presumably so that readers
+ * still using the old pointers have finished -- confirm against the
+ * hook users in the conntrack helper). */
+static void __exit fini(void)
+{
+ rcu_assign_pointer(set_h245_addr_hook, NULL);
+ rcu_assign_pointer(set_h225_addr_hook, NULL);
+ rcu_assign_pointer(set_sig_addr_hook, NULL);
+ rcu_assign_pointer(set_ras_addr_hook, NULL);
+ rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
+ rcu_assign_pointer(nat_t120_hook, NULL);
+ rcu_assign_pointer(nat_h245_hook, NULL);
+ rcu_assign_pointer(nat_callforwarding_hook, NULL);
+ rcu_assign_pointer(nat_q931_hook, NULL);
+ synchronize_rcu();
+}
+
+/****************************************************************************/
+/* Entry and exit points of the module */
+module_init(init);
+module_exit(fini);
+
+/* Module metadata; the alias keeps the old ip_nat_h323 name usable */
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_h323");
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
new file mode 100644
index 00000000000..98fbfc84d18
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -0,0 +1,433 @@
+/* nf_nat_helper.c - generic support functions for NAT helpers
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+/* Debug output is compiled out by default.  With the first branch
+ * enabled, DEBUGP maps to printk and DUMP_OFFSET prints the three
+ * fields of a sequence-offset record. */
+#if 0
+#define DEBUGP printk
+#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
+#else
+#define DEBUGP(format, args...)
+#define DUMP_OFFSET(x)
+#endif
+
+/* Held (with bottom halves disabled) while adjust_tcp_sequence()
+ * updates a sequence-offset record. */
+static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
+
+/*
+ * Setup TCP sequence correction given this change at this sequence.
+ *
+ * @seq:      sequence number (host order) of the segment that was resized
+ * @sizediff: number of bytes the payload grew (positive) or shrank
+ * @ct:       connection whose NAT sequence state is updated
+ * @ctinfo:   used to select the per-direction offset record
+ *
+ * The record for the packet's direction is updated under
+ * nf_nat_seqofs_lock.
+ */
+static inline void
+adjust_tcp_sequence(u32 seq,
+		    int sizediff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way = &nat->info.seq[CTINFO2DIR(ctinfo)];
+
+	/* Only names that exist in this function may be used here, or
+	 * the file stops building once DEBUGP is enabled. */
+	DEBUGP("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
+	       seq, sizediff);
+
+	DEBUGP("nf_nat_resize_packet: Seq_offset before: ");
+	DUMP_OFFSET(this_way);
+
+	spin_lock_bh(&nf_nat_seqofs_lock);
+
+	/* SYN adjust. If it's uninitialized, or this is after last
+	 * correction, record it: we don't handle more than one
+	 * adjustment in the window, but do deal with common case of a
+	 * retransmit */
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, seq)) {
+		this_way->correction_pos = seq;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += sizediff;
+	}
+	spin_unlock_bh(&nf_nat_seqofs_lock);
+
+	DEBUGP("nf_nat_resize_packet: Seq_offset after: ");
+	DUMP_OFFSET(this_way);
+}
+
+/* Frobs data inside this packet, which is linear.
+ *
+ * Replaces the match_len bytes found at dataoff + match_offset (counted
+ * from the IP header) with the rep_len bytes of rep_buffer, moving the
+ * remainder of the packet as needed, then updates the skb length, the
+ * IP total length and the IP header checksum.  No room is allocated
+ * here: the memmove below writes up to rep_len - match_len bytes past
+ * the old tail before skb_put() is called, so enough tailroom has to
+ * exist already. */
+static void mangle_contents(struct sk_buff *skb,
+ unsigned int dataoff,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ unsigned char *data;
+
+ BUG_ON(skb_is_nonlinear(skb));
+ /* dataoff is relative to the start of the IP header */
+ data = (unsigned char *)skb->nh.iph + dataoff;
+
+ /* move post-replacement */
+ memmove(data + match_offset + rep_len,
+ data + match_offset + match_len,
+ skb->tail - (data + match_offset + match_len));
+
+ /* insert data from buffer */
+ memcpy(data + match_offset, rep_buffer, rep_len);
+
+ /* update skb info */
+ if (rep_len > match_len) {
+ DEBUGP("nf_nat_mangle_packet: Extending packet by "
+ "%u from %u bytes\n", rep_len - match_len,
+ skb->len);
+ skb_put(skb, rep_len - match_len);
+ } else {
+ DEBUGP("nf_nat_mangle_packet: Shrinking packet from "
+ "%u from %u bytes\n", match_len - rep_len,
+ skb->len);
+ /* also taken when rep_len == match_len: length is unchanged */
+ __skb_trim(skb, skb->len + rep_len - match_len);
+ }
+
+ /* fix IP hdr checksum information */
+ skb->nh.iph->tot_len = htons(skb->len);
+ ip_send_check(skb->nh.iph);
+}
+
+/*
+ * Replace *pskb by a copy that has 'extra' more bytes of tailroom.
+ * Unusual, but possible case.
+ *
+ * Returns 1 on success.  Returns 0, leaving *pskb untouched, when the
+ * grown packet would exceed 65535 bytes or the copy cannot be
+ * allocated.
+ */
+static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+{
+	struct sk_buff *old = *pskb;
+	struct sk_buff *bigger;
+
+	if (old->len + extra > 65535)
+		return 0;
+
+	bigger = skb_copy_expand(old, skb_headroom(old), extra, GFP_ATOMIC);
+	if (bigger == NULL)
+		return 0;
+
+	/* The owning socket, if any, moves over to the copy */
+	if (old->sk != NULL)
+		skb_set_owner_w(bigger, old->sk);
+
+	kfree_skb(old);
+	*pskb = bigger;
+	return 1;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * match_offset/match_len locate the bytes to replace, counted from the
+ * start of the TCP payload; rep_buffer/rep_len are the replacement.
+ * *pskb may be replaced by a different skb.  Returns 1 on success and
+ * 0 if the skb could not be made writable or enlarged.
+ * */
+int
+nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int oldlen, datalen;
+
+ if (!skb_make_writable(pskb, (*pskb)->len))
+ return 0;
+
+ /* Growing the payload beyond the available tailroom needs a
+ * bigger skb */
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(*pskb) &&
+ !enlarge_skb(pskb, rep_len - match_len))
+ return 0;
+
+ SKB_LINEAR_ASSERT(*pskb);
+
+ /* Header pointers are taken only now, after the calls above that
+ * may have replaced *pskb */
+ iph = (*pskb)->nh.iph;
+ tcph = (void *)iph + iph->ihl*4;
+
+ /* oldlen/datalen: TCP header plus payload, before and after */
+ oldlen = (*pskb)->len - iph->ihl*4;
+ mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+ match_offset, match_len, rep_buffer, rep_len);
+
+ datalen = (*pskb)->len - iph->ihl*4;
+ /* Either recompute the full TCP checksum, or (CHECKSUM_PARTIAL)
+ * only account for the changed length */
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, datalen,
+ iph->saddr, iph->daddr,
+ csum_partial((char *)tcph,
+ datalen, 0));
+ } else
+ nf_proto_csum_replace2(&tcph->check, *pskb,
+ htons(oldlen), htons(datalen), 1);
+
+ /* A size change shifts all later sequence numbers: record it */
+ if (rep_len != match_len) {
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ adjust_tcp_sequence(ntohl(tcph->seq),
+ (int)rep_len - (int)match_len,
+ ct, ctinfo);
+ /* Tell TCP window tracking about seq change */
+ nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+ ct, CTINFO2DIR(ctinfo));
+ }
+ return 1;
+}
+EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * match_offset/match_len locate the bytes to replace, counted from the
+ * start of the UDP payload; rep_buffer/rep_len are the replacement.
+ * *pskb may be replaced by a different skb.  Returns 1 on success and
+ * 0 if the packet is too short for the match, or the skb could not be
+ * made writable or enlarged.  ct and ctinfo are not used by the body.
+ *
+ * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
+ * should be fairly easy to do.
+ */
+int
+nf_nat_mangle_udp_packet(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int match_offset,
+ unsigned int match_len,
+ const char *rep_buffer,
+ unsigned int rep_len)
+{
+ struct iphdr *iph;
+ struct udphdr *udph;
+ int datalen, oldlen;
+
+ /* UDP helpers might accidentally mangle the wrong packet */
+ iph = (*pskb)->nh.iph;
+ if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+ match_offset + match_len)
+ return 0;
+
+ if (!skb_make_writable(pskb, (*pskb)->len))
+ return 0;
+
+ /* Growing the payload beyond the available tailroom needs a
+ * bigger skb */
+ if (rep_len > match_len &&
+ rep_len - match_len > skb_tailroom(*pskb) &&
+ !enlarge_skb(pskb, rep_len - match_len))
+ return 0;
+
+ /* Reload the header pointer: *pskb may have been replaced above */
+ iph = (*pskb)->nh.iph;
+ udph = (void *)iph + iph->ihl*4;
+
+ /* oldlen/datalen: UDP header plus payload, before and after */
+ oldlen = (*pskb)->len - iph->ihl*4;
+ mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+ match_offset, match_len, rep_buffer, rep_len);
+
+ /* update the length of the UDP packet */
+ datalen = (*pskb)->len - iph->ihl*4;
+ udph->len = htons(datalen);
+
+ /* fix udp checksum if udp checksum was previously calculated */
+ if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+ return 1;
+
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ udph->check = 0;
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ csum_partial((char *)udph,
+ datalen, 0));
+ /* a computed checksum of 0 is stored as CSUM_MANGLED_0; the
+ * early return above treats a 0 field as "no checksum" */
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ } else
+ nf_proto_csum_replace2(&udph->check, *pskb,
+ htons(oldlen), htons(datalen), 1);
+
+ return 1;
+}
+EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
+
+/*
+ * Adjust one found SACK option including checksum correction.
+ *
+ * Every block between skb->data + sackoff and skb->data + sackend has
+ * its start and end sequence numbers reduced by natseq->offset_after
+ * or natseq->offset_before, depending on whether the edge (minus
+ * offset_before) lies after natseq->correction_pos.  The TCP checksum
+ * is updated for each rewritten field.
+ */
+static void
+sack_adjust(struct sk_buff *skb,
+	    struct tcphdr *tcph,
+	    unsigned int sackoff,
+	    unsigned int sackend,
+	    struct nf_nat_seq *natseq)
+{
+	struct tcp_sack_block_wire *sack;
+
+	for (; sackoff < sackend; sackoff += sizeof(*sack)) {
+		__be32 new_start_seq, new_end_seq;
+		u32 start, end;
+
+		sack = (void *)skb->data + sackoff;
+		start = ntohl(sack->start_seq);
+		end = ntohl(sack->end_seq);
+
+		/* Left edge of the block */
+		new_start_seq = htonl(start -
+				      (after(start - natseq->offset_before,
+					     natseq->correction_pos)
+				       ? natseq->offset_after
+				       : natseq->offset_before));
+
+		/* Right edge of the block */
+		new_end_seq = htonl(end -
+				    (after(end - natseq->offset_before,
+					   natseq->correction_pos)
+				     ? natseq->offset_after
+				     : natseq->offset_before));
+
+		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+		       start, new_start_seq, end, new_end_seq);
+
+		/* Fix the checksum first, then store the new values */
+		nf_proto_csum_replace4(&tcph->check, skb,
+				       sack->start_seq, new_start_seq, 0);
+		nf_proto_csum_replace4(&tcph->check, skb,
+				       sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+	}
+}
+
+/* TCP SACK sequence number adjustment
+ *
+ * Walks the TCP options of the packet and passes every well-formed
+ * SACK option to sack_adjust(), using the sequence record of the
+ * opposite direction.  Returns 1 on success (including when an
+ * end-of-options marker is reached) and 0 if the option area could not
+ * be made writable or an option is truncated or has a length below 2. */
+static inline unsigned int
+nf_nat_sack_adjust(struct sk_buff **pskb,
+ struct tcphdr *tcph,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int dir, optoff, optend;
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ /* Options live between the fixed TCP header and the data offset */
+ optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
+ optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+
+ if (!skb_make_writable(pskb, optend))
+ return 0;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ while (optoff < optend) {
+ /* Usually: option, length. */
+ unsigned char *op = (*pskb)->data + optoff;
+
+ switch (op[0]) {
+ case TCPOPT_EOL:
+ return 1;
+ case TCPOPT_NOP:
+ /* single-byte option without a length field */
+ optoff++;
+ continue;
+ default:
+ /* no partial options */
+ if (optoff + 1 == optend ||
+ optoff + op[1] > optend ||
+ op[1] < 2)
+ return 0;
+ /* SACK payload must be a whole number of blocks, at least one */
+ if (op[0] == TCPOPT_SACK &&
+ op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+ ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+ sack_adjust(*pskb, tcph, optoff+2,
+ optoff+op[1],
+ &nat->info.seq[!dir]);
+ optoff += op[1];
+ }
+ }
+ return 1;
+}
+
+/* TCP sequence number adjustment. Returns 1 on success, 0 on failure
+ *
+ * Rewrites the sequence number using this direction's offsets and the
+ * acknowledgement number using the other direction's offsets, fixes
+ * the TCP checksum, adjusts SACK options and finally informs TCP
+ * connection tracking.  Failure means the header could not be made
+ * writable or nf_nat_sack_adjust() returned 0. */
+int
+nf_nat_seq_adjust(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct tcphdr *tcph;
+ int dir;
+ __be32 newseq, newack;
+ struct nf_conn_nat *nat = nfct_nat(ct);
+ struct nf_nat_seq *this_way, *other_way;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ this_way = &nat->info.seq[dir];
+ other_way = &nat->info.seq[!dir];
+
+ if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ return 0;
+
+ tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ /* Sequence numbers: add the offset valid at this position */
+ if (after(ntohl(tcph->seq), this_way->correction_pos))
+ newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
+ else
+ newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
+
+ /* Acknowledgements: subtract the other direction's offset */
+ if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+ newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
+ else
+ newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
+
+ /* Checksum is patched while the old values are still in place */
+ nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
+ nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
+
+ DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+ ntohl(newack));
+
+ tcph->seq = newseq;
+ tcph->ack_seq = newack;
+
+ if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+ return 0;
+
+ nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+
+ return 1;
+}
+EXPORT_SYMBOL(nf_nat_seq_adjust);
+
+/*
+ * Setup NAT on this expected conntrack so it follows master.
+ * If we fail to get a free NAT slot, we'll get dropped on confirm.
+ *
+ * The source is mapped to the destination address of the master's
+ * tuple opposite to exp->dir; the destination is mapped to that
+ * tuple's source address together with the protocol part saved in
+ * exp->saved_proto.
+ */
+void nf_nat_follow_master(struct nf_conn *ct,
+			  struct nf_conntrack_expect *exp)
+{
+	struct nf_nat_range range;
+
+	/* NAT must not have been set up for this connection yet */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Source manip: change src to where master sends to.  The hook
+	 * merely has to be one that performs source manipulation. */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+	range.min_ip = range.max_ip;
+	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+	/* Destination manip: map the port to where it's expected.  The
+	 * hook merely has to be one that performs destination
+	 * manipulation. */
+	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.max = exp->saved_proto;
+	range.min = range.max;
+	range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+	range.min_ip = range.max_ip;
+	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
+EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
new file mode 100644
index 00000000000..9b8c0daea74
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -0,0 +1,101 @@
+/* IRC extension for TCP NAT alteration.
+ *
+ * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * based on a copy of RR's ip_nat_ftp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/tcp.h>
+#include <linux/kernel.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+/* Debug output is compiled out by default; with the first branch
+ * enabled DEBUGP maps to printk. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Module metadata; the alias keeps the old ip_nat_irc name usable */
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_irc");
+
+/*
+ * NAT part of the IRC helper, installed as nf_nat_irc_hook.
+ *
+ * Registers the expectation 'exp' on the first usable port at or above
+ * its original destination port, then replaces the matchlen bytes at
+ * matchoff in the TCP payload with "<ip> <port>" in decimal, where ip
+ * is the destination address of the master's reply tuple.
+ *
+ * Returns NF_ACCEPT on success.  Returns NF_DROP if no port could be
+ * reserved or the packet could not be mangled; in the latter case the
+ * expectation is withdrawn again.
+ */
+static unsigned int help(struct sk_buff **pskb,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int matchoff,
+			 unsigned int matchlen,
+			 struct nf_conntrack_expect *exp)
+{
+	/* Room for a 32-bit and a 16-bit decimal number, a separating
+	 * space and the terminating NUL */
+	char buffer[sizeof("4294967296 65635")];
+	u_int32_t ip;
+	u_int16_t port;
+
+	/* Reply comes from server. */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_REPLY;
+	exp->expectfn = nf_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it.  The 16-bit
+	 * counter wrapping to 0 ends the search. */
+	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
+	sprintf(buffer, "%u %u", ip, port);
+	DEBUGP("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
+	       buffer, NIPQUAD(ip), port);
+
+	/* nf_nat_mangle_tcp_packet() reports 1/0, not a verdict: map
+	 * its result explicitly */
+	if (!nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+				      matchoff, matchlen, buffer,
+				      strlen(buffer))) {
+		nf_conntrack_unexpect_related(exp);
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+/* Module unload: reset nf_nat_irc_hook to NULL, then call
+ * synchronize_rcu() before returning. */
+static void __exit nf_nat_irc_fini(void)
+{
+ rcu_assign_pointer(nf_nat_irc_hook, NULL);
+ synchronize_rcu();
+}
+
+/* Module load: the hook is expected to be unset; point it at help().
+ * Always returns 0. */
+static int __init nf_nat_irc_init(void)
+{
+	BUG_ON(rcu_dereference(nf_nat_irc_hook) != NULL);
+
+	rcu_assign_pointer(nf_nat_irc_hook, help);
+	return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param. No longer, but don't break users.
+ *
+ * Setter for the obsolete 'ports' parameter: the value is ignored, a
+ * notice is logged and 0 is returned, so passing the parameter is
+ * accepted. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+ printk(KERN_INFO KBUILD_MODNAME
+ ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+ return 0;
+}
+/* 'ports' can only be set (via warn_set); no getter, no storage */
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+/* Entry and exit points of the module */
+module_init(nf_nat_irc_init);
+module_exit(nf_nat_irc_fini);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
new file mode 100644
index 00000000000..0ae45b79a4e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -0,0 +1,315 @@
+/*
+ * nf_nat_pptp.c
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * (needs netfilter tuple reservation)
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_NAT_PPTP_VERSION "3.0"
+
+/* Reads the __be16 located 'off' bytes past the start of 'req' */
+#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
+
+/* Module metadata; the alias keeps the old ip_nat_pptp name usable */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+MODULE_ALIAS("ip_nat_pptp");
+
+/* Debug output is compiled out by default; with the first branch
+ * enabled DEBUGP prints through printk with file and function name
+ * prepended. */
+#if 0
+extern const char *pptp_msg_name[];
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+ __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * Expect callback for the GRE connections of a PPTP session.
+ *
+ * First looks up the expectation for the opposite direction (built
+ * from the call IDs stored in the master's helper and NAT data) and,
+ * if found, withdraws it.  Then sets up source and destination NAT for
+ * the new connection from the master's tuples; the saved protocol part
+ * (exp->saved_proto) is applied to the source mapping for
+ * IP_CT_DIR_ORIGINAL and to the destination mapping for
+ * IP_CT_DIR_REPLY.
+ */
+static void pptp_nat_expected(struct nf_conn *ct,
+			      struct nf_conntrack_expect *exp)
+{
+	struct nf_conn *master = ct->master;
+	struct nf_conntrack_expect *other_exp;
+	struct nf_conntrack_tuple t;
+	struct nf_ct_pptp_master *ct_pptp_info;
+	struct nf_nat_pptp *nat_pptp_info;
+	/* Same range type as nf_nat_follow_master() uses */
+	struct nf_nat_range range;
+
+	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
+	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
+
+	/* And here goes the grand finale of corrosion... */
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		DEBUGP("we are PNS->PAC\n");
+		/* therefore, build tuple for PAC->PNS */
+		t.src.l3num = AF_INET;
+		t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
+		t.src.u.gre.key = ct_pptp_info->pac_call_id;
+		t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+		t.dst.u.gre.key = ct_pptp_info->pns_call_id;
+		t.dst.protonum = IPPROTO_GRE;
+	} else {
+		DEBUGP("we are PAC->PNS\n");
+		/* build tuple for PNS->PAC */
+		t.src.l3num = AF_INET;
+		t.src.u3.ip = master->tuplehash[exp->dir].tuple.src.u3.ip;
+		t.src.u.gre.key = nat_pptp_info->pns_call_id;
+		t.dst.u3.ip = master->tuplehash[exp->dir].tuple.dst.u3.ip;
+		t.dst.u.gre.key = nat_pptp_info->pac_call_id;
+		t.dst.protonum = IPPROTO_GRE;
+	}
+
+	DEBUGP("trying to unexpect other dir: ");
+	NF_CT_DUMP_TUPLE(&t);
+	other_exp = nf_conntrack_expect_find_get(&t);
+	if (other_exp) {
+		nf_conntrack_unexpect_related(other_exp);
+		/* drop the reference taken by the lookup */
+		nf_conntrack_expect_put(other_exp);
+		DEBUGP("success\n");
+	} else {
+		DEBUGP("not found!\n");
+	}
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = range.max = exp->saved_proto;
+	}
+	/* hook doesn't matter, but it has to do source manip */
+	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+	if (exp->dir == IP_CT_DIR_REPLY) {
+		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.min = range.max = exp->saved_proto;
+	}
+	/* hook doesn't matter, but it has to do destination manip */
+	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
+
+/* outbound packets == from PNS to PAC
+ *
+ * Rewrites the call ID field of the control messages OUT_CALL_REQUEST,
+ * IN_CALL_REPLY and CALL_CLEAR_REQUEST to the call ID stored in the
+ * connection's helper data; all other message types are accepted
+ * unmodified.  Returns NF_ACCEPT, or NF_DROP if mangling the packet
+ * failed. */
+static int
+pptp_outbound_pkt(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+
+{
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_callid;
+ unsigned int cid_off;
+
+ ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info;
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ /* default replacement; OUT_CALL_REQUEST picks a new one below */
+ new_callid = ct_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ /* unknown types are only logged, then accepted like the
+ * types listed below */
+ DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+ (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through */
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+ * down to here */
+ DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
+
+ /* mangle packet: the offset is relative to the TCP payload, so the
+ * PPTP packet header and control header sizes are added to cid_off;
+ * old and new field have the same size */
+ if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+static void
+pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
+	     struct nf_conntrack_expect *expect_reply)
+{
+	/* conntrack-side and NAT-side PPTP state of the master connection */
+	struct nf_conn *master = expect_orig->master;
+	struct nf_ct_pptp_master *track = &nfct_help(master)->help.ct_pptp_info;
+	struct nf_nat_pptp *nat = &nfct_nat(master)->help.nat_pptp_info;
+
+	/* keep a copy of the PAC call ID known to conntrack in the NAT
+	 * state; the reply expectation below is keyed on it */
+	nat->pac_call_id = track->pac_call_id;
+
+	/* PNS->PAC direction: match on the call ID saved in the NAT state,
+	 * remember the one currently held by conntrack */
+	expect_orig->dir = IP_CT_DIR_ORIGINAL;
+	expect_orig->tuple.src.u.gre.key = nat->pns_call_id;
+	expect_orig->tuple.dst.u.gre.key = track->pac_call_id;
+	expect_orig->saved_proto.gre.key = track->pns_call_id;
+
+	/* PAC->PNS direction: the mirror image of the above */
+	expect_reply->dir = IP_CT_DIR_REPLY;
+	expect_reply->tuple.src.u.gre.key = nat->pac_call_id;
+	expect_reply->tuple.dst.u.gre.key = track->pns_call_id;
+	expect_reply->saved_proto.gre.key = nat->pns_call_id;
+}
+
+/* inbound packets == from PAC to PNS: rewrite the peer call ID to the value saved in nat_info */
+static int
+pptp_inbound_pkt(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+{
+ struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
+
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+ new_pcid = nat_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REPLY:
+ pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+ break;
+ case PPTP_IN_CALL_CONNECT:
+ pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
+ break;
+ case PPTP_IN_CALL_REQUEST:
+ /* only need to nat in case PAC is behind NAT box */
+ return NF_ACCEPT;
+ case PPTP_WAN_ERROR_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
+ break;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
+ break;
+ case PPTP_SET_LINK_INFO:
+ pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
+ break;
+ default:
+ DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
+ pptp_msg_name[msg]:pptp_msg_name[0]);
+ /* fall through: unknown messages are accepted unmodified */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REPLY, IN_CALL_CONNECT, WAN_ERROR_NOTIFY,
+ * CALL_DISCONNECT_NOTIFY and SET_LINK_INFO pass down here */
+
+ /* mangle packet */
+ DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
+
+ if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)) == 0)
+ return NF_DROP; /* mangling failed */
+ return NF_ACCEPT;
+}
+
+static int __init nf_nat_helper_pptp_init(void)
+{
+ nf_nat_need_gre(); /* NOTE(review): presumably referenced only to pull in the GRE NAT module -- confirm */
+
+ BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); /* every hook must still be unset */
+ rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+
+ BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound));
+ rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+
+ BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre));
+ rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+
+ BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn));
+ rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ return 0; /* installing the four hooks cannot fail */
+}
+
+static void __exit nf_nat_helper_pptp_fini(void)
+{
+ rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); /* cleared in reverse order of installation */
+ rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL);
+ rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL);
+ rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL);
+ synchronize_rcu(); /* wait for RCU readers that may still hold the old hook pointers */
+}
+
+module_init(nf_nat_helper_pptp_init);
+module_exit(nf_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
new file mode 100644
index 00000000000..d3de579e09d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -0,0 +1,179 @@
+/*
+ * nf_nat_proto_gre.c
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+ __FUNCTION__, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* Return 1 when the GRE key picked by maniptype lies within [min, max], else 0 */
+static int
+gre_in_range(const struct nf_conntrack_tuple *tuple,
+	     enum nf_nat_manip_type maniptype,
+	     const union nf_conntrack_man_proto *min,
+	     const union nf_conntrack_man_proto *max)
+{
+	/* compare in host byte order; SRC manip looks at the source key */
+	u_int16_t k = ntohs(maniptype == IP_NAT_MANIP_SRC ? tuple->src.u.gre.key
+							   : tuple->dst.u.gre.key);
+	u_int16_t lo = ntohs(min->gre.key);
+	u_int16_t hi = ntohs(max->gre.key);
+
+	if (k < lo)
+		return 0;
+	return k <= hi;
+}
+
+/* generate unique tuple: choose a GRE key that leaves the tuple unused */
+static int
+gre_unique_tuple(struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
+		 enum nf_nat_manip_type maniptype,
+		 const struct nf_conn *conntrack)
+{
+	static u_int16_t next_key;	/* rotating offset, kept across calls */
+	unsigned int base = 1;		/* defaults cover keys 1..0xffff */
+	unsigned int span = 0xffff;
+	unsigned int tries;
+	__be16 *slot;
+
+	if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+		base = ntohs(range->min.gre.key);
+		span = ntohs(range->max.gre.key) - base + 1;
+	} else {
+		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
+	}
+	DEBUGP("min = %u, range_size = %u\n", base, span);
+
+	/* the key being rewritten sits on the side named by maniptype */
+	slot = &tuple->dst.u.gre.key;
+	if (maniptype == IP_NAT_MANIP_SRC)
+		slot = &tuple->src.u.gre.key;
+
+	for (tries = 0; tries < span; tries++) {
+		*slot = htons(base + next_key % span);
+		if (!nf_nat_used_tuple(tuple, conntrack))
+			return 1;
+		next_key++;
+	}
+
+	DEBUGP("%p: no NAT mapping\n", conntrack);
+	return 0;
+}
+
+/* manipulate a GRE packet according to maniptype; 0 if not writable or unknown version, else 1 */
+static int
+gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct gre_hdr *greh;
+ struct gre_hdr_pptp *pgreh;
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ unsigned int hdroff = iphdroff + iph->ihl * 4;
+
+ /* pgreh includes two optional 32bit fields which are not required
+ * to be there. That's where the magic '8' comes from */
+ if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8))
+ return 0;
+
+ greh = (void *)(*pskb)->data + hdroff;
+ pgreh = (struct gre_hdr_pptp *)greh; /* same bytes viewed through the PPTP layout */
+
+ /* we only have destination manip of a packet, since 'source key'
+ * is not present in the packet itself */
+ if (maniptype != IP_NAT_MANIP_DST)
+ return 1;
+ switch (greh->version) {
+ case 0: /* plain GRE: rewrite the optional key field */
+ if (!greh->key) {
+ DEBUGP("can't nat GRE w/o key\n");
+ break; /* packet left untouched, still returns 1 */
+ }
+ if (greh->csum) {
+ /* FIXME: Never tested this code... NOTE(review): confirm gre_key() lies inside the length made writable above */
+ nf_proto_csum_replace4(gre_csum(greh), *pskb,
+ *(gre_key(greh)),
+ tuple->dst.u.gre.key, 0);
+ }
+ *(gre_key(greh)) = tuple->dst.u.gre.key;
+ break;
+ case GRE_VERSION_PPTP: /* the call ID takes the place of the key */
+ DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
+ break;
+ default:
+ DEBUGP("can't nat unknown GRE version\n");
+ return 0;
+ }
+ return 1;
+}
+
+static struct nf_nat_protocol gre __read_mostly = {
+	.name			= "GRE",
+	.protonum		= IPPROTO_GRE,
+	.me			= THIS_MODULE,	/* owner, as set by the TCP/UDP/ICMP descriptors */
+	.manip_pkt		= gre_manip_pkt,
+	.in_range		= gre_in_range,
+	.unique_tuple		= gre_unique_tuple,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.range_to_nfattr	= nf_nat_port_range_to_nfattr,
+	.nfattr_to_range	= nf_nat_port_nfattr_to_range,
+#endif
+};
+
+int __init nf_nat_proto_gre_init(void)
+{
+ return nf_nat_protocol_register(&gre); /* registration result is the module init result */
+}
+
+void __exit nf_nat_proto_gre_fini(void)
+{
+ nf_nat_protocol_unregister(&gre); /* undo the registration done at init */
+}
+
+module_init(nf_nat_proto_gre_init);
+module_exit(nf_nat_proto_gre_fini);
+
+void nf_nat_need_gre(void)
+{
+ return; /* intentionally empty; NOTE(review): presumably exists only so callers depend on this module */
+}
+EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
new file mode 100644
index 00000000000..dcfd772972d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -0,0 +1,86 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+static int
+icmp_in_range(const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype,
+	      const union nf_conntrack_man_proto *min,
+	      const union nf_conntrack_man_proto *max)
+{
+	u_int16_t id = ntohs(tuple->src.u.icmp.id);	/* maniptype is not consulted: always the source id */
+	return id >= ntohs(min->icmp.id) && id <= ntohs(max->icmp.id);
+}
+
+static int
+icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	static u_int16_t next_id;	/* rotating offset, kept across calls */
+	const unsigned int base = ntohs(range->min.icmp.id);
+	unsigned int span, tries;
+
+	/* Without an explicit proto range, probe 0xFFFF ids starting at range->min */
+	if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+		span = ntohs(range->max.icmp.id) - base + 1;
+	else
+		span = 0xFFFF;
+	for (tries = 0; tries < span; tries++) {
+		tuple->src.u.icmp.id = htons(base + next_id % span);
+		if (!nf_nat_used_tuple(tuple, ct))
+			return 1;
+		next_id++;
+	}
+	return 0;
+}
+
+static int
+icmp_manip_pkt(struct sk_buff **pskb,
+	       unsigned int iphdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *ip = (struct iphdr *)((*pskb)->data + iphdroff);
+	unsigned int icmpoff = iphdroff + ip->ihl*4;
+	struct icmphdr *icmph;
+
+	/* make the ICMP header writable, then read it through *pskb */
+	if (!skb_make_writable(pskb, icmpoff + sizeof(*icmph)))
+		return 0;
+	icmph = (struct icmphdr *)((*pskb)->data + icmpoff);
+	nf_proto_csum_replace2(&icmph->checksum, *pskb,
+			       icmph->un.echo.id, tuple->src.u.icmp.id, 0);
+	icmph->un.echo.id = tuple->src.u.icmp.id;
+	return 1;
+}
+
+struct nf_nat_protocol nf_nat_protocol_icmp = {
+ .name = "ICMP",
+ .protonum = IPPROTO_ICMP,
+ .me = THIS_MODULE,
+ .manip_pkt = icmp_manip_pkt,
+ .in_range = icmp_in_range,
+ .unique_tuple = icmp_unique_tuple,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+ .range_to_nfattr = nf_nat_port_range_to_nfattr,
+ .nfattr_to_range = nf_nat_port_nfattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 00000000000..7e26a7e9bee
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,148 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+
+static int
+tcp_in_range(const struct nf_conntrack_tuple *tuple,
+	     enum nf_nat_manip_type maniptype,
+	     const union nf_conntrack_man_proto *min,
+	     const union nf_conntrack_man_proto *max)
+{
+	/* compare in host byte order; SRC manip checks the source port */
+	u_int16_t p = ntohs(maniptype == IP_NAT_MANIP_SRC ? tuple->src.u.tcp.port
+							   : tuple->dst.u.tcp.port);
+	u_int16_t lo = ntohs(min->tcp.port);
+	u_int16_t hi = ntohs(max->tcp.port);
+
+	if (p < lo)
+		return 0;
+	return p <= hi;
+}
+
+static int
+tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t port; /* rotating offset shared by all calls; not advanced on success */
+ __be16 *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ portptr = &tuple->src.u.tcp.port;
+ else
+ portptr = &tuple->dst.u.tcp.port;
+
+ /* If no range specified, derive one from the class of the current port */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == IP_NAT_MANIP_DST)
+ return 0;
+
+ /* Map privileged onto privileged. */
+ if (ntohs(*portptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing; <512 stays in 1-511, the rest in 600-1023 */
+ if (ntohs(*portptr)<512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min.tcp.port);
+ range_size = ntohs(range->max.tcp.port) - min + 1;
+ }
+
+ for (i = 0; i < range_size; i++, port++) { /* try each port of the range at most once */
+ *portptr = htons(min + port % range_size);
+ if (!nf_nat_used_tuple(tuple, ct))
+ return 1;
+ }
+ return 0; /* every candidate port is already in use */
+}
+
+static int
+tcp_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct tcphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ __be32 oldip, newip;
+ __be16 *portptr, newport, oldport;
+ int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+ /* this could be a inner header returned in icmp packet; in such
+ cases we cannot update the checksum field since it is outside of
+ the 8 bytes of transport layer headers we are guaranteed */
+ if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+ hdrsize = sizeof(struct tcphdr);
+
+ if (!skb_make_writable(pskb, hdroff + hdrsize))
+ return 0;
+
+ iph = (struct iphdr *)((*pskb)->data + iphdroff); /* re-read both headers through *pskb */
+ hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Source manip: replace source address and port */
+ oldip = iph->saddr;
+ newip = tuple->src.u3.ip;
+ newport = tuple->src.u.tcp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Destination manip: replace destination address and port */
+ oldip = iph->daddr;
+ newip = tuple->dst.u3.ip;
+ newport = tuple->dst.u.tcp.port;
+ portptr = &hdr->dest;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr)) /* only 8 bytes available: no checksum field to fix */
+ return 1;
+
+ nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
+ return 1;
+}
+
+struct nf_nat_protocol nf_nat_protocol_tcp = {
+ .name = "TCP",
+ .protonum = IPPROTO_TCP,
+ .me = THIS_MODULE,
+ .manip_pkt = tcp_manip_pkt,
+ .in_range = tcp_in_range,
+ .unique_tuple = tcp_unique_tuple,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+ .range_to_nfattr = nf_nat_port_range_to_nfattr,
+ .nfattr_to_range = nf_nat_port_nfattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 00000000000..ab0ce4c8699
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,138 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+static int
+udp_in_range(const struct nf_conntrack_tuple *tuple,
+	     enum nf_nat_manip_type maniptype,
+	     const union nf_conntrack_man_proto *min,
+	     const union nf_conntrack_man_proto *max)
+{
+	/* compare in host byte order; SRC manip checks the source port */
+	u_int16_t p = ntohs(maniptype == IP_NAT_MANIP_SRC ? tuple->src.u.udp.port
+							   : tuple->dst.u.udp.port);
+	u_int16_t lo = ntohs(min->udp.port);
+	u_int16_t hi = ntohs(max->udp.port);
+
+	if (p < lo)
+		return 0;
+	return p <= hi;
+}
+
+static int
+udp_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t port; /* rotating offset shared by all calls; not advanced on success */
+ __be16 *portptr;
+ unsigned int range_size, min, i;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ portptr = &tuple->src.u.udp.port;
+ else
+ portptr = &tuple->dst.u.udp.port;
+
+ /* If no range specified, derive one from the class of the current port */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == IP_NAT_MANIP_DST)
+ return 0;
+
+ if (ntohs(*portptr) < 1024) { /* map privileged onto privileged */
+ /* Loose convention: >> 512 is credential passing; <512 stays in 1-511, the rest in 600-1023 */
+ if (ntohs(*portptr)<512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min.udp.port);
+ range_size = ntohs(range->max.udp.port) - min + 1;
+ }
+
+ for (i = 0; i < range_size; i++, port++) { /* try each port of the range at most once */
+ *portptr = htons(min + port % range_size);
+ if (!nf_nat_used_tuple(tuple, ct))
+ return 1;
+ }
+ return 0; /* every candidate port is already in use */
+}
+
+static int
+udp_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct udphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ __be32 oldip, newip;
+ __be16 *portptr, newport;
+
+ if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+ return 0;
+
+ iph = (struct iphdr *)((*pskb)->data + iphdroff); /* re-read both headers through *pskb */
+ hdr = (struct udphdr *)((*pskb)->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Source manip: replace source address and port */
+ oldip = iph->saddr;
+ newip = tuple->src.u3.ip;
+ newport = tuple->src.u.udp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Destination manip: replace destination address and port */
+ oldip = iph->daddr;
+ newip = tuple->dst.u3.ip;
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+ if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { /* a zero checksum field is left alone unless CHECKSUM_PARTIAL */
+ nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
+ 0);
+ if (!hdr->check) /* never leave an updated checksum as zero */
+ hdr->check = CSUM_MANGLED_0;
+ }
+ *portptr = newport; /* written last: the checksum update above needs the old port */
+ return 1;
+}
+
+struct nf_nat_protocol nf_nat_protocol_udp = {
+ .name = "UDP",
+ .protonum = IPPROTO_UDP,
+ .me = THIS_MODULE,
+ .manip_pkt = udp_manip_pkt,
+ .in_range = udp_in_range,
+ .unique_tuple = udp_unique_tuple,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+ .range_to_nfattr = nf_nat_port_range_to_nfattr,
+ .nfattr_to_range = nf_nat_port_nfattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 00000000000..f50d0203f9c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,54 @@
+/* The "unknown" protocol. This is what is used for protocols we
+ * don't understand. It's returned by ip_ct_find_proto().
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+static int unknown_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type manip_type,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ return 1;
+}
+
+static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ /* Sorry: we can't help you; if it's not unique, we can't frob
+ anything. */
+ return 0;
+}
+
+static int
+unknown_manip_pkt(struct sk_buff **pskb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ return 1;
+}
+
+struct nf_nat_protocol nf_nat_unknown_protocol = {
+ .name = "unknown",
+ /* .me isn't set: getting a ref to this cannot fail. */
+ .manip_pkt = unknown_manip_pkt,
+ .in_range = unknown_in_range,
+ .unique_tuple = unknown_unique_tuple,
+};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
new file mode 100644
index 00000000000..b868ee0195d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -0,0 +1,343 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Everything about the rules for NAT. */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/bitops.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} nat_initial_table __initdata = {
+ .repl = {
+ .name = "nat",
+ .valid_hooks = NAT_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = {
+ [NF_IP_PRE_ROUTING] = 0,
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+ .underflow = {
+ [NF_IP_PRE_ROUTING] = 0,
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+ },
+ .entries = {
+ /* PRE_ROUTING */
+ {
+ .entry = {
+ .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard),
+ },
+ .target = {
+ .target = {
+ .u = {
+ .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
+ },
+ },
+ .verdict = -NF_ACCEPT - 1,
+ },
+ },
+ /* POST_ROUTING */
+ {
+ .entry = {
+ .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard),
+ },
+ .target = {
+ .target = {
+ .u = {
+ .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
+ },
+ },
+ .verdict = -NF_ACCEPT - 1,
+ },
+ },
+ /* LOCAL_OUT */
+ {
+ .entry = {
+ .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard),
+ },
+ .target = {
+ .target = {
+ .u = {
+ .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
+ },
+ },
+ .verdict = -NF_ACCEPT - 1,
+ },
+ },
+ },
+ /* ERROR */
+ .term = {
+ .entry = {
+ .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_error),
+ },
+ .target = {
+ .target = {
+ .u = {
+ .user = {
+ .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)),
+ .name = IPT_ERROR_TARGET,
+ },
+ },
+ },
+ .errorname = "ERROR",
+ },
+ }
+};
+
+static struct ipt_table nat_table = {
+ .name = "nat",
+ .valid_hooks = NAT_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+/* Source NAT target: set up NAT for the connection from the rule's first range */
+static unsigned int ipt_snat_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const struct xt_target *target,
+ const void *targinfo)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+
+ NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
+
+ ct = nf_ct_get(*pskb, &ctinfo);
+
+ /* Connection must be valid and new or related (either direction). */
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ NF_CT_ASSERT(out);
+
+ return nf_nat_setup_info(ct, &mr->range[0], hooknum); /* verdict comes from the NAT core */
+}
+
+/* Before 2.6.11 we did implicit source NAT if required. Warn (once) about the change. */
+static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+{
+ static int warned = 0; /* the message is printed at most once */
+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+ struct rtable *rt;
+
+ if (ip_route_output_key(&rt, &fl) != 0) /* no route to dstip: nothing to compare */
+ return;
+
+ if (rt->rt_src != srcip && !warned) { /* route's source address differs from srcip */
+ printk("NAT: no longer support implicit source local NAT\n");
+ printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+ NIPQUAD(srcip), NIPQUAD(dstip));
+ warned = 1;
+ }
+ ip_rt_put(rt); /* drop the route reference taken by the lookup */
+}
+
+static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ unsigned int hooknum,
+ const struct xt_target *target,
+ const void *targinfo)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+ hooknum == NF_IP_LOCAL_OUT);
+
+ ct = nf_ct_get(*pskb, &ctinfo);
+
+ /* Connection must be valid and new or related. */
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ if (hooknum == NF_IP_LOCAL_OUT &&
+ mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+ warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ mr->range[0].min_ip);
+
+ return nf_nat_setup_info(ct, &mr->range[0], hooknum); /* only the first range is used */
+}
+
+static int ipt_snat_checkentry(const char *tablename,
+			       const void *entry,
+			       const struct xt_target *target,
+			       void *targinfo,
+			       unsigned int hook_mask)
+{
+	const struct nf_nat_multi_range_compat *ranges = targinfo;
+
+	/* Accept the rule only when it carries exactly one range */
+	if (ranges->rangesize == 1)
+		return 1;
+
+	printk("SNAT: multiple ranges no longer supported\n");
+	return 0;
+}
+
+static int ipt_dnat_checkentry(const char *tablename,
+			       const void *entry,
+			       const struct xt_target *target,
+			       void *targinfo,
+			       unsigned int hook_mask)
+{
+	const struct nf_nat_multi_range_compat *ranges = targinfo;
+
+	/* Accept the rule only when it carries exactly one range */
+	if (ranges->rangesize == 1)
+		return 1;
+
+	printk("DNAT: multiple ranges no longer supported\n");
+	return 0;
+}
+
+inline unsigned int
+alloc_null_binding(struct nf_conn *ct,
+ struct nf_nat_info *info,
+ unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ Use reply in case it's already been mangled (eg local packet).
+ 'info' is not used by this function. */
+ __be32 ip
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
+ : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+ struct nf_nat_range range
+ = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; /* min and max address are the same */
+
+ DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
+ ct, NIPQUAD(ip));
+ return nf_nat_setup_info(ct, &range, hooknum);
+}
+
+unsigned int
+alloc_null_binding_confirmed(struct nf_conn *ct,
+ struct nf_nat_info *info,
+ unsigned int hooknum)
+{
+ __be32 ip /* address taken from the reply tuple, side chosen by manip type */
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
+ : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+ u_int16_t all /* per-protocol part of the same side of the reply tuple */
+ = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
+ : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
+ struct nf_nat_range range
+ = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; /* IP_NAT_RANGE_PROTO_SPECIFIED is not set; 'info' is unused */
+
+ DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
+ ct, NIPQUAD(ip));
+ return nf_nat_setup_info(ct, &range, hooknum);
+}
+
+int nf_nat_rule_find(struct sk_buff **pskb,
+		     unsigned int hooknum,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     struct nf_conn *ct,
+		     struct nf_nat_info *info)
+{
+	int verdict = ipt_do_table(pskb, hooknum, in, out, &nat_table);
+
+	/* any verdict other than ACCEPT is passed straight back */
+	if (verdict != NF_ACCEPT)
+		return verdict;
+	/* accepted and already set up for this manip type: nothing to add */
+	if (nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+		return verdict;
+	/* NUL mapping */
+	return alloc_null_binding(ct, info, hooknum);
+}
+
+static struct xt_target ipt_snat_reg = {	/* handed to xt_register_target(), so typed like ipt_dnat_reg */
+	.name		= "SNAT",
+	.target		= ipt_snat_target,
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.table		= "nat",
+	.hooks		= 1 << NF_IP_POST_ROUTING,
+	.checkentry	= ipt_snat_checkentry,
+	.family		= AF_INET,
+};
+
+static struct xt_target ipt_dnat_reg = {
+ .name = "DNAT",
+ .target = ipt_dnat_target,
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
+ .checkentry = ipt_dnat_checkentry,
+ .family = AF_INET,
+};
+
+int __init nf_nat_rule_init(void)
+{
+	int err;
+
+	/* table first, then the two targets; undo in reverse order on failure */
+	err = ipt_register_table(&nat_table, &nat_initial_table.repl);
+	if (err)
+		return err;
+
+	err = xt_register_target(&ipt_snat_reg);
+	if (err)
+		goto out_table;
+
+	err = xt_register_target(&ipt_dnat_reg);
+	if (err)
+		goto out_snat;
+	return 0;
+
+out_snat:
+	xt_unregister_target(&ipt_snat_reg);
+out_table:
+	ipt_unregister_table(&nat_table);
+	return err;
+}
+
+void nf_nat_rule_cleanup(void)
+{
+ xt_unregister_target(&ipt_dnat_reg);
+ xt_unregister_target(&ipt_snat_reg);
+ ipt_unregister_table(&nat_table);
+}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
new file mode 100644
index 00000000000..3d524b95731
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -0,0 +1,283 @@
+/* SIP extension for UDP NAT alteration.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_nat_ftp.c and other modules.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP NAT helper");
+MODULE_ALIAS("ip_nat_sip");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * Textual form of a conntrack's tuple addresses, one entry per direction,
+ * filled in by addr_map_init().
+ */
+struct addr_map {
+ struct {
+ /* "a.b.c.d:port" strings for the tuple's source and destination */
+ char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ /* srclen: length of the whole string; srciplen: length of the IP part only */
+ unsigned int srclen, srciplen;
+ /* same two lengths for dst */
+ unsigned int dstlen, dstiplen;
+ } addr[IP_CT_DIR_MAX];
+};
+
+/*
+ * Format the source and destination of both conntrack tuples as
+ * "a.b.c.d:port" strings, recording the length of the IP part and of the
+ * complete string for each of them.
+ */
+static void addr_map_init(struct nf_conn *ct, struct addr_map *map)
+{
+	enum ip_conntrack_dir dir;
+
+	for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
+		struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
+		unsigned int len;
+
+		/* source: IP first, then append the port */
+		len = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
+			      NIPQUAD(tuple->src.u3.ip));
+		map->addr[dir].srciplen = len;
+		len += sprintf(map->addr[dir].src + len, ":%u",
+			       ntohs(tuple->src.u.udp.port));
+		map->addr[dir].srclen = len;
+
+		/* destination: same layout */
+		len = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
+			      NIPQUAD(tuple->dst.u3.ip));
+		map->addr[dir].dstiplen = len;
+		len += sprintf(map->addr[dir].dst + len, ":%u",
+			       ntohs(tuple->dst.u.udp.port));
+		map->addr[dir].dstlen = len;
+	}
+}
+
+/*
+ * Look up the address at header position @pos.  If it equals this
+ * direction's source (or destination) string, with or without the port, it
+ * is replaced by the other direction's destination (or source) string.
+ *
+ * Returns 0 only when mangling the packet fails; returns 1 otherwise, also
+ * when no address was found or it matched neither string.
+ */
+static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+			struct nf_conn *ct, const char **dptr, size_t dlen,
+			enum sip_header_pos pos, struct addr_map *map)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	unsigned int matchoff, matchlen, newlen;
+	const char *found;
+	char *newaddr;
+
+	if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
+		return 1;
+
+	found = *dptr + matchoff;
+
+	if ((matchlen == map->addr[dir].srciplen ||
+	     matchlen == map->addr[dir].srclen) &&
+	    !memcmp(found, map->addr[dir].src, matchlen)) {
+		newaddr = map->addr[!dir].dst;
+		newlen = map->addr[!dir].dstlen;
+	} else if ((matchlen == map->addr[dir].dstiplen ||
+		    matchlen == map->addr[dir].dstlen) &&
+		   !memcmp(found, map->addr[dir].dst, matchlen)) {
+		newaddr = map->addr[!dir].src;
+		newlen = map->addr[!dir].srclen;
+	} else {
+		return 1;
+	}
+
+	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+				      matchoff, matchlen, newaddr, newlen))
+		return 0;
+
+	/* Reload the payload pointer after the packet has been mangled. */
+	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	return 1;
+}
+
+/*
+ * NAT hook for SIP signalling: rewrite the addresses in the request URI
+ * (requests only) and in the From, To, Via and Contact headers.
+ *
+ * Every successful rewrite may change the payload size, so the payload
+ * length is recomputed from the skb before each header lookup instead of
+ * reusing the length measured on entry.
+ *
+ * Returns NF_ACCEPT, or NF_DROP when the payload is shorter than "SIP/2.0"
+ * or a rewrite fails.
+ */
+static unsigned int ip_nat_sip(struct sk_buff **pskb,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conn *ct,
+			       const char **dptr)
+{
+	static const enum sip_header_pos hdrs[] = {
+		POS_FROM, POS_TO, POS_VIA, POS_CONTACT
+	};
+	enum sip_header_pos pos;
+	struct addr_map map;
+	unsigned int dataoff, datalen, i;
+
+	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	/* Compare before subtracting so a short packet cannot wrap around. */
+	if ((*pskb)->len < dataoff + sizeof("SIP/2.0") - 1)
+		return NF_DROP;
+	datalen = (*pskb)->len - dataoff;
+
+	addr_map_init(ct, &map);
+
+	/* Basic rules: requests and responses. */
+	if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
+		/* 10.2: Constructing the REGISTER Request:
+		 *
+		 * The "userinfo" and "@" components of the SIP URI MUST NOT
+		 * be present.
+		 */
+		if (datalen >= sizeof("REGISTER") - 1 &&
+		    strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
+			pos = POS_REG_REQ_URI;
+		else
+			pos = POS_REQ_URI;
+
+		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+			return NF_DROP;
+		/* the rewrite may have resized the payload */
+		datalen = (*pskb)->len - dataoff;
+	}
+
+	for (i = 0; i < sizeof(hdrs) / sizeof(hdrs[0]); i++) {
+		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen,
+				  hdrs[i], &map))
+			return NF_DROP;
+		datalen = (*pskb)->len - dataoff;
+	}
+	return NF_ACCEPT;
+}
+
+/*
+ * Replace the field found at header position @pos with @buffer (@bufflen
+ * bytes).  Returns 1 on success with *dptr refreshed, 0 when the field is
+ * not found or the packet cannot be mangled.
+ */
+static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+				      enum ip_conntrack_info ctinfo,
+				      struct nf_conn *ct,
+				      const char **dptr, size_t dlen,
+				      char *buffer, int bufflen,
+				      enum sip_header_pos pos)
+{
+	unsigned int off, len;
+
+	if (ct_sip_get_info(ct, *dptr, dlen, &off, &len, pos) <= 0 ||
+	    !nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+				      off, len, buffer, bufflen))
+		return 0;
+
+	/* We need to reload this. Thanks Patrick. */
+	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	return 1;
+}
+
+/*
+ * Rewrite the value at POS_CONTENT with the current size of the SDP body.
+ * Returns the result of the mangle call, or 0 when either the SDP header or
+ * the content-length field cannot be located.
+ */
+static int mangle_content_len(struct sk_buff **pskb,
+			      enum ip_conntrack_info ctinfo,
+			      struct nf_conn *ct,
+			      const char *dptr)
+{
+	unsigned int hdrlen, paylen, matchoff, matchlen;
+	char buffer[sizeof("65536")];
+	int bufflen, c_len;
+
+	hdrlen = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	paylen = (*pskb)->len - hdrlen;
+
+	/* Get actual SDP length */
+	if (ct_sip_get_info(ct, dptr, paylen, &matchoff,
+			    &matchlen, POS_SDP_HEADER) <= 0)
+		return 0;
+
+	/* since ct_sip_get_info() give us a pointer passing 'v='
+	   we need to add 2 bytes in this count. */
+	c_len = paylen - matchoff + 2;
+
+	/* Now, update SDP length */
+	if (ct_sip_get_info(ct, dptr, paylen, &matchoff,
+			    &matchlen, POS_CONTENT) <= 0)
+		return 0;
+
+	bufflen = sprintf(buffer, "%u", c_len);
+	return nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+					matchoff, matchlen,
+					buffer, bufflen);
+}
+
+/*
+ * Rewrite the SDP body: owner and connection addresses become @newip, the
+ * media port becomes @port, and finally the content length is refreshed.
+ * Returns 0 as soon as one of the rewrites fails, otherwise the result of
+ * mangle_content_len().
+ */
+static unsigned int mangle_sdp(struct sk_buff **pskb,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conn *ct,
+			       __be32 newip, u_int16_t port,
+			       const char *dptr)
+{
+	char buffer[sizeof("nnn.nnn.nnn.nnn")];
+	unsigned int hdrlen, n;
+
+	hdrlen = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+
+	/* Mangle owner and contact info. */
+	n = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
+	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr,
+			       (*pskb)->len - hdrlen,
+			       buffer, n, POS_OWNER_IP4) ||
+	    !mangle_sip_packet(pskb, ctinfo, ct, &dptr,
+			       (*pskb)->len - hdrlen,
+			       buffer, n, POS_CONNECTION_IP4))
+		return 0;
+
+	/* Mangle media port. */
+	n = sprintf(buffer, "%u", port);
+	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr,
+			       (*pskb)->len - hdrlen,
+			       buffer, n, POS_MEDIA))
+		return 0;
+
+	return mangle_content_len(pskb, ctinfo, ct, dptr);
+}
+
+/*
+ * This packet has hit the connection tracking matching code: mangle its SDP
+ * body and change the expectation to match the mangled version.
+ * Returns NF_ACCEPT, or NF_DROP when no port can be registered for the
+ * expectation or the SDP rewrite fails.
+ */
+static unsigned int ip_nat_sdp(struct sk_buff **pskb,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conntrack_expect *exp,
+			       const char *dptr)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_conn *ct = exp->master;
+	u_int16_t port;
+	__be32 newip;
+
+	DEBUGP("ip_nat_sdp():\n");
+
+	/* Connection will come from reply */
+	newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+
+	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
+	exp->tuple.dst.u3.ip = newip;
+	exp->dir = !dir;
+
+	/* When the expected packet shows up, NAT it the same way as this
+	   one. */
+	exp->expectfn = nf_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it. */
+	port = ntohs(exp->saved_proto.udp.port);
+	while (port != 0) {
+		exp->tuple.dst.u.udp.port = htons(port);
+		if (nf_conntrack_expect_related(exp) == 0)
+			break;
+		port++;		/* wraps to 0 once every higher port failed */
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	if (mangle_sdp(pskb, ctinfo, ct, newip, port, dptr))
+		return NF_ACCEPT;
+
+	nf_conntrack_unexpect_related(exp);
+	return NF_DROP;
+}
+
+/*
+ * Module exit: clear both SIP NAT hook pointers, then call synchronize_rcu()
+ * before returning.
+ */
+static void __exit nf_nat_sip_fini(void)
+{
+ rcu_assign_pointer(nf_nat_sip_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_hook, NULL);
+ synchronize_rcu();
+}
+
+/*
+ * Module init: install ip_nat_sip() and ip_nat_sdp() as the SIP NAT hooks.
+ * Both hook pointers must still be NULL, otherwise BUG_ON() fires.
+ * Always returns 0.
+ */
+static int __init nf_nat_sip_init(void)
+{
+ BUG_ON(rcu_dereference(nf_nat_sip_hook));
+ BUG_ON(rcu_dereference(nf_nat_sdp_hook));
+ rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
+ rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp);
+ return 0;
+}
+
+module_init(nf_nat_sip_init);
+module_exit(nf_nat_sip_fini);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
new file mode 100644
index 00000000000..f12528fe1bf
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -0,0 +1,1332 @@
+/*
+ * nf_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Updates:
+ * 2000-08-06: Convert to new helper API (Harald Welte).
+ *
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+/* First octet of the address that n points to; n is parenthesized so that
+   expression arguments are cast as a whole. */
+#define NOCT1(n) (*(u8 *)(n))
+
+static int debug;
+static DEFINE_SPINLOCK(snmp_lock);
+
+/*
+ * Application layer address mapping mimics the NAT mapping, but
+ * only for the first octet in this case (a more flexible system
+ * can be implemented if needed).
+ *
+ * mangle_address() rewrites an address whose first octet equals 'from' so
+ * that it starts with 'to' instead.
+ */
+struct oct1_map
+{
+ u_int8_t from;
+ u_int8_t to;
+};
+
+
+/*****************************************************************************
+ *
+ * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* Class: top two bits of the identifier octet, as split by asn1_id_decode() */
+#define ASN1_UNI 0 /* Universal */
+#define ASN1_APL 1 /* Application */
+#define ASN1_CTX 2 /* Context */
+#define ASN1_PRV 3 /* Private */
+
+/* Tag: universal-class tag numbers */
+#define ASN1_EOC 0 /* End Of Contents */
+#define ASN1_BOL 1 /* Boolean */
+#define ASN1_INT 2 /* Integer */
+#define ASN1_BTS 3 /* Bit String */
+#define ASN1_OTS 4 /* Octet String */
+#define ASN1_NUL 5 /* Null */
+#define ASN1_OJI 6 /* Object Identifier */
+#define ASN1_OJD 7 /* Object Description */
+#define ASN1_EXT 8 /* External */
+#define ASN1_SEQ 16 /* Sequence */
+#define ASN1_SET 17 /* Set */
+#define ASN1_NUMSTR 18 /* Numerical String */
+#define ASN1_PRNSTR 19 /* Printable String */
+#define ASN1_TEXSTR 20 /* Teletext String */
+#define ASN1_VIDSTR 21 /* Video String */
+#define ASN1_IA5STR 22 /* IA5 String */
+#define ASN1_UNITIM 23 /* Universal Time */
+#define ASN1_GENTIM 24 /* General Time */
+#define ASN1_GRASTR 25 /* Graphical String */
+#define ASN1_VISSTR 26 /* Visible String */
+#define ASN1_GENSTR 27 /* General String */
+
+/* Primitive / Constructed methods: bit 5 of the identifier octet */
+#define ASN1_PRI 0 /* Primitive */
+#define ASN1_CON 1 /* Constructed */
+
+/*
+ * Error codes stored in asn1_ctx.error by the decoding routines.
+ */
+#define ASN1_ERR_NOERROR 0
+#define ASN1_ERR_DEC_EMPTY 2
+#define ASN1_ERR_DEC_EOC_MISMATCH 3
+#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
+#define ASN1_ERR_DEC_BADVALUE 5
+
+/*
+ * ASN.1 context: decoding cursor over one message buffer, set up by
+ * asn1_open() and advanced one octet at a time by asn1_octet_decode().
+ */
+struct asn1_ctx
+{
+ int error; /* Error condition */
+ unsigned char *pointer; /* Octet just to be decoded */
+ unsigned char *begin; /* First octet */
+ unsigned char *end; /* Octet after last octet */
+};
+
+/*
+ * Octet string (not null terminated): 'data' holds 'len' octets.
+ */
+struct asn1_octstr
+{
+ unsigned char *data;
+ unsigned int len;
+};
+
+static void asn1_open(struct asn1_ctx *ctx,
+ unsigned char *buf,
+ unsigned int len)
+{
+ ctx->begin = buf;
+ ctx->end = buf + len;
+ ctx->pointer = buf;
+ ctx->error = ASN1_ERR_NOERROR;
+}
+
+/*
+ * Fetch the next octet into *ch and advance the cursor.  Returns 1 on
+ * success; at the end of the buffer sets ASN1_ERR_DEC_EMPTY and returns 0.
+ */
+static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
+{
+	if (ctx->pointer < ctx->end) {
+		*ch = *ctx->pointer++;
+		return 1;
+	}
+
+	ctx->error = ASN1_ERR_DEC_EMPTY;
+	return 0;
+}
+
+/*
+ * Decode a multi-octet tag number: seven value bits per octet, most
+ * significant group first, bit 7 set on every octet but the last.
+ * Returns 1 on success, 0 when the buffer runs out.
+ */
+static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
+{
+	unsigned char octet;
+
+	*tag = 0;
+
+	for (;;) {
+		if (!asn1_octet_decode(ctx, &octet))
+			return 0;
+		*tag = (*tag << 7) | (octet & 0x7F);
+		if ((octet & 0x80) != 0x80)
+			break;
+	}
+	return 1;
+}
+
+/*
+ * Decode an identifier: class (bits 7-6), primitive/constructed flag
+ * (bit 5) and tag number (bits 4-0).  A tag field of 0x1F means the tag
+ * number follows in further octets.  Returns 1 on success, 0 otherwise.
+ */
+static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
+				    unsigned int *cls,
+				    unsigned int *con,
+				    unsigned int *tag)
+{
+	unsigned char id;
+
+	if (!asn1_octet_decode(ctx, &id))
+		return 0;
+
+	*cls = (id >> 6) & 0x03;
+	*con = (id >> 5) & 0x01;
+	*tag = id & 0x1F;
+
+	if (*tag != 0x1F)
+		return 1;
+
+	return asn1_tag_decode(ctx, tag) ? 1 : 0;
+}
+
+/*
+ * Decode a length field.
+ *
+ * 0x80 selects the indefinite form: *def is set to 0 and *len is left
+ * untouched.  Otherwise *def is 1 and *len receives the length, taken either
+ * from the octet itself (short form, < 0x80) or from the following
+ * (octet & 0x7F) octets, most significant first (long form).
+ *
+ * A definite length larger than what is left in the buffer is rejected, so
+ * callers never derive an end pointer beyond ctx->end from it.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
+					unsigned int *def,
+					unsigned int *len)
+{
+	unsigned char ch, cnt;
+
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+
+	if (ch == 0x80) {
+		*def = 0;
+		return 1;
+	}
+
+	*def = 1;
+
+	if (ch < 0x80) {
+		*len = ch;
+	} else {
+		cnt = (unsigned char) (ch & 0x7F);
+		*len = 0;
+
+		while (cnt > 0) {
+			if (!asn1_octet_decode(ctx, &ch))
+				return 0;
+			*len <<= 8;
+			*len |= ch;
+			cnt--;
+		}
+	}
+
+	/* don't trust len bigger than ctx buffer */
+	if (ctx->pointer > ctx->end ||
+	    *len > (size_t)(ctx->end - ctx->pointer)) {
+		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Decode an identifier and a length.
+ *
+ * On success *cls, *con and *tag describe the element and *eoc points just
+ * past its contents, or is NULL when the length is indefinite.
+ *
+ * Two malformed cases are rejected here so that later decoders can rely on
+ * *eoc: a primitive element with an indefinite length (which would hand a
+ * NULL end pointer to the primitive decoders), and a definite length
+ * reaching beyond the end of the buffer.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
+					unsigned char **eoc,
+					unsigned int *cls,
+					unsigned int *con,
+					unsigned int *tag)
+{
+	unsigned int def, len;
+
+	if (!asn1_id_decode(ctx, cls, con, tag))
+		return 0;
+
+	def = len = 0;
+	if (!asn1_length_decode(ctx, &def, &len))
+		return 0;
+
+	/* primitive shall be definite, indefinite shall be constructed */
+	if (*con == ASN1_PRI && !def)
+		return 0;
+
+	if (!def) {
+		*eoc = NULL;
+		return 1;
+	}
+
+	/* contents must lie entirely inside the buffer */
+	if (ctx->pointer > ctx->end ||
+	    len > (size_t)(ctx->end - ctx->pointer)) {
+		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+		return 0;
+	}
+
+	*eoc = ctx->pointer + len;
+	return 1;
+}
+
+/*
+ * Check for the end of an element's contents.
+ *
+ * With a definite length (@eoc non-NULL) the cursor must sit exactly on
+ * @eoc.  With an indefinite length (@eoc NULL) the next two octets are
+ * consumed and both must be zero.  Returns 1 when the end was reached,
+ * 0 otherwise (with ctx->error set).
+ */
+static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+	unsigned char ch;
+	int i;
+
+	if (eoc != 0) {
+		if (ctx->pointer == eoc)
+			return 1;
+		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+		return 0;
+	}
+
+	/* indefinite form: expect the two-octet 00 00 terminator */
+	for (i = 0; i < 2; i++) {
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+
+		if (ch != 0x00) {
+			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Skip the contents of an element by moving the cursor straight to eoc.
+ * Always returns 1.
+ *
+ * NOTE(review): asn1_header_decode() sets *eoc to NULL for an indefinite
+ * length; passing that here would leave ctx->pointer NULL - confirm callers
+ * cannot reach this with an indefinite-length element.
+ */
+static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+ ctx->pointer = eoc;
+ return 1;
+}
+
+/*
+ * Decode a signed integer running up to @eoc.  The first octet is
+ * sign-extended; every further octet is shifted in below it.  More than
+ * sizeof(long) octets yields ASN1_ERR_DEC_BADVALUE.  Returns 1 on success,
+ * 0 on failure.
+ */
+static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
+				      unsigned char *eoc,
+				      long *integer)
+{
+	unsigned char octet;
+	unsigned int nbytes = 1;
+
+	if (!asn1_octet_decode(ctx, &octet))
+		return 0;
+
+	*integer = (signed char) octet;
+
+	while (ctx->pointer < eoc) {
+		if (nbytes >= sizeof (long)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		nbytes++;
+
+		if (!asn1_octet_decode(ctx, &octet))
+			return 0;
+
+		*integer = (*integer << 8) | octet;
+	}
+	return 1;
+}
+
+/*
+ * Decode an unsigned integer running up to @eoc.  A leading zero octet is
+ * not counted against the size limit; more than sizeof(unsigned int)
+ * counted octets yields ASN1_ERR_DEC_BADVALUE.  Returns 1 on success,
+ * 0 on failure.
+ */
+static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
+				      unsigned char *eoc,
+				      unsigned int *integer)
+{
+	unsigned char octet;
+	unsigned int nbytes;
+
+	if (!asn1_octet_decode(ctx, &octet))
+		return 0;
+
+	*integer = octet;
+	nbytes = (octet != 0) ? 1 : 0;
+
+	while (ctx->pointer < eoc) {
+		if (nbytes >= sizeof (unsigned int)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		nbytes++;
+
+		if (!asn1_octet_decode(ctx, &octet))
+			return 0;
+
+		*integer = (*integer << 8) | octet;
+	}
+	return 1;
+}
+
+/*
+ * Same as asn1_uint_decode() but for an unsigned long: a leading zero octet
+ * is not counted, and more than sizeof(unsigned long) counted octets yields
+ * ASN1_ERR_DEC_BADVALUE.  Returns 1 on success, 0 on failure.
+ */
+static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
+				       unsigned char *eoc,
+				       unsigned long *integer)
+{
+	unsigned char octet;
+	unsigned int nbytes;
+
+	if (!asn1_octet_decode(ctx, &octet))
+		return 0;
+
+	*integer = octet;
+	nbytes = (octet != 0) ? 1 : 0;
+
+	while (ctx->pointer < eoc) {
+		if (nbytes >= sizeof (unsigned long)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		nbytes++;
+
+		if (!asn1_octet_decode(ctx, &octet))
+			return 0;
+
+		*integer = (*integer << 8) | octet;
+	}
+	return 1;
+}
+
+/*
+ * Copy the octets between the cursor and @eoc into a freshly kmalloc()ed
+ * buffer.
+ *
+ * On success returns 1 with *octets pointing to the buffer (owned by the
+ * caller, to be released with kfree()) and *len holding its length.  On
+ * failure returns 0 with *octets set to NULL.
+ *
+ * A NULL @eoc (indefinite length) or one that lies before the cursor is
+ * rejected up front: the size computation below would otherwise wrap to a
+ * huge value and be reported as an out-of-memory condition.
+ */
+static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
+					unsigned char *eoc,
+					unsigned char **octets,
+					unsigned int *len)
+{
+	unsigned char *ptr;
+
+	*len = 0;
+	*octets = NULL;
+
+	if (eoc == NULL || eoc < ctx->pointer) {
+		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+		return 0;
+	}
+
+	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
+	if (*octets == NULL) {
+		if (net_ratelimit())
+			printk("OOM in bsalg (%d)\n", __LINE__);
+		return 0;
+	}
+
+	ptr = *octets;
+	while (ctx->pointer < eoc) {
+		if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+			kfree(*octets);
+			*octets = NULL;
+			return 0;
+		}
+		(*len)++;
+	}
+	return 1;
+}
+
+/*
+ * Decode one object-identifier sub-identifier: seven value bits per octet,
+ * most significant group first, bit 7 set on every octet but the last.
+ * Returns 1 on success, 0 when the buffer runs out.
+ */
+static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
+				       unsigned long *subid)
+{
+	unsigned char octet;
+
+	*subid = 0;
+
+	for (;;) {
+		if (!asn1_octet_decode(ctx, &octet))
+			return 0;
+
+		*subid = (*subid << 7) | (octet & 0x7F);
+		if ((octet & 0x80) != 0x80)
+			break;
+	}
+	return 1;
+}
+
+/*
+ * Decode an object identifier running up to @eoc into a kmalloc()ed array
+ * of sub-identifiers.
+ *
+ * On success returns 1; *oid is owned by the caller (kfree()) and *len is
+ * the number of sub-identifiers.  On failure returns 0 with *oid NULL.
+ *
+ * The first encoded sub-identifier always expands into two array slots, so
+ * the array needs at least two elements.  An empty encoding (size 1) used
+ * to allocate a single slot and then write two; it is now rejected, as are
+ * a NULL or already-passed @eoc and sizes whose byte count would overflow.
+ */
+static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
+				     unsigned char *eoc,
+				     unsigned long **oid,
+				     unsigned int *len)
+{
+	unsigned long subid;
+	unsigned int size;
+	unsigned long *optr;
+
+	*oid = NULL;
+
+	if (eoc == NULL || eoc < ctx->pointer) {
+		ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+		return 0;
+	}
+
+	size = eoc - ctx->pointer + 1;
+
+	/* first subid actually encodes first two subids */
+	if (size < 2 || size > INT_MAX / sizeof(unsigned long)) {
+		ctx->error = ASN1_ERR_DEC_BADVALUE;
+		return 0;
+	}
+
+	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+	if (*oid == NULL) {
+		if (net_ratelimit())
+			printk("OOM in bsalg (%d)\n", __LINE__);
+		return 0;
+	}
+
+	optr = *oid;
+
+	if (!asn1_subid_decode(ctx, &subid)) {
+		kfree(*oid);
+		*oid = NULL;
+		return 0;
+	}
+
+	/* split the combined first value into the first two sub-identifiers */
+	if (subid < 40) {
+		optr [0] = 0;
+		optr [1] = subid;
+	} else if (subid < 80) {
+		optr [0] = 1;
+		optr [1] = subid - 40;
+	} else {
+		optr [0] = 2;
+		optr [1] = subid - 80;
+	}
+
+	*len = 2;
+	optr += 2;
+
+	while (ctx->pointer < eoc) {
+		if (++(*len) > size) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			kfree(*oid);
+			*oid = NULL;
+			return 0;
+		}
+
+		if (!asn1_subid_decode(ctx, optr++)) {
+			kfree(*oid);
+			*oid = NULL;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*****************************************************************************
+ *
+ * SNMP decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* SNMP Versions: values of the version field in the message header */
+#define SNMP_V1 0
+#define SNMP_V2C 1
+#define SNMP_V2 2
+#define SNMP_V3 3
+
+/* Default Sizes */
+#define SNMP_SIZE_COMM 256
+#define SNMP_SIZE_OBJECTID 128
+#define SNMP_SIZE_BUFCHR 256
+#define SNMP_SIZE_BUFINT 128
+#define SNMP_SIZE_SMALLOBJECTID 16
+
+/* Requests: PDU types, compared against the tag of the PDU header */
+#define SNMP_PDU_GET 0
+#define SNMP_PDU_NEXT 1
+#define SNMP_PDU_RESPONSE 2
+#define SNMP_PDU_SET 3
+#define SNMP_PDU_TRAP1 4
+#define SNMP_PDU_BULK 5
+#define SNMP_PDU_INFORM 6
+#define SNMP_PDU_TRAP2 7
+
+/* Errors */
+#define SNMP_NOERROR 0
+#define SNMP_TOOBIG 1
+#define SNMP_NOSUCHNAME 2
+#define SNMP_BADVALUE 3
+#define SNMP_READONLY 4
+#define SNMP_GENERROR 5
+#define SNMP_NOACCESS 6
+#define SNMP_WRONGTYPE 7
+#define SNMP_WRONGLENGTH 8
+#define SNMP_WRONGENCODING 9
+#define SNMP_WRONGVALUE 10
+#define SNMP_NOCREATION 11
+#define SNMP_INCONSISTENTVALUE 12
+#define SNMP_RESOURCEUNAVAILABLE 13
+#define SNMP_COMMITFAILED 14
+#define SNMP_UNDOFAILED 15
+#define SNMP_AUTHORIZATIONERROR 16
+#define SNMP_NOTWRITABLE 17
+#define SNMP_INCONSISTENTNAME 18
+
+/* General SNMP V1 Traps */
+#define SNMP_TRAP_COLDSTART 0
+#define SNMP_TRAP_WARMSTART 1
+#define SNMP_TRAP_LINKDOWN 2
+#define SNMP_TRAP_LINKUP 3
+#define SNMP_TRAP_AUTFAILURE 4
+#define SNMP_TRAP_EQPNEIGHBORLOSS 5
+#define SNMP_TRAP_ENTSPECIFIC 6
+
+/*
+ * SNMPv1 Types: internal syntax codes produced by snmp_tag_cls2syntax() and
+ * stored in snmp_object.type.  The trailing letters name the snmp_syntax
+ * union member that holds the value.  Note that SNMP_OCTETSTR and
+ * SNMP_DISPLAYSTR share the value 2.
+ */
+#define SNMP_NULL 0
+#define SNMP_INTEGER 1 /* l */
+#define SNMP_OCTETSTR 2 /* c */
+#define SNMP_DISPLAYSTR 2 /* c */
+#define SNMP_OBJECTID 3 /* ul */
+#define SNMP_IPADDR 4 /* uc */
+#define SNMP_COUNTER 5 /* ul */
+#define SNMP_GAUGE 6 /* ul */
+#define SNMP_TIMETICKS 7 /* ul */
+#define SNMP_OPAQUE 8 /* c */
+
+/* Additional SNMPv2 Types (SNMP_UINTEGER shares the value 5 with SNMP_COUNTER) */
+#define SNMP_UINTEGER 5 /* ul */
+#define SNMP_BITSTR 9 /* uc */
+#define SNMP_NSAP 10 /* uc */
+#define SNMP_COUNTER64 11 /* ul */
+#define SNMP_NOSUCHOBJECT 12
+#define SNMP_NOSUCHINSTANCE 13
+#define SNMP_ENDOFMIBVIEW 14
+
+/*
+ * Typed views of the value bytes that trail a struct snmp_object.  All
+ * members are zero-length arrays, so the union itself adds no storage; the
+ * space is reserved by the kmalloc() in snmp_object_decode().
+ */
+union snmp_syntax
+{
+ unsigned char uc[0]; /* unsigned octets */
+ char c[0]; /* octets viewed as char */
+ unsigned long ul[0]; /* unsigned long values (width is platform dependent) */
+ long l[0]; /* signed long values (width is platform dependent) */
+};
+
+/*
+ * One decoded variable binding, as built by snmp_object_decode().
+ */
+struct snmp_object
+{
+ unsigned long *id; /* object identifier, separately allocated array */
+ unsigned int id_len; /* number of sub-identifiers in id */
+ unsigned short type; /* SNMP_* syntax code of the value */
+ unsigned int syntax_len; /* size in bytes of the value stored in syntax */
+ union snmp_syntax syntax; /* value bytes follow the structure */
+};
+
+/*
+ * The three integers at the start of a non-trap PDU, filled in by
+ * snmp_request_decode() in this order.
+ */
+struct snmp_request
+{
+ unsigned long id;
+ unsigned int error_status;
+ unsigned int error_index;
+};
+
+/*
+ * Header fields of an SNMPv1 trap PDU, filled in by snmp_trap_decode().
+ */
+struct snmp_v1_trap
+{
+ unsigned long *id; /* object identifier, allocated by asn1_oid_decode() */
+ unsigned int id_len; /* number of sub-identifiers in id */
+ unsigned long ip_address; /* pointer: holds the buffer from asn1_octets_decode() */
+ unsigned int general;
+ unsigned int specific;
+ unsigned long time;
+};
+
+/* SNMP types: application-class tag numbers (paired with ASN1_APL in snmp_conv) */
+#define SNMP_IPA 0
+#define SNMP_CNT 1
+#define SNMP_GGE 2
+#define SNMP_TIT 3
+#define SNMP_OPQ 4
+#define SNMP_C64 6
+
+/* SNMP errors: context-class tag numbers (paired with ASN1_CTX in snmp_conv) */
+#define SERR_NSO 0
+#define SERR_NSI 1
+#define SERR_EOM 2
+
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check);
+/* One row of the (class, tag) -> SNMP syntax code conversion table. */
+struct snmp_cnv
+{
+ unsigned int class; /* ASN1_* class */
+ unsigned int tag; /* tag number within that class */
+ int syntax; /* SNMP_* syntax code; -1 marks the end of the table */
+};
+
+/*
+ * Conversion table searched linearly by snmp_tag_cls2syntax(); the first
+ * matching row wins, so the second ASN1_OTS row is never reached (both rows
+ * yield the value 2 anyway).  The row with syntax -1 terminates the table.
+ */
+static struct snmp_cnv snmp_conv [] =
+{
+ {ASN1_UNI, ASN1_NUL, SNMP_NULL},
+ {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
+ {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
+ {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
+ {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
+ {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
+ {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
+ {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
+ {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
+ {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
+
+ /* SNMPv2 data types and errors */
+ {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
+ {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
+ {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
+ {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
+ {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
+ {0, 0, -1}
+};
+
+/*
+ * Look up the SNMP syntax code for an ASN.1 (tag, class) pair in snmp_conv.
+ * Returns 1 and stores the code in *syntax when a row matches, 0 otherwise.
+ */
+static unsigned char snmp_tag_cls2syntax(unsigned int tag,
+					 unsigned int cls,
+					 unsigned short *syntax)
+{
+	const struct snmp_cnv *row;
+
+	for (row = snmp_conv; row->syntax != -1; row++) {
+		if (row->tag != tag || row->class != cls)
+			continue;
+
+		*syntax = row->syntax;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Decode one variable binding: a SEQUENCE holding an OBJECT IDENTIFIER
+ * followed by a primitive value.
+ *
+ * On success returns 1 and stores in *obj a kmalloc()ed snmp_object whose
+ * value bytes trail the structure; (*obj)->id is a separate allocation and
+ * the caller must kfree() both.  On failure returns 0 with *obj NULL and
+ * every intermediate buffer released (including the octet buffer when the
+ * object allocation fails for an octet string or opaque value, which used
+ * to be leaked).
+ */
+static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
+					struct snmp_object **obj)
+{
+	unsigned int cls, con, tag, len, idlen;
+	unsigned short type;
+	unsigned char *eoc, *end, *p;
+	unsigned long *lp, *id;
+	unsigned long ul;
+	long l;
+
+	*obj = NULL;
+	id = NULL;
+
+	/* outer SEQUENCE */
+	if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
+		return 0;
+
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+
+	/* object name */
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+		return 0;
+
+	if (!asn1_oid_decode(ctx, end, &id, &idlen))
+		return 0;
+
+	/* value header; from here on 'id' must be freed on every error path */
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
+		kfree(id);
+		return 0;
+	}
+
+	if (con != ASN1_PRI) {
+		kfree(id);
+		return 0;
+	}
+
+	type = 0;
+	if (!snmp_tag_cls2syntax(tag, cls, &type)) {
+		kfree(id);
+		return 0;
+	}
+
+	l = 0;
+	switch (type) {
+		case SNMP_INTEGER:
+			len = sizeof(long);
+			if (!asn1_long_decode(ctx, end, &l)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len,
+				       GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			(*obj)->syntax.l[0] = l;
+			break;
+		case SNMP_OCTETSTR:
+		case SNMP_OPAQUE:
+			if (!asn1_octets_decode(ctx, end, &p, &len)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len,
+				       GFP_ATOMIC);
+			if (*obj == NULL) {
+				/* do not leak the decoded octets */
+				kfree(p);
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.c, p, len);
+			kfree(p);
+			break;
+		case SNMP_NULL:
+		case SNMP_NOSUCHOBJECT:
+		case SNMP_NOSUCHINSTANCE:
+		case SNMP_ENDOFMIBVIEW:
+			len = 0;
+			*obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			if (!asn1_null_decode(ctx, end)) {
+				kfree(id);
+				kfree(*obj);
+				*obj = NULL;
+				return 0;
+			}
+			break;
+		case SNMP_OBJECTID:
+			if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
+				kfree(id);
+				return 0;
+			}
+			/* len was a sub-identifier count; turn it into bytes */
+			len *= sizeof(unsigned long);
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(lp);
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.ul, lp, len);
+			kfree(lp);
+			break;
+		case SNMP_IPADDR:
+			if (!asn1_octets_decode(ctx, end, &p, &len)) {
+				kfree(id);
+				return 0;
+			}
+			/* only four-octet addresses are accepted */
+			if (len != 4) {
+				kfree(p);
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(p);
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.uc, p, len);
+			kfree(p);
+			break;
+		case SNMP_COUNTER:
+		case SNMP_GAUGE:
+		case SNMP_TIMETICKS:
+			len = sizeof(unsigned long);
+			if (!asn1_ulong_decode(ctx, end, &ul)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			(*obj)->syntax.ul[0] = ul;
+			break;
+		default:
+			kfree(id);
+			return 0;
+	}
+
+	(*obj)->syntax_len = len;
+	(*obj)->type = type;
+	(*obj)->id = id;
+	(*obj)->id_len = idlen;
+
+	/* the binding must end exactly where the outer SEQUENCE said */
+	if (!asn1_eoc_decode(ctx, eoc)) {
+		kfree(id);
+		kfree(*obj);
+		*obj = NULL;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Decode the three INTEGER fields at the start of a non-trap PDU into
+ * @request: id, error_status and error_index, in that order.  Each field
+ * must be a universal, primitive INTEGER.  Returns 1 on success, 0 on the
+ * first field that fails to decode.
+ */
+static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
+					 struct snmp_request *request)
+{
+	unsigned int cls, con, tag;
+	unsigned char *end;
+
+	/* request id */
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+	if (!asn1_ulong_decode(ctx, end, &request->id))
+		return 0;
+
+	/* error status */
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+	if (!asn1_uint_decode(ctx, end, &request->error_status))
+		return 0;
+
+	/* error index */
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag) ||
+	    cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+	if (!asn1_uint_decode(ctx, end, &request->error_index))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Fast checksum update for possibly oddly-aligned UDP byte, from the
+ * code example in the draft.
+ *
+ * The complemented old byte and the new byte are placed in the lane
+ * selected by the parity of @offset inside two 16-bit words, and the sum of
+ * those words is folded into *csum.
+ */
+static void fast_csum(__sum16 *csum,
+		      const unsigned char *optr,
+		      const unsigned char *nptr,
+		      int offset)
+{
+	unsigned char s[4] = { 0, 0, 0, 0 };
+	int lane = (offset & 1) ? 1 : 0;
+
+	s[lane] = ~*optr;
+	s[2 + lane] = *nptr;
+
+	*csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
+}
+
+/*
+ * Mangle IP address.
+ * - begin points to the start of the snmp message
+ * - addr points to the start of the address
+ *
+ * Only the first octet is touched, and only when it equals map->from.  A
+ * non-zero *check is adjusted for the changed byte.
+ */
+static inline void mangle_address(unsigned char *begin,
+				  unsigned char *addr,
+				  const struct oct1_map *map,
+				  __sum16 *check)
+{
+	u_int32_t old;
+
+	if (map->from != NOCT1(addr))
+		return;
+
+	/* keep the original address around for the debug message below */
+	if (debug)
+		memcpy(&old, (unsigned char *)addr, sizeof(old));
+
+	*addr = map->to;
+
+	/* Update UDP checksum if being used */
+	if (*check)
+		fast_csum(check, &map->from, &map->to, addr - begin);
+
+	if (debug)
+		printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
+		       "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
+}
+
+/*
+ * Decode the header of an SNMPv1 trap PDU into @trap and mangle the agent
+ * address it carries according to @map (adjusting @check).
+ *
+ * Fields are decoded in this order: object identifier, 4-octet address,
+ * two integers (general, specific) and the time value.
+ *
+ * Returns 1 on success; trap->id and the buffer stored in trap->ip_address
+ * are then allocated and must be freed by the caller.  Returns 0 on failure
+ * after freeing whatever had been allocated.
+ */
+static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
+ struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ unsigned int cls, con, tag, len;
+ unsigned char *end;
+
+ /* object identifier */
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
+ return 0;
+
+ /* address: either the application IpAddress tag or a plain octet string */
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_id_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
+ goto err_id_free;
+
+ /* ip_address is an unsigned long used to hold the buffer pointer */
+ if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
+ goto err_id_free;
+
+ /* IPv4 only */
+ if (len != 4)
+ goto err_addr_free;
+
+ /* the cursor now sits just past the address, so it starts 4 octets back */
+ mangle_address(ctx->begin, ctx->pointer - 4, map, check);
+
+ /* general */
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->general))
+ goto err_addr_free;
+
+ /* specific */
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->specific))
+ goto err_addr_free;
+
+ /* time: either the application TimeTicks tag or a plain integer */
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
+ goto err_addr_free;
+
+ if (!asn1_ulong_decode(ctx, end, &trap->time))
+ goto err_addr_free;
+
+ return 1;
+
+ /* error unwinding: the address label falls through to free the id too */
+err_addr_free:
+ kfree((unsigned long *)trap->ip_address);
+
+err_id_free:
+ kfree(trap->id);
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Misc. routines
+ *
+ *****************************************************************************/
+
+/* Print @len bytes of @buf as two-digit hex values, sixteen per line. */
+static void hex_dump(unsigned char *buf, size_t len)
+{
+	size_t pos = 0;
+
+	while (pos < len) {
+		/* start a new line before every 16th byte except the first */
+		if (pos != 0 && (pos % 16) == 0)
+			printk("\n");
+		printk("%02x ", buf[pos]);
+		pos++;
+	}
+	printk("\n");
+}
+
+/*
+ * Parse an SNMP message and mangle the IP addresses it carries according
+ * to @map (the 'basic' method).
+ *
+ * @msg:   first byte of the SNMP message
+ * @len:   length of the message in bytes
+ * @map:   mapping handed on to snmp_trap_decode() and mangle_address()
+ * @check: UDP checksum field handed on to the same helpers
+ *
+ * Returns 0 when the message cannot be decoded and 1 otherwise.  Messages
+ * whose version field is above 1, and PDUs other than responses and traps,
+ * return 1 without being inspected any further.
+ */
+static int snmp_parse_mangle(unsigned char *msg,
+			     u_int16_t len,
+			     const struct oct1_map *map,
+			     __sum16 *check)
+{
+	unsigned char *eoc, *end;
+	unsigned int cls, con, tag, vers, pdutype;
+	struct asn1_ctx ctx;
+	struct asn1_octstr comm;
+	/*
+	 * Object decoded in the current loop iteration.  snmp_object_decode()
+	 * only needs a place to store the pointer, so a local is enough; this
+	 * avoids a GFP_ATOMIC allocation (previously sized for a whole
+	 * struct snmp_object) and the OOM failure path that came with it.
+	 */
+	struct snmp_object *obj;
+
+	if (debug > 1)
+		hex_dump(msg, len);
+
+	asn1_open(&ctx, msg, len);
+
+	/*
+	 * Start of SNMP message.
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+
+	/*
+	 * Version 1 or 2 handled.
+	 */
+	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+	if (!asn1_uint_decode(&ctx, end, &vers))
+		return 0;
+	if (debug > 1)
+		printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+	if (vers > 1)
+		return 1;
+
+	/*
+	 * Community.  Only used for the debug output below; the decoded
+	 * buffer is released straight afterwards.
+	 */
+	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
+		return 0;
+	if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
+		return 0;
+	if (debug > 1) {
+		unsigned int i;
+
+		printk(KERN_DEBUG "bsalg: community: ");
+		for (i = 0; i < comm.len; i++)
+			printk("%c", comm.data[i]);
+		printk("\n");
+	}
+	kfree(comm.data);
+
+	/*
+	 * PDU type
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
+		return 0;
+	if (cls != ASN1_CTX || con != ASN1_CON)
+		return 0;
+	if (debug > 1) {
+		unsigned char *pdus[] = {
+			[SNMP_PDU_GET] = "get",
+			[SNMP_PDU_NEXT] = "get-next",
+			[SNMP_PDU_RESPONSE] = "response",
+			[SNMP_PDU_SET] = "set",
+			[SNMP_PDU_TRAP1] = "trapv1",
+			[SNMP_PDU_BULK] = "bulk",
+			[SNMP_PDU_INFORM] = "inform",
+			[SNMP_PDU_TRAP2] = "trapv2"
+		};
+
+		/* Guard the table lookup against out-of-range PDU types. */
+		if (pdutype > SNMP_PDU_TRAP2)
+			printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+		else
+			printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+	}
+	if (pdutype != SNMP_PDU_RESPONSE &&
+	    pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
+		return 1;
+
+	/*
+	 * Request header or v1 trap
+	 */
+	if (pdutype == SNMP_PDU_TRAP1) {
+		struct snmp_v1_trap trap;
+
+		if (!snmp_trap_decode(&ctx, &trap, map, check))
+			return 0;
+
+		/* On success the caller owns what the decoder allocated. */
+		kfree(trap.id);
+		kfree((unsigned long *)trap.ip_address);
+	} else {
+		struct snmp_request req;
+
+		if (!snmp_request_decode(&ctx, &req))
+			return 0;
+
+		if (debug > 1)
+			printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+			"error_index=%u\n", req.id, req.error_status,
+			req.error_index);
+	}
+
+	/*
+	 * Loop through objects, look for IP addresses to mangle.
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+		return 0;
+
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+
+	while (!asn1_eoc_decode(&ctx, eoc)) {
+		unsigned int i;
+
+		/* Never act on a stale pointer if decoding bails out early. */
+		obj = NULL;
+		if (!snmp_object_decode(&ctx, &obj)) {
+			if (obj) {
+				kfree(obj->id);
+				kfree(obj);
+			}
+			return 0;
+		}
+
+		if (debug > 1) {
+			printk(KERN_DEBUG "bsalg: object: ");
+			for (i = 0; i < obj->id_len; i++) {
+				if (i > 0)
+					printk(".");
+				printk("%lu", obj->id[i]);
+			}
+			printk(": type=%u\n", obj->type);
+		}
+
+		/* The address just decoded is the 4 bytes before ctx.pointer. */
+		if (obj->type == SNMP_IPADDR)
+			mangle_address(ctx.begin, ctx.pointer - 4, map, check);
+
+		kfree(obj->id);
+		kfree(obj);
+	}
+
+	if (!asn1_eoc_decode(&ctx, eoc))
+		return 0;
+
+	return 1;
+}
+
+/*****************************************************************************
+ *
+ * NAT routines.
+ *
+ *****************************************************************************/
+
+/*
+ * SNMP translation routine.
+ *
+ * Works out the application-layer address mapping from the NAT tuples of
+ * @ct and runs the SNMP payload of *@pskb through snmp_parse_mangle().
+ * Returns NF_ACCEPT, or NF_DROP when the parser rejects the payload.
+ */
+static int snmp_translate(struct nf_conn *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct sk_buff **pskb)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+	unsigned char *payload = (unsigned char *)udph + sizeof(struct udphdr);
+	u_int16_t paylen = ntohs(udph->len) - sizeof(struct udphdr);
+	int dir = CTINFO2DIR(ctinfo);
+	struct oct1_map map;
+
+	/*
+	 * Determine mapping for application layer addresses based on NAT
+	 * manipulations for the packet.  The same expressions serve both
+	 * directions: SNAT traps (original) and DNAT replies (reply).
+	 */
+	map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
+	map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+
+	/* Nothing to rewrite when the mapping is the identity. */
+	if (map.from == map.to)
+		return NF_ACCEPT;
+
+	if (snmp_parse_mangle(payload, paylen, &map, &udph->check))
+		return NF_ACCEPT;
+
+	if (net_ratelimit())
+		printk(KERN_WARNING "bsalg: parser failed\n");
+	return NF_DROP;
+}
+
+/*
+ * Conntrack helper callback.  No expectations are set up here; the only
+ * job is to adjust IP addresses inside the SNMP payload when the
+ * connection is being NATted.
+ */
+static int help(struct sk_buff **pskb, unsigned int protoff,
+		struct nf_conn *ct,
+		enum ip_conntrack_info ctinfo)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+	int dir = CTINFO2DIR(ctinfo);
+	unsigned int verdict;
+
+	/* Only SNMP replies and originating SNMP traps get mangled. */
+	if (dir != IP_CT_DIR_REPLY && udph->source == htons(SNMP_PORT))
+		return NF_ACCEPT;
+	if (dir != IP_CT_DIR_ORIGINAL && udph->dest == htons(SNMP_TRAP_PORT))
+		return NF_ACCEPT;
+
+	/* Without NAT on this connection there is nothing to rewrite. */
+	if ((ct->status & IPS_NAT_MASK) == 0)
+		return NF_ACCEPT;
+
+	/*
+	 * Up to here only a valid-length IP header plus 8 bytes (room for
+	 * the UDP header) is guaranteed.  Check that the UDP length field
+	 * agrees with the packet length before touching the payload.
+	 */
+	if ((*pskb)->len - (iph->ihl << 2) != ntohs(udph->len)) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "SNMP: dropping malformed packet "
+			"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
+			NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+		return NF_DROP;
+	}
+
+	if (!skb_make_writable(pskb, (*pskb)->len))
+		return NF_DROP;
+
+	/* The translation itself runs under snmp_lock. */
+	spin_lock_bh(&snmp_lock);
+	verdict = snmp_translate(ct, ctinfo, pskb);
+	spin_unlock_bh(&snmp_lock);
+
+	return verdict;
+}
+
+/*
+ * Conntrack helper description for SNMP.  The tuple selects AF_INET, UDP,
+ * source port SNMP_PORT; the all-ones masks presumably request an exact
+ * match on those three fields.  max_expected is 0 and the callback is
+ * help().
+ */
+static struct nf_conntrack_helper snmp_helper __read_mostly = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+};
+
+/*
+ * Conntrack helper description for SNMP traps.  Same layout as snmp_helper
+ * except for the name and the source port, which is SNMP_TRAP_PORT here.
+ * The callback is the shared help() routine.
+ */
+static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+};
+
+/*****************************************************************************
+ *
+ * Module stuff.
+ *
+ *****************************************************************************/
+
+/*
+ * Module init: register the SNMP helper, then the SNMP trap helper.  If the
+ * second registration fails the first one is undone.  Returns the result of
+ * the last registration attempted.
+ */
+static int __init nf_nat_snmp_basic_init(void)
+{
+	int err;
+
+	err = nf_conntrack_helper_register(&snmp_helper);
+	if (err < 0)
+		return err;
+
+	err = nf_conntrack_helper_register(&snmp_trap_helper);
+	if (err < 0)
+		nf_conntrack_helper_unregister(&snmp_helper);
+
+	return err;
+}
+
+/* Module exit: unregister both helpers registered by the init routine. */
+static void __exit nf_nat_snmp_basic_fini(void)
+{
+ nf_conntrack_helper_unregister(&snmp_helper);
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
+
+module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
new file mode 100644
index 00000000000..730a7a44c88
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -0,0 +1,406 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/spinlock.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
+ : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
+ : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
+ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
+ : "*ERROR*")))
+
+#ifdef CONFIG_XFRM
+/*
+ * Fill the address/port fields of @fl from the conntrack tuple of @skb for
+ * whichever NAT manipulations are flagged in the connection status.  Does
+ * nothing when the packet has no conntrack entry.
+ */
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conntrack_tuple *t;
+	enum ip_conntrack_dir dir;
+	unsigned long dst_bit, src_bit;
+
+	if (ct == NULL)
+		return;
+
+	dir = CTINFO2DIR(ctinfo);
+	t = &ct->tuplehash[dir].tuple;
+
+	/* Which status bit guards the destination depends on direction... */
+	dst_bit = (dir == IP_CT_DIR_ORIGINAL) ? IPS_DST_NAT : IPS_SRC_NAT;
+	/* ...and the source side is guarded by the other bit of the mask. */
+	src_bit = dst_bit ^ IPS_NAT_MASK;
+
+	if (ct->status & dst_bit) {
+		fl->fl4_dst = t->dst.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_dport = t->dst.u.tcp.port;
+	}
+
+	if (ct->status & src_bit) {
+		fl->fl4_src = t->src.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP)
+			fl->fl_ip_sport = t->src.u.tcp.port;
+	}
+}
+#endif
+
+/*
+ * Common NAT hook body.  Looks up the conntrack entry of *pskb; for new and
+ * related connections whose binding for this hook's manip type is not yet
+ * initialized it sets one up (null binding or rule lookup), then hands the
+ * packet to nf_nat_packet().
+ *
+ * Returns NF_ACCEPT for packets without a conntrack entry (except ICMP
+ * redirects, which are dropped) and for untracked packets, NF_DROP when the
+ * connection has no NAT data, a non-NF_ACCEPT binding result as is, and
+ * otherwise whatever nf_nat_packet() / nf_nat_icmp_reply_translation()
+ * decide.
+ */
+static unsigned int
+nf_nat_fn(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ struct nf_nat_info *info;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ and local-out, and nf_nat_out protects post-routing. */
+ NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
+ & htons(IP_MF|IP_OFFSET)));
+
+ ct = nf_ct_get(*pskb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ have dropped it. Hence it's the user's responsibility to
+ packet filter it out, or implement conntrack/NAT for that
+ protocol. 8) --RR */
+ if (!ct) {
+ /* Exception: ICMP redirect to new connection (not in
+ hash table yet). We must not let this through, in
+ case we're doing NAT to the same network. */
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ struct icmphdr _hdr, *hp;
+
+ hp = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4,
+ sizeof(_hdr), &_hdr);
+ if (hp != NULL &&
+ hp->type == ICMP_REDIRECT)
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+ }
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (ct == &nf_conntrack_untracked)
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (!nat)
+ return NF_DROP;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED+IP_CT_IS_REPLY:
+ /* Related ICMP is translated separately and never reaches
+ nf_nat_packet() below. */
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(ct, ctinfo,
+ hooknum, pskb))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ info = &nat->info;
+
+ /* Seen it before? This can happen for loopback, retrans,
+ or local packets.. */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ if (unlikely(nf_ct_is_confirmed(ct)))
+ /* NAT module was loaded late */
+ ret = alloc_null_binding_confirmed(ct, info,
+ hooknum);
+ else if (hooknum == NF_IP_LOCAL_IN)
+ /* LOCAL_IN hook doesn't have a chain! */
+ ret = alloc_null_binding(ct, info, hooknum);
+ else
+ ret = nf_nat_rule_find(pskb, hooknum, in, out,
+ ct, info);
+
+ /* Any verdict other than NF_ACCEPT ends processing here. */
+ if (ret != NF_ACCEPT) {
+ return ret;
+ }
+ } else
+ DEBUGP("Already setup manip %s for ct %p\n",
+ maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+ info = &nat->info;
+ }
+
+ NF_CT_ASSERT(info);
+ return nf_nat_packet(ct, ctinfo, hooknum, pskb);
+}
+
+/*
+ * NAT hook wrapper around nf_nat_fn() that drops the cached route of the
+ * packet when the translation changed its destination address.
+ */
+static unsigned int
+nf_nat_in(unsigned int hooknum,
+	  struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	/* Remember the destination before NAT gets a chance to rewrite it. */
+	__be32 orig_daddr = (*pskb)->nh.iph->daddr;
+	unsigned int verdict;
+
+	verdict = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	if (verdict == NF_DROP || verdict == NF_STOLEN)
+		return verdict;
+
+	if ((*pskb)->nh.iph->daddr != orig_daddr) {
+		dst_release((*pskb)->dst);
+		(*pskb)->dst = NULL;
+	}
+	return verdict;
+}
+
+/*
+ * NAT hook wrapper around nf_nat_fn().  Packets shorter than an IP header,
+ * or with a header length field below the minimum, are accepted untouched.
+ * With CONFIG_XFRM, a packet whose source address or protocol part of the
+ * tuple differs between the two directions is passed through
+ * ip_xfrm_me_harder(); a non-zero result there turns the verdict into
+ * NF_DROP.
+ */
+static unsigned int
+nf_nat_out(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr) ||
+ (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ /* Source side of this direction vs. destination side of the
+ other direction: a mismatch means the source was rewritten. */
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip
+ || ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all
+ )
+ return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+/*
+ * NAT hook wrapper around nf_nat_fn().  Packets shorter than an IP header,
+ * or with a header length field below the minimum, are accepted untouched.
+ * If the destination address of the tuple differs between the two
+ * directions (with CONFIG_XFRM: or the protocol part of the tuple), the
+ * packet is re-routed via ip_route_me_harder(); a failure there turns the
+ * verdict into NF_DROP.
+ */
+static unsigned int
+nf_nat_local_fn(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr) ||
+ (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ /* Destination side of this direction vs. source side of the
+ other direction: a mismatch means the destination was
+ rewritten. */
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip
+#ifdef CONFIG_XFRM
+ || ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all
+#endif
+ )
+ if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ return ret;
+}
+
+/*
+ * Hook that applies sequence number adjustment to packets of connections
+ * flagged with IPS_SEQ_ADJUST_BIT.  Returns NF_DROP when the adjustment
+ * fails and NF_ACCEPT in every other case.
+ */
+static unsigned int
+nf_nat_adjust(unsigned int hooknum,
+	      struct sk_buff **pskb,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+
+	/* No conntrack entry, or no adjustment requested: nothing to do. */
+	if (!ct || !test_bit(IPS_SEQ_ADJUST_BIT, &ct->status))
+		return NF_ACCEPT;
+
+	DEBUGP("nf_nat_standalone: adjusting sequence number\n");
+	if (nf_nat_seq_adjust(pskb, ct, ctinfo))
+		return NF_ACCEPT;
+
+	return NF_DROP;
+}
+
+/* We must be after connection tracking and before packet filtering. */
+
+/*
+ * Hook table registered by nf_nat_standalone_init().  All six entries are
+ * PF_INET: address translation on PRE_ROUTING, POST_ROUTING, LOCAL_OUT and
+ * LOCAL_IN, plus nf_nat_adjust() on POST_ROUTING and LOCAL_IN at
+ * NF_IP_PRI_NAT_SEQ_ADJUST.
+ */
+static struct nf_hook_ops nf_nat_ops[] = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* After conntrack, adjust sequence number */
+ {
+ .hook = nf_nat_adjust,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_local_fn,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ /* nf_nat_fn is used directly here, without a wrapper. */
+ .hook = nf_nat_fn,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* After conntrack, adjust sequence number */
+ {
+ .hook = nf_nat_adjust,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
+ },
+};
+
+/*
+ * Module init.  In order: register the "nf_nat:base" and "nf_nat:help"
+ * conntrack caches, install nat_decode_session (CONFIG_XFRM only), set up
+ * the NAT rules, register the hook table and finally set
+ * nf_nat_module_is_loaded.  On failure the steps already taken are undone
+ * in reverse order through the cleanup labels and the error is returned.
+ */
+static int __init nf_nat_standalone_init(void)
+{
+ int size, ret = 0;
+
+ need_conntrack();
+
+ /* Cache object: struct nf_conn followed by an aligned nf_conn_nat. */
+ size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) +
+ sizeof(struct nf_conn_nat);
+ ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
+ return ret;
+ }
+
+ /* Second cache: the layout above plus an aligned nf_conn_help. */
+ size = ALIGN(size, __alignof__(struct nf_conn_help)) +
+ sizeof(struct nf_conn_help);
+ ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP,
+ "nf_nat:help", size);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
+ goto cleanup_register_cache;
+ }
+#ifdef CONFIG_XFRM
+ BUG_ON(ip_nat_decode_session != NULL);
+ ip_nat_decode_session = nat_decode_session;
+#endif
+ ret = nf_nat_rule_init();
+ if (ret < 0) {
+ printk("nf_nat_init: can't setup rules.\n");
+ goto cleanup_decode_session;
+ }
+ ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
+ if (ret < 0) {
+ printk("nf_nat_init: can't register hooks.\n");
+ goto cleanup_rule_init;
+ }
+ nf_nat_module_is_loaded = 1;
+ return ret;
+
+ /* Error unwinding: each label undoes one step and falls through to
+ the labels for the earlier steps. */
+ cleanup_rule_init:
+ nf_nat_rule_cleanup();
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+ ip_nat_decode_session = NULL;
+ synchronize_net();
+#endif
+ nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP);
+ cleanup_register_cache:
+ nf_conntrack_unregister_cache(NF_CT_F_NAT);
+ return ret;
+}
+
+/*
+ * Module exit: unregister the hooks, tear down the NAT rules, clear
+ * nf_nat_module_is_loaded and (CONFIG_XFRM only) remove the decode-session
+ * hook followed by synchronize_net().
+ */
+static void __exit nf_nat_standalone_fini(void)
+{
+ nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
+ nf_nat_rule_cleanup();
+ nf_nat_module_is_loaded = 0;
+#ifdef CONFIG_XFRM
+ ip_nat_decode_session = NULL;
+ synchronize_net();
+#endif
+ /* Conntrack caches are unregistered in nf_conntrack_cleanup */
+}
+
+module_init(nf_nat_standalone_init);
+module_exit(nf_nat_standalone_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
new file mode 100644
index 00000000000..2566b79de22
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -0,0 +1,52 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/udp.h>
+
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_tftp");
+
+/*
+ * TFTP NAT hook: prepare expectation @exp so that the expected connection
+ * follows the NAT setup of its master, then register it.  Returns NF_DROP
+ * if the expectation cannot be registered, NF_ACCEPT otherwise.
+ */
+static unsigned int help(struct sk_buff **pskb,
+			 enum ip_conntrack_info ctinfo,
+			 struct nf_conntrack_expect *exp)
+{
+	struct nf_conn *master = exp->master;
+
+	/* Save the UDP source port of the master's original direction. */
+	exp->saved_proto.udp.port =
+		master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+	exp->dir = IP_CT_DIR_REPLY;
+	exp->expectfn = nf_nat_follow_master;
+
+	return nf_conntrack_expect_related(exp) == 0 ? NF_ACCEPT : NF_DROP;
+}
+
+/*
+ * Module exit: clear nf_nat_tftp_hook, then call synchronize_rcu() before
+ * returning.
+ */
+static void __exit nf_nat_tftp_fini(void)
+{
+ rcu_assign_pointer(nf_nat_tftp_hook, NULL);
+ synchronize_rcu();
+}
+
+/*
+ * Module init: install help() as nf_nat_tftp_hook.  A hook that is already
+ * set at this point is treated as a bug.  Always returns 0.
+ */
+static int __init nf_nat_tftp_init(void)
+{
+ BUG_ON(rcu_dereference(nf_nat_tftp_hook));
+ rcu_assign_pointer(nf_nat_tftp_hook, help);
+ return 0;
+}
+
+module_init(nf_nat_tftp_init);
+module_exit(nf_nat_tftp_fini);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9c6cbe3d9fb..cd873da54cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/udp.h>
+#include <net/udplite.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -66,6 +67,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
+ seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
atomic_read(&ip_frag_mem));
@@ -304,6 +306,17 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
fold_field((void **) udp_statistics,
snmp4_udp_list[i].entry));
+ /* the UDP and UDP-Lite MIBs are the same */
+ seq_puts(seq, "\nUdpLite:");
+ for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ seq_printf(seq, " %s", snmp4_udp_list[i].name);
+
+ seq_puts(seq, "\nUdpLite:");
+ for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+ fold_field((void **) udplite_statistics,
+ snmp4_udp_list[i].entry) );
+
seq_putc(seq, '\n');
return 0;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b430cf2a4f6..a6c63bbd9dd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -329,7 +329,7 @@ error:
return err;
}
-static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
{
struct iovec *iov;
u8 __user *type = NULL;
@@ -338,7 +338,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
unsigned int i;
if (!msg->msg_iov)
- return;
+ return 0;
for (i = 0; i < msg->msg_iovlen; i++) {
iov = &msg->msg_iov[i];
@@ -360,8 +360,9 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
code = iov->iov_base;
if (type && code) {
- get_user(fl->fl_icmp_type, type);
- get_user(fl->fl_icmp_code, code);
+ if (get_user(fl->fl_icmp_type, type) ||
+ get_user(fl->fl_icmp_code, code))
+ return -EFAULT;
probed = 1;
}
break;
@@ -372,6 +373,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
if (probed)
break;
}
+ return 0;
}
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
@@ -480,8 +482,11 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.proto = inet->hdrincl ? IPPROTO_RAW :
sk->sk_protocol,
};
- if (!inet->hdrincl)
- raw_probe_proto_opt(&fl, msg);
+ if (!inet->hdrincl) {
+ err = raw_probe_proto_opt(&fl, msg);
+ if (err)
+ goto done;
+ }
security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
@@ -849,8 +854,8 @@ static void raw_seq_stop(struct seq_file *seq, void *v)
static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
{
struct inet_sock *inet = inet_sk(sp);
- unsigned int dest = inet->daddr,
- src = inet->rcv_saddr;
+ __be32 dest = inet->daddr,
+ src = inet->rcv_saddr;
__u16 destp = 0,
srcp = inet->num;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 925ee4dfc32..1aaff0a2e09 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -566,11 +566,9 @@ static inline u32 rt_score(struct rtable *rt)
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
- return ((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
- (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) |
-#ifdef CONFIG_IP_ROUTE_FWMARK
- (fl1->nl_u.ip4_u.fwmark ^ fl2->nl_u.ip4_u.fwmark) |
-#endif
+ return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
+ (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
+ (fl1->mark ^ fl2->mark) |
(*(u16 *)&fl1->nl_u.ip4_u.tos ^
*(u16 *)&fl2->nl_u.ip4_u.tos) |
(fl1->oif ^ fl2->oif) |
@@ -1643,9 +1641,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
rth->fl.fl4_tos = tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
-#endif
+ rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
@@ -1784,14 +1780,12 @@ static inline int __mkroute_input(struct sk_buff *skb,
#endif
if (in_dev->cnf.no_policy)
rth->u.dst.flags |= DST_NOPOLICY;
- if (in_dev->cnf.no_xfrm)
+ if (out_dev->cnf.no_xfrm)
rth->u.dst.flags |= DST_NOXFRM;
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
rth->fl.fl4_tos = tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
-#endif
+ rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
rth->rt_gateway = daddr;
@@ -1920,10 +1914,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
.saddr = saddr,
.tos = tos,
.scope = RT_SCOPE_UNIVERSE,
-#ifdef CONFIG_IP_ROUTE_FWMARK
- .fwmark = skb->nfmark
-#endif
} },
+ .mark = skb->mark,
.iif = dev->ifindex };
unsigned flags = 0;
u32 itag = 0;
@@ -2034,9 +2026,7 @@ local_input:
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
rth->fl.fl4_tos = tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
-#endif
+ rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
@@ -2113,9 +2103,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.fl4_src == saddr &&
rth->fl.iif == iif &&
rth->fl.oif == 0 &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark == skb->nfmark &&
-#endif
+ rth->fl.mark == skb->mark &&
rth->fl.fl4_tos == tos) {
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
@@ -2239,9 +2227,7 @@ static inline int __mkroute_output(struct rtable **result,
rth->fl.fl4_tos = tos;
rth->fl.fl4_src = oldflp->fl4_src;
rth->fl.oif = oldflp->oif;
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
-#endif
+ rth->fl.mark = oldflp->mark;
rth->rt_dst = fl->fl4_dst;
rth->rt_src = fl->fl4_src;
rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
@@ -2385,10 +2371,8 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
.scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK :
RT_SCOPE_UNIVERSE),
-#ifdef CONFIG_IP_ROUTE_FWMARK
- .fwmark = oldflp->fl4_fwmark
-#endif
} },
+ .mark = oldflp->mark,
.iif = loopback_dev.ifindex,
.oif = oldflp->oif };
struct fib_result res;
@@ -2583,9 +2567,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
rth->fl.fl4_src == flp->fl4_src &&
rth->fl.iif == 0 &&
rth->fl.oif == flp->oif &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark == flp->fl4_fwmark &&
-#endif
+ rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK))) {
@@ -2647,7 +2629,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
struct rtable *rt = (struct rtable*)skb->dst;
struct rtmsg *r;
struct nlmsghdr *nlh;
- struct rta_cacheinfo ci;
+ long expires;
+ u32 id = 0, ts = 0, tsage = 0, error;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
if (nlh == NULL)
@@ -2694,20 +2677,13 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
goto nla_put_failure;
- ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
- ci.rta_used = rt->u.dst.__use;
- ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
- if (rt->u.dst.expires)
- ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies);
- else
- ci.rta_expires = 0;
- ci.rta_error = rt->u.dst.error;
- ci.rta_id = ci.rta_ts = ci.rta_tsage = 0;
+ error = rt->u.dst.error;
+ expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
if (rt->peer) {
- ci.rta_id = rt->peer->ip_id_count;
+ id = rt->peer->ip_id_count;
if (rt->peer->tcp_ts_stamp) {
- ci.rta_ts = rt->peer->tcp_ts;
- ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+ ts = rt->peer->tcp_ts;
+ tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
}
}
@@ -2726,7 +2702,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
} else {
if (err == -EMSGSIZE)
goto nla_put_failure;
- ci.rta_error = err;
+ error = err;
}
}
} else
@@ -2734,7 +2710,9 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
}
- NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+ if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
+ expires, error) < 0)
+ goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2894,8 +2872,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
void __user *oldval,
size_t __user *oldlenp,
void __user *newval,
- size_t newlen,
- void **context)
+ size_t newlen)
{
int delay;
if (newlen != sizeof(int))
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 661e0a4bca7..6b19530905a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -35,23 +35,23 @@ module_init(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-static u32 cookie_hash(u32 saddr, u32 daddr, u32 sport, u32 dport,
+static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
__u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS];
memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c]));
- tmp[0] = saddr;
- tmp[1] = daddr;
- tmp[2] = (sport << 16) + dport;
+ tmp[0] = (__force u32)saddr;
+ tmp[1] = (__force u32)daddr;
+ tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
tmp[3] = count;
sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
return tmp[17];
}
-static __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
- __u16 dport, __u32 sseq, __u32 count,
+static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
+ __be16 dport, __u32 sseq, __u32 count,
__u32 data)
{
/*
@@ -80,8 +80,8 @@ static __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
* "maxdiff" if the current (passed-in) "count". The return value
* is (__u32)-1 if this test fails.
*/
-static __u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr,
- __u16 sport, __u16 dport, __u32 sseq,
+static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, __u32 sseq,
__u32 count, __u32 maxdiff)
{
__u32 diff;
@@ -220,7 +220,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
ireq = inet_rsk(req);
treq = tcp_rsk(req);
- treq->rcv_isn = htonl(skb->h.th->seq) - 1;
+ treq->rcv_isn = ntohl(skb->h.th->seq) - 1;
treq->snt_isn = cookie;
req->mss = mss;
ireq->rmt_port = skb->h.th->source;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e82a5be894b..fabf69a9108 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -51,8 +51,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
static int ipv4_sysctl_forward_strategy(ctl_table *table,
int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
int *valp = table->data;
int new;
@@ -111,8 +110,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
int nlen, void __user *oldval,
size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
char val[TCP_CA_NAME_MAX];
ctl_table tbl = {
@@ -122,19 +120,71 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
int ret;
tcp_get_default_congestion_control(val);
- ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen,
- context);
+ ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
if (ret == 0 && newval && newlen)
ret = tcp_set_default_congestion_control(val);
return ret;
}
-static int __init tcp_congestion_default(void)
+static int proc_tcp_available_congestion_control(ctl_table *ctl,
+ int write, struct file * filp,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
{
- return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+ ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
+ int ret;
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+ if (!tbl.data)
+ return -ENOMEM;
+ tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
+ ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+ kfree(tbl.data);
+ return ret;
}
-late_initcall(tcp_congestion_default);
+/*
+ * /proc handler for tcp_allowed_congestion_control.  A temporary buffer is
+ * filled with the current allowed list and passed to proc_dostring(); after
+ * a successful write the resulting string is installed as the new allowed
+ * list.  Returns -ENOMEM if the buffer cannot be allocated, otherwise the
+ * result of the last step performed.
+ */
+static int proc_allowed_congestion_control(ctl_table *ctl,
+					   int write, struct file * filp,
+					   void __user *buffer, size_t *lenp,
+					   loff_t *ppos)
+{
+	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
+	int err;
+
+	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+	if (tbl.data == NULL)
+		return -ENOMEM;
+
+	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
+
+	err = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+	if (err == 0 && write)
+		err = tcp_set_allowed_congestion_control(tbl.data);
+
+	kfree(tbl.data);
+	return err;
+}
+
+/*
+ * sysctl strategy handler for tcp_allowed_congestion_control.
+ *
+ * A temporary buffer is filled with the current allowed list and passed to
+ * sysctl_string(); when a new value was supplied and accepted, the
+ * resulting string is installed as the new allowed list.  Returns -ENOMEM
+ * if the buffer cannot be allocated, otherwise the result of the last step
+ * performed.
+ */
+static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
+					       int nlen, void __user *oldval,
+					       size_t __user *oldlenp,
+					       void __user *newval,
+					       size_t newlen)
+{
+	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
+	int ret;
+
+	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+	if (!tbl.data)
+		return -ENOMEM;
+
+	/*
+	 * Seed the buffer with the *allowed* list, as
+	 * proc_allowed_congestion_control() does for the same entry, so
+	 * both interfaces start from the same value.  The available list
+	 * used here before belongs to a different sysctl entry.
+	 */
+	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
+	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+	if (ret == 0 && newval && newlen)
+		ret = tcp_set_allowed_congestion_control(tbl.data);
+	kfree(tbl.data);
+
+	return ret;
+}
ctl_table ipv4_table[] = {
{
@@ -738,6 +788,21 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
+ {
+ .ctl_name = NET_TCP_AVAIL_CONG_CONTROL,
+ .procname = "tcp_available_congestion_control",
+ .maxlen = TCP_CA_BUF_MAX,
+ .mode = 0444,
+ .proc_handler = &proc_tcp_available_congestion_control,
+ },
+ {
+ .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL,
+ .procname = "tcp_allowed_congestion_control",
+ .maxlen = TCP_CA_BUF_MAX,
+ .mode = 0644,
+ .proc_handler = &proc_allowed_congestion_control,
+ .strategy = &strategy_allowed_congestion_control,
+ },
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 66e9a729f6d..b67e0dd743b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -258,6 +258,7 @@
#include <linux/bootmem.h>
#include <linux/cache.h>
#include <linux/err.h>
+#include <linux/crypto.h>
#include <net/icmp.h>
#include <net/tcp.h>
@@ -462,11 +463,12 @@ static inline int forced_push(struct tcp_sock *tp)
static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
struct sk_buff *skb)
{
- skb->csum = 0;
- TCP_SKB_CB(skb)->seq = tp->write_seq;
- TCP_SKB_CB(skb)->end_seq = tp->write_seq;
- TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(skb)->sacked = 0;
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+ skb->csum = 0;
+ tcb->seq = tcb->end_seq = tp->write_seq;
+ tcb->flags = TCPCB_FLAG_ACK;
+ tcb->sacked = 0;
skb_header_release(skb);
__skb_queue_tail(&sk->sk_write_queue, skb);
sk_charge_skb(sk, skb);
@@ -1942,6 +1944,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
}
break;
+#ifdef CONFIG_TCP_MD5SIG
+ case TCP_MD5SIG:
+ /* Read the IP->Key mappings from userspace */
+ err = tp->af_specific->md5_parse(sk, optval, optlen);
+ break;
+#endif
+
default:
err = -ENOPROTOOPT;
break;
@@ -2154,7 +2163,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
struct tcphdr *th;
unsigned thlen;
unsigned int seq;
- unsigned int delta;
+ __be32 delta;
unsigned int oldlen;
unsigned int len;
@@ -2207,7 +2216,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
do {
th->fin = th->psh = 0;
- th->check = ~csum_fold(th->check + delta);
+ th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check = csum_fold(csum_partial(skb->h.raw, thlen,
skb->csum));
@@ -2221,7 +2231,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
} while (skb->next);
delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
- th->check = ~csum_fold(th->check + delta);
+ th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check = csum_fold(csum_partial(skb->h.raw, thlen,
skb->csum));
@@ -2231,6 +2242,136 @@ out:
}
EXPORT_SYMBOL(tcp_tso_segment);
+#ifdef CONFIG_TCP_MD5SIG
+static unsigned long tcp_md5sig_users;
+static struct tcp_md5sig_pool **tcp_md5sig_pool;
+static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+
+/*
+ * Tear down a per-cpu MD5 signature pool: for every possible CPU release the
+ * hash transform (if one was attached) and the per-cpu entry, then free the
+ * per-cpu pointer array itself.  Slots that are NULL are skipped, so this is
+ * also usable on a partially constructed pool.
+ */
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
+
+		if (!p)
+			continue;
+		if (p->md5_desc.tfm)
+			crypto_free_hash(p->md5_desc.tfm);
+		/* p is loop-local, so there is nothing to reset afterwards. */
+		kfree(p);
+	}
+	free_percpu(pool);
+}
+
+/*
+ * Drop one user of the shared MD5 signature pool.  When the user count
+ * reaches zero the global pool pointer is detached under the lock and the
+ * pool is destroyed after the lock has been released.
+ */
+void tcp_free_md5sig_pool(void)
+{
+	struct tcp_md5sig_pool **doomed = NULL;
+
+	spin_lock(&tcp_md5sig_pool_lock);
+	tcp_md5sig_users--;
+	if (tcp_md5sig_users == 0) {
+		doomed = tcp_md5sig_pool;
+		tcp_md5sig_pool = NULL;
+	}
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	if (doomed != NULL)
+		__tcp_free_md5sig_pool(doomed);
+}
+
+EXPORT_SYMBOL(tcp_free_md5sig_pool);
+
+/*
+ * Build a per-cpu MD5 signature pool: one zeroed tcp_md5sig_pool plus an
+ * "md5" hash transform for every possible CPU.  On any failure everything
+ * set up so far is released through __tcp_free_md5sig_pool() and NULL is
+ * returned.
+ */
+static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
+{
+	struct tcp_md5sig_pool **pool;
+	int cpu;
+
+	pool = alloc_percpu(struct tcp_md5sig_pool *);
+	if (pool == NULL)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_hash *tfm;
+		struct tcp_md5sig_pool *entry;
+
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (entry == NULL)
+			goto fail;
+		/* Publish the entry first so the failure path can free it. */
+		*per_cpu_ptr(pool, cpu) = entry;
+
+		tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+		if (tfm == NULL || IS_ERR(tfm))
+			goto fail;
+
+		entry->md5_desc.tfm = tfm;
+	}
+	return pool;
+
+fail:
+	__tcp_free_md5sig_pool(pool);
+	return NULL;
+}
+
+/*
+ * Take a user reference on the shared MD5 signature pool, creating the pool
+ * if this is the first user.
+ *
+ * A caller that is not the first user but finds no pool published yet backs
+ * its reference out and retries.  The first user allocates the pool with the
+ * spinlock dropped, because the allocation may sleep, and then publishes it
+ * under the lock.
+ *
+ * Returns the pool, or NULL if the pool could not be allocated (in which
+ * case the reference is dropped again).
+ */
+struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void)
+{
+	struct tcp_md5sig_pool **pool;
+	struct tcp_md5sig_pool **fresh;
+	int first_user;
+
+	for (;;) {
+		spin_lock(&tcp_md5sig_pool_lock);
+		pool = tcp_md5sig_pool;
+		first_user = (tcp_md5sig_users++ == 0);
+		if (first_user || pool != NULL) {
+			spin_unlock(&tcp_md5sig_pool_lock);
+			break;
+		}
+		/* Someone else is still setting the pool up: undo and retry. */
+		tcp_md5sig_users--;
+		spin_unlock(&tcp_md5sig_pool_lock);
+		cpu_relax();
+	}
+
+	if (!first_user)
+		return pool;
+
+	/* we cannot hold spinlock here because this may sleep. */
+	fresh = __tcp_alloc_md5sig_pool();
+
+	spin_lock(&tcp_md5sig_pool_lock);
+	if (fresh == NULL) {
+		tcp_md5sig_users--;
+		spin_unlock(&tcp_md5sig_pool_lock);
+		return NULL;
+	}
+	pool = tcp_md5sig_pool;
+	if (pool == NULL) {
+		/* Publish our pool; nothing is left over to free. */
+		tcp_md5sig_pool = pool = fresh;
+		fresh = NULL;
+	}
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	/* oops, it has already been assigned: discard our copy. */
+	if (fresh != NULL)
+		__tcp_free_md5sig_pool(fresh);
+
+	return pool;
+}
+
+EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
+
+/*
+ * Return the given CPU's entry of the shared MD5 signature pool and take a
+ * user reference on the pool, or return NULL (taking no reference) when no
+ * pool is currently published.
+ */
+struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
+{
+	struct tcp_md5sig_pool **pool;
+
+	spin_lock(&tcp_md5sig_pool_lock);
+	pool = tcp_md5sig_pool;
+	if (pool != NULL)
+		tcp_md5sig_users++;
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	if (pool == NULL)
+		return NULL;
+	return *per_cpu_ptr(pool, cpu);
+}
+
+EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+
+/* Drop the user reference taken by __tcp_get_md5sig_pool(); forwards to tcp_free_md5sig_pool(). */
+void __tcp_put_md5sig_pool(void)
+{
+ tcp_free_md5sig_pool();
+}
+
+EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+#endif
+
extern void __skb_cb_too_small_for_tcp(int, int);
extern struct tcp_congestion_ops tcp_reno;
@@ -2270,7 +2411,7 @@ void __init tcp_init(void)
thash_entries,
(num_physpages >= 128 * 1024) ?
13 : 15,
- HASH_HIGHMEM,
+ 0,
&tcp_hashinfo.ehash_size,
NULL,
0);
@@ -2286,7 +2427,7 @@ void __init tcp_init(void)
tcp_hashinfo.ehash_size,
(num_physpages >= 128 * 1024) ?
13 : 15,
- HASH_HIGHMEM,
+ 0,
&tcp_hashinfo.bhash_size,
NULL,
64 * 1024);
@@ -2316,9 +2457,10 @@ void __init tcp_init(void)
sysctl_max_syn_backlog = 128;
}
- sysctl_tcp_mem[0] = 768 << order;
- sysctl_tcp_mem[1] = 1024 << order;
- sysctl_tcp_mem[2] = 1536 << order;
+ /* Allow no more than 3/4 kernel memory (usually less) allocated to TCP */
+ sysctl_tcp_mem[0] = (1536 / sizeof (struct inet_bind_hashbucket)) << order;
+ sysctl_tcp_mem[1] = sysctl_tcp_mem[0] * 4 / 3;
+ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index af0aca1e6be..5ca7723d079 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -113,7 +113,7 @@ int tcp_set_default_congestion_control(const char *name)
spin_lock(&tcp_cong_list_lock);
ca = tcp_ca_find(name);
#ifdef CONFIG_KMOD
- if (!ca) {
+ if (!ca && capable(CAP_SYS_MODULE)) {
spin_unlock(&tcp_cong_list_lock);
request_module("tcp_%s", name);
@@ -123,6 +123,7 @@ int tcp_set_default_congestion_control(const char *name)
#endif
if (ca) {
+ ca->non_restricted = 1; /* default is always allowed */
list_move(&ca->list, &tcp_cong_list);
ret = 0;
}
@@ -131,6 +132,30 @@ int tcp_set_default_congestion_control(const char *name)
return ret;
}
+/* Set the default congestion control from CONFIG_DEFAULT_TCP_CONG at bootup; registered as a late initcall */
+static int __init tcp_congestion_default(void)
+{
+ return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+}
+late_initcall(tcp_congestion_default);
+
+
+/*
+ * Build a space-separated string naming every entry on tcp_cong_list.
+ *
+ * @buf:    destination buffer, at least one byte long
+ * @maxlen: size of @buf in bytes
+ *
+ * The buffer is terminated up front, matching
+ * tcp_get_allowed_congestion_control(), so that callers handing in an
+ * uninitialised allocation still get a valid empty string when the list
+ * has no entries.
+ */
+void tcp_get_available_congestion_control(char *buf, size_t maxlen)
+{
+	struct tcp_congestion_ops *ca;
+	size_t offs = 0;
+
+	*buf = '\0';
+	rcu_read_lock();
+	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+		offs += snprintf(buf + offs, maxlen - offs,
+				 "%s%s",
+				 offs == 0 ? "" : " ", ca->name);
+
+	}
+	rcu_read_unlock();
+}
+
/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
@@ -144,6 +169,64 @@ void tcp_get_default_congestion_control(char *name)
rcu_read_unlock();
}
+/*
+ * Build a space-separated string naming the entries on tcp_cong_list whose
+ * non_restricted flag is set.  The result is an empty string when no entry
+ * qualifies.
+ */
+void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
+{
+	struct tcp_congestion_ops *ops;
+	size_t used = 0;
+
+	buf[0] = '\0';
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &tcp_cong_list, list) {
+		if (ops->non_restricted)
+			used += snprintf(buf + used, maxlen - used,
+					 "%s%s",
+					 used ? " " : "", ops->name);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Change the list of non-restricted congestion control algorithms.
+ *
+ * @val: space-separated list of algorithm names; it is tokenised in place
+ *       by strsep(), so the caller's buffer is modified.
+ *
+ * The list is validated on a private copy first so that nothing is changed
+ * when any name is unknown.  strsep() advances the pointer it is given, so
+ * the start of the copy is remembered separately in order to free it.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or
+ * -ENOENT if a name is not a registered algorithm.
+ */
+int tcp_set_allowed_congestion_control(char *val)
+{
+	struct tcp_congestion_ops *ca;
+	char *saved_clone, *clone, *name;
+	int ret = 0;
+
+	saved_clone = clone = kstrdup(val, GFP_USER);
+	if (!clone)
+		return -ENOMEM;
+
+	spin_lock(&tcp_cong_list_lock);
+	/* pass 1 check for bad entries */
+	while ((name = strsep(&clone, " ")) && *name) {
+		ca = tcp_ca_find(name);
+		if (!ca) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	/* pass 2 clear */
+	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
+		ca->non_restricted = 0;
+
+	/* pass 3 mark as allowed */
+	while ((name = strsep(&val, " ")) && *name) {
+		ca = tcp_ca_find(name);
+		WARN_ON(!ca);
+		if (ca)
+			ca->non_restricted = 1;
+	}
+out:
+	spin_unlock(&tcp_cong_list_lock);
+	/* Free via the saved start: 'clone' itself was moved by strsep(). */
+	kfree(saved_clone);
+
+	return ret;
+}
+
+
/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
@@ -153,12 +236,25 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
rcu_read_lock();
ca = tcp_ca_find(name);
+ /* no change asking for existing value */
if (ca == icsk->icsk_ca_ops)
goto out;
+#ifdef CONFIG_KMOD
+ /* not found attempt to autoload module */
+ if (!ca && capable(CAP_SYS_MODULE)) {
+ rcu_read_unlock();
+ request_module("tcp_%s", name);
+ rcu_read_lock();
+ ca = tcp_ca_find(name);
+ }
+#endif
if (!ca)
err = -ENOENT;
+ else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+ err = -EPERM;
+
else if (!try_module_get(ca->owner))
err = -EBUSY;
@@ -260,6 +356,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
.name = "reno",
+ .non_restricted = 1,
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index a60ef38d75c..6ad18480226 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -190,7 +190,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
*/
/* change the unit from HZ to bictcp_HZ */
- t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
+ t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
<< BICTCP_HZ) / HZ;
if (t < ca->bic_K) /* t - K */
@@ -259,7 +259,7 @@ static inline void measure_delay(struct sock *sk)
(s32)(tcp_time_stamp - ca->epoch_start) < HZ)
return;
- delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3;
if (delay == 0)
delay = 1;
@@ -366,7 +366,7 @@ static int __init cubictcp_register(void)
beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
- cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
+ cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */
/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
* so K = cubic_root( (wmax-cwnd)*rtt/c )
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 682e7d5b6f2..753987a1048 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -23,15 +23,15 @@ module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
struct htcp {
- u16 alpha; /* Fixed point arith, << 7 */
+ u32 alpha; /* Fixed point arith, << 7 */
u8 beta; /* Fixed point arith, << 7 */
u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */
- u32 last_cong; /* Time since last congestion event end */
- u32 undo_last_cong;
u16 pkts_acked;
u32 packetcount;
u32 minRTT;
u32 maxRTT;
+ u32 last_cong; /* Time since last congestion event end */
+ u32 undo_last_cong;
u32 undo_maxRTT;
u32 undo_old_maxB;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cf06accbe68..c701f6abbfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2677,6 +2677,14 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
opt_rx->sack_ok) {
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
}
+#ifdef CONFIG_TCP_MD5SIG
+ case TCPOPT_MD5SIG:
+ /*
+ * The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_do_rcv()).
+ */
+ break;
+#endif
};
ptr+=opsize-2;
length-=opsize;
@@ -3782,9 +3790,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
return err;
}
-static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
- int result;
+ __sum16 result;
if (sock_owned_by_user(sk)) {
local_bh_enable();
@@ -4227,9 +4235,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* Change state from SYN-SENT only after copied_seq
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
- mb();
+ smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
+ security_inet_conn_established(sk, skb);
+
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
@@ -4473,7 +4483,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
case TCP_SYN_RECV:
if (acceptable) {
tp->copied_seq = tp->rcv_nxt;
- mb();
+ smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
sk->sk_state_change(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 22ef8bd2662..a1222d6968c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,6 +78,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
@@ -89,10 +92,19 @@ static struct socket *tcp_socket;
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
+ __be32 addr);
+static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+ __be32 saddr, __be32 daddr,
+ struct tcphdr *th, int protocol,
+ int tcplen);
+#endif
+
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
+ .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
+ .lhash_users = ATOMIC_INIT(0),
+ .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -111,7 +123,7 @@ void tcp_unhash(struct sock *sk)
inet_unhash(&tcp_hashinfo, sk);
}
-static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
return secure_tcp_sequence_number(skb->nh.iph->daddr,
skb->nh.iph->saddr,
@@ -205,13 +217,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
-
- /* VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state TIME-WAIT
- * and initialize rx_opt.ts_recent from it, when trying new connection.
+ /*
+ * VJ's idea. We save last timestamp seen from
+ * the destination in peer table, when entering state
+ * TIME-WAIT and initialize rx_opt.ts_recent from it,
+ * when trying new connection.
*/
-
- if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+ if (peer != NULL &&
+ peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
@@ -236,7 +249,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
- err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
+ err = ip_route_newports(&rt, IPPROTO_TCP,
+ inet->sport, inet->dport, sk);
if (err)
goto failure;
@@ -260,7 +274,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return 0;
failure:
- /* This unhashes the socket and releases the local port, if necessary. */
+ /*
+ * This unhashes the socket and releases the local port,
+ * if necessary.
+ */
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
@@ -485,8 +502,9 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
struct tcphdr *th = skb->h.th;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
- skb->csum = offsetof(struct tcphdr, check);
+ th->check = ~tcp_v4_check(th, len,
+ inet->saddr, inet->daddr, 0);
+ skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
csum_partial((char *)th,
@@ -508,7 +526,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
th->check = 0;
th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
- skb->csum = offsetof(struct tcphdr, check);
+ skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
@@ -526,11 +544,19 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(struct sk_buff *skb)
+static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = skb->h.th;
- struct tcphdr rth;
+ struct {
+ struct tcphdr th;
+#ifdef CONFIG_TCP_MD5SIG
+ __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
+#endif
+ } rep;
struct ip_reply_arg arg;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -540,29 +566,49 @@ static void tcp_v4_send_reset(struct sk_buff *skb)
return;
/* Swap the send and the receive. */
- memset(&rth, 0, sizeof(struct tcphdr));
- rth.dest = th->source;
- rth.source = th->dest;
- rth.doff = sizeof(struct tcphdr) / 4;
- rth.rst = 1;
+ memset(&rep, 0, sizeof(rep));
+ rep.th.dest = th->source;
+ rep.th.source = th->dest;
+ rep.th.doff = sizeof(struct tcphdr) / 4;
+ rep.th.rst = 1;
if (th->ack) {
- rth.seq = th->ack_seq;
+ rep.th.seq = th->ack_seq;
} else {
- rth.ack = 1;
- rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
- skb->len - (th->doff << 2));
+ rep.th.ack = 1;
+ rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
+ skb->len - (th->doff << 2));
}
- memset(&arg, 0, sizeof arg);
- arg.iov[0].iov_base = (unsigned char *)&rth;
- arg.iov[0].iov_len = sizeof rth;
+ memset(&arg, 0, sizeof(arg));
+ arg.iov[0].iov_base = (unsigned char *)&rep;
+ arg.iov[0].iov_len = sizeof(rep.th);
+
+#ifdef CONFIG_TCP_MD5SIG
+ key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
+ if (key) {
+ rep.opt[0] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ /* Update length and the length the header thinks exists */
+ arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+
+ tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
+ key,
+ skb->nh.iph->daddr,
+ skb->nh.iph->saddr,
+ &rep.th, IPPROTO_TCP,
+ arg.iov[0].iov_len);
+ }
+#endif
arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /*XXX*/
+ skb->nh.iph->saddr, /* XXX */
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
+ ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
@@ -572,28 +618,37 @@ static void tcp_v4_send_reset(struct sk_buff *skb)
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts)
{
struct tcphdr *th = skb->h.th;
struct {
struct tcphdr th;
- u32 tsopt[TCPOLEN_TSTAMP_ALIGNED >> 2];
+ __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
+#ifdef CONFIG_TCP_MD5SIG
+ + (TCPOLEN_MD5SIG_ALIGNED >> 2)
+#endif
+ ];
} rep;
struct ip_reply_arg arg;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key tw_key;
+#endif
memset(&rep.th, 0, sizeof(struct tcphdr));
- memset(&arg, 0, sizeof arg);
+ memset(&arg, 0, sizeof(arg));
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
if (ts) {
- rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- rep.tsopt[1] = htonl(tcp_time_stamp);
- rep.tsopt[2] = htonl(ts);
- arg.iov[0].iov_len = sizeof(rep);
+ rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ rep.opt[1] = htonl(tcp_time_stamp);
+ rep.opt[2] = htonl(ts);
+ arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
}
/* Swap the send and the receive. */
@@ -605,8 +660,44 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
rep.th.ack = 1;
rep.th.window = htons(win);
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * The SKB holds an incoming packet, but may not have a valid ->sk
+ * pointer. This is especially the case when we're dealing with a
+ * TIME_WAIT ack, because the sk structure is long gone, and only
+ * the tcp_timewait_sock remains. So the md5 key is stashed in that
+ * structure, and we use it in preference. I believe that (twsk ||
+ * skb->sk) holds true, but we program defensively.
+ */
+ if (!twsk && skb->sk) {
+ key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
+ } else if (twsk && twsk->tw_md5_keylen) {
+ tw_key.key = twsk->tw_md5_key;
+ tw_key.keylen = twsk->tw_md5_keylen;
+ key = &tw_key;
+ } else
+ key = NULL;
+
+ if (key) {
+ int offset = (ts) ? 3 : 0;
+
+ rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
+ rep.th.doff = arg.iov[0].iov_len/4;
+
+ tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
+ key,
+ skb->nh.iph->daddr,
+ skb->nh.iph->saddr,
+ &rep.th, IPPROTO_TCP,
+ arg.iov[0].iov_len);
+ }
+#endif
arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /*XXX*/
+ skb->nh.iph->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
@@ -618,17 +709,20 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
- const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
+ tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ tcptw->tw_ts_recent);
inet_twsk_put(tw);
}
-static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
+static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
+ struct request_sock *req)
{
- tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+ tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
+ tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
req->ts_recent);
}
@@ -662,8 +756,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
ireq->rmt_addr,
ireq->opt);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = net_xmit_eval(err);
}
out:
@@ -715,7 +808,423 @@ static struct ip_options *tcp_v4_save_options(struct sock *sk,
return dopt;
}
-struct request_sock_ops tcp_request_sock_ops = {
+#ifdef CONFIG_TCP_MD5SIG
+/*
+ * RFC2385 MD5 checksumming requires a mapping of
+ * IP address->MD5 Key.
+ * We need to maintain these in the sk structure.
+ */
+
+/*
+ * Search the socket's IPv4 MD5 key table for an entry whose address equals
+ * @addr.  Returns the matching key, or NULL when the socket has no key
+ * table, the table is empty, or no entry matches.
+ */
+static struct tcp_md5sig_key *
+			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+{
+	struct tcp_md5sig_info *info = tcp_sk(sk)->md5sig_info;
+	int idx;
+
+	if (info == NULL || info->entries4 == 0)
+		return NULL;
+
+	for (idx = 0; idx < info->entries4; idx++) {
+		struct tcp4_md5sig_key *slot = &info->keys4[idx];
+
+		if (slot->addr == addr)
+			return (struct tcp_md5sig_key *)slot;
+	}
+	return NULL;
+}
+
+/* Look up the MD5 key that sk holds for the destination address (daddr) of addr_sk; NULL if none. */
+struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
+ struct sock *addr_sk)
+{
+ return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
+}
+
+EXPORT_SYMBOL(tcp_v4_md5_lookup);
+
+/* Look up the MD5 key that sk holds for the remote address (rmt_addr) of a request sock; NULL if none. */
+static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
+ struct request_sock *req)
+{
+ return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
+}
+
+/*
+ * Install an MD5 key for @addr on @sk.  This can be called on a newly
+ * created socket, from other files.
+ *
+ * Ownership of @newkey passes to this function: it is stored in the key
+ * table on success and freed on every failure path.  An existing entry for
+ * @addr has its old key freed and replaced.  A new entry takes a reference
+ * on the MD5 signature pool and grows the key array by one slot when it is
+ * full.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
+		      u8 *newkey, u8 newkeylen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp4_md5sig_key *existing;
+	struct tcp4_md5sig_key *slot;
+	struct tcp_md5sig_info *info;
+
+	existing = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
+	if (existing) {
+		/* Pre-existing entry - just update that one. */
+		kfree(existing->key);
+		existing->key = newkey;
+		existing->keylen = newkeylen;
+		return 0;
+	}
+
+	if (!tp->md5sig_info) {
+		tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
+					  GFP_ATOMIC);
+		if (!tp->md5sig_info)
+			goto drop_key;
+	}
+	if (tcp_alloc_md5sig_pool() == NULL)
+		goto drop_key;
+
+	info = tp->md5sig_info;
+
+	if (info->alloced4 == info->entries4) {
+		struct tcp4_md5sig_key *grown;
+
+		grown = kmalloc((sizeof(*grown) *
+				 (info->entries4 + 1)), GFP_ATOMIC);
+		if (!grown) {
+			kfree(newkey);
+			tcp_free_md5sig_pool();
+			return -ENOMEM;
+		}
+
+		if (info->entries4)
+			memcpy(grown, info->keys4,
+			       sizeof(*grown) * info->entries4);
+
+		/* Free old key list, and reference new one */
+		if (info->keys4)
+			kfree(info->keys4);
+		info->keys4 = grown;
+		info->alloced4++;
+	}
+
+	/* Append the new entry in the first unused slot. */
+	slot = &info->keys4[info->entries4];
+	info->entries4++;
+	slot->addr = addr;
+	slot->key = newkey;
+	slot->keylen = newkeylen;
+	return 0;
+
+drop_key:
+	kfree(newkey);
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(tcp_v4_md5_do_add);
+
+/* Add newkey to sk for the destination address (daddr) of addr_sk; forwards to tcp_v4_md5_do_add(). */
+static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
+ u8 *newkey, u8 newkeylen)
+{
+ return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
+ newkey, newkeylen);
+}
+
+/*
+ * Remove the MD5 key registered for @addr from @sk.
+ *
+ * The caller must ensure tcp_sk(sk)->md5sig_info is non-NULL.  On a match
+ * the key is freed, the table is compacted and the reference on the MD5
+ * signature pool held for that entry is dropped.
+ *
+ * Returns 0 if an entry was removed, -ENOENT if no entry matches @addr.
+ */
+int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int i;
+
+	for (i = 0; i < tp->md5sig_info->entries4; i++) {
+		if (tp->md5sig_info->keys4[i].addr == addr) {
+			/* Free the key */
+			kfree(tp->md5sig_info->keys4[i].key);
+			tp->md5sig_info->entries4--;
+
+			if (tp->md5sig_info->entries4 == 0) {
+				kfree(tp->md5sig_info->keys4);
+				tp->md5sig_info->keys4 = NULL;
+				/*
+				 * The array is gone, so its capacity must be
+				 * reset too; otherwise the next add would
+				 * skip the allocation and write through the
+				 * NULL keys4 pointer.
+				 */
+				tp->md5sig_info->alloced4 = 0;
+			} else if (tp->md5sig_info->entries4 != i) {
+				/*
+				 * Close the gap.  Source and destination
+				 * overlap, so this has to be memmove().
+				 */
+				memmove(&tp->md5sig_info->keys4[i],
+					&tp->md5sig_info->keys4[i+1],
+					(tp->md5sig_info->entries4 - i) *
+					 sizeof(struct tcp4_md5sig_key));
+			}
+			tcp_free_md5sig_pool();
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+EXPORT_SYMBOL(tcp_v4_md5_do_del);
+
+/*
+ * Discard every IPv4 MD5 key held by the socket: free each key, drop one
+ * reference on the MD5 signature pool if there were any entries, then free
+ * the key array and reset its bookkeeping.
+ */
+static void tcp_v4_clear_md5_list(struct sock *sk)
+{
+	struct tcp_md5sig_info *info = tcp_sk(sk)->md5sig_info;
+
+	if (info->entries4 != 0) {
+		int idx = 0;
+
+		while (idx < info->entries4) {
+			kfree(info->keys4[idx].key);
+			idx++;
+		}
+		info->entries4 = 0;
+		tcp_free_md5sig_pool();
+	}
+
+	if (info->keys4 != NULL) {
+		kfree(info->keys4);
+		info->keys4 = NULL;
+		info->alloced4 = 0;
+	}
+}
+
+/*
+ * Handle the TCP_MD5SIG socket option for IPv4: read a struct tcp_md5sig
+ * from userspace and add, replace or delete the key for the given address.
+ *
+ * An empty key (no key or zero key length) requests deletion.
+ *
+ * Returns 0 on success, -EINVAL for a short option, a non-AF_INET address
+ * or an over-long key, -EFAULT if the option cannot be copied in, -ENOENT
+ * when deleting without a key table, -ENOMEM when an allocation fails, or
+ * the result of tcp_v4_md5_do_add()/tcp_v4_md5_do_del().
+ */
+static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
+				 int optlen)
+{
+	struct tcp_md5sig cmd;
+	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+	u8 *newkey;
+
+	if (optlen < sizeof(cmd))
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, optval, sizeof(cmd)))
+		return -EFAULT;
+
+	if (sin->sin_family != AF_INET)
+		return -EINVAL;
+
+	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
+		if (!tcp_sk(sk)->md5sig_info)
+			return -ENOENT;
+		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
+	}
+
+	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+		return -EINVAL;
+
+	if (!tcp_sk(sk)->md5sig_info) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+		/* Out of memory is not an invalid argument. */
+		if (!p)
+			return -ENOMEM;
+
+		tp->md5sig_info = p;
+
+	}
+
+	/* tcp_v4_md5_do_add() takes ownership of newkey, even on failure. */
+	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+	if (!newkey)
+		return -ENOMEM;
+	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
+				 newkey, cmd.tcpm_keylen);
+}
+
+/*
+ * Compute the RFC2385 MD5 signature of a TCP segment.
+ *
+ * @md5_hash: 16-byte output buffer; zero-filled on failure
+ * @key:      key to mix into the digest
+ * @saddr:    source address for the pseudo-header
+ * @daddr:    destination address for the pseudo-header
+ * @th:       TCP header; its checksum field is zeroed while hashing and
+ *            restored before returning on every path that touched it
+ * @protocol: protocol number for the pseudo-header
+ * @tcplen:   segment length (header plus data) in bytes
+ *
+ * Returns 0 on success and 1 if no pool entry was available or the hash
+ * computation failed, so callers that test the result (such as the inbound
+ * check) can detect the failure.
+ */
+static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+				   __be32 saddr, __be32 daddr,
+				   struct tcphdr *th, int protocol,
+				   int tcplen)
+{
+	struct scatterlist sg[4];
+	__u16 data_len;
+	int block = 0;
+	__sum16 old_checksum;
+	struct tcp_md5sig_pool *hp;
+	struct tcp4_pseudohdr *bp;
+	struct hash_desc *desc;
+	int err;
+	unsigned int nbytes = 0;
+
+	/*
+	 * Okay, so RFC2385 is turned on for this connection,
+	 * so we need to generate the MD5 hash for the packet now.
+	 */
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+
+	bp = &hp->md5_blk.ip4;
+	desc = &hp->md5_desc;
+
+	/*
+	 * 1. the TCP pseudo-header (in the order: source IP address,
+	 * destination IP address, zero-padded protocol number, and
+	 * segment length)
+	 */
+	bp->saddr = saddr;
+	bp->daddr = daddr;
+	bp->pad = 0;
+	bp->protocol = protocol;
+	bp->len = htons(tcplen);
+	sg_set_buf(&sg[block++], bp, sizeof(*bp));
+	nbytes += sizeof(*bp);
+
+	/* 2. the TCP header, excluding options, and assuming a
+	 * checksum of zero.
+	 */
+	old_checksum = th->check;
+	th->check = 0;
+	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
+	nbytes += sizeof(struct tcphdr);
+
+	/* 3. the TCP segment data (if any) */
+	data_len = tcplen - (th->doff << 2);
+	if (data_len > 0) {
+		unsigned char *data = (unsigned char *)th + (th->doff << 2);
+		sg_set_buf(&sg[block++], data, data_len);
+		nbytes += data_len;
+	}
+
+	/* 4. an independently-specified key or password, known to both
+	 * TCPs and presumably connection-specific
+	 */
+	sg_set_buf(&sg[block++], key->key, key->keylen);
+	nbytes += key->keylen;
+
+	/* Now store the Hash into the packet */
+	err = crypto_hash_init(desc);
+	if (err)
+		goto clear_hash;
+	err = crypto_hash_update(desc, sg, nbytes);
+	if (err)
+		goto clear_hash;
+	err = crypto_hash_final(desc, md5_hash);
+	if (err)
+		goto clear_hash;
+
+	/* Reset header, and free up the crypto */
+	tcp_put_md5sig_pool();
+	th->check = old_checksum;
+	return 0;
+
+clear_hash:
+	tcp_put_md5sig_pool();
+	/* The header checksum was zeroed above; put it back on failure too. */
+	th->check = old_checksum;
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
+}
+
+/*
+ * Compute the MD5 signature for a segment, taking the address pair from
+ * the socket when one is given and from the route (@dst, which must then
+ * be non-NULL) otherwise.  Returns the result of
+ * tcp_v4_do_calc_md5_hash().
+ */
+int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+			 struct sock *sk,
+			 struct dst_entry *dst,
+			 struct request_sock *req,
+			 struct tcphdr *th, int protocol,
+			 int tcplen)
+{
+	__be32 src_addr, dst_addr;
+
+	if (!sk) {
+		struct rtable *route = (struct rtable *)dst;
+
+		BUG_ON(!route);
+		src_addr = route->rt_src;
+		dst_addr = route->rt_dst;
+	} else {
+		src_addr = inet_sk(sk)->saddr;
+		dst_addr = inet_sk(sk)->daddr;
+	}
+
+	return tcp_v4_do_calc_md5_hash(md5_hash, key,
+				       src_addr, dst_addr,
+				       th, protocol, tcplen);
+}
+
+EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
+
+/*
+ * Validate the MD5 signature option of an incoming segment against the key
+ * (if any) that @sk holds for the segment's source address.
+ *
+ * Returns 0 if the segment is acceptable and 1 if it must be dropped.
+ */
+static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
+{
+	/*
+	 * This gets called for each TCP segment that arrives
+	 * so we want to be efficient.
+	 * We have 3 drop cases:
+	 * o No MD5 hash and one expected.
+	 * o MD5 hash and we're not expecting one.
+	 * o MD5 hash and its wrong.
+	 */
+	__u8 *hash_location = NULL;
+	struct tcp_md5sig_key *hash_expected;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th = skb->h.th;
+	int length = (th->doff << 2) - sizeof(struct tcphdr);
+	int genhash;
+	unsigned char *ptr;
+	unsigned char newhash[16];
+
+	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+
+	/*
+	 * If the TCP option length is less than the TCP_MD5SIG
+	 * option length, then we can shortcut
+	 */
+	if (length < TCPOLEN_MD5SIG) {
+		if (hash_expected)
+			return 1;
+		else
+			return 0;
+	}
+
+	/* Okay, we can't shortcut - we have to grub through the options */
+	ptr = (unsigned char *)(th + 1);
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			goto done_opts;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2)
+				goto done_opts;
+			if (opsize > length)
+				goto done_opts;
+
+			if (opcode == TCPOPT_MD5SIG) {
+				hash_location = ptr;
+				goto done_opts;
+			}
+		}
+		ptr += opsize-2;
+		length -= opsize;
+	}
+done_opts:
+	/* We've parsed the options - do we have a hash? */
+	if (!hash_expected && !hash_location)
+		return 0;
+
+	if (hash_expected && !hash_location) {
+		/* A key is configured for this peer but the option is absent. */
+		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
+			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
+			       NIPQUAD(iph->saddr), ntohs(th->source),
+			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		return 1;
+	}
+
+	if (!hash_expected && hash_location) {
+		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
+			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
+			       NIPQUAD(iph->saddr), ntohs(th->source),
+			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		return 1;
+	}
+
+	/* Okay, so this is hash_expected and hash_location -
+	 * so we need to calculate the checksum.
+	 */
+	genhash = tcp_v4_do_calc_md5_hash(newhash,
+					  hash_expected,
+					  iph->saddr, iph->daddr,
+					  th, sk->sk_protocol,
+					  skb->len);
+
+	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		if (net_ratelimit()) {
+			printk(KERN_INFO "MD5 Hash failed for "
+			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
+			       NIPQUAD(iph->saddr), ntohs(th->source),
+			       NIPQUAD(iph->daddr), ntohs(th->dest),
+			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
+		}
+		return 1;
+	}
+	return 0;
+}
+
+#endif
+
+struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct tcp_request_sock),
.rtx_syn_ack = tcp_v4_send_synack,
@@ -724,9 +1233,16 @@ struct request_sock_ops tcp_request_sock_ops = {
.send_reset = tcp_v4_send_reset,
};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+ .md5_lookup = tcp_v4_reqsk_md5_lookup,
+};
+#endif
+
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
.twsk_unique = tcp_twsk_unique,
+ .twsk_destructor= tcp_twsk_destructor,
};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -774,6 +1290,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (!req)
goto drop;
+#ifdef CONFIG_TCP_MD5SIG
+ tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
+#endif
+
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = 536;
tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
@@ -859,7 +1379,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
}
- isn = tcp_v4_init_sequence(sk, skb);
+ isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
@@ -892,6 +1412,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
if (sk_acceptq_is_full(sk))
goto exit_overflow;
@@ -926,6 +1449,22 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
+#ifdef CONFIG_TCP_MD5SIG
+ /* Copy over the MD5 key from the original socket */
+ if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
+ /*
+ * We're using one, so create a matching key
+ * on the newsk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ *
+ * The copy is filed under newinet->daddr, i.e. the
+ * very address the key was just looked up with, so
+ * that later lookups on newsk find it again.
+ */
+ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
+ if (newkey != NULL)
+ tcp_v4_md5_do_add(newsk, newinet->daddr,
+ newkey, key->keylen);
+ }
+#endif
+
__inet_hash(&tcp_hashinfo, newsk, 0);
__inet_inherit_port(&tcp_hashinfo, sk, newsk);
@@ -971,7 +1510,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-static int tcp_v4_checksum_init(struct sk_buff *skb)
+static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
@@ -1001,10 +1540,24 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
+ struct sock *rsk;
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * We really want to reject the packet as early as possible
+ * if:
+ * o We're expecting an MD5'd packet and there is no MD5 tcp option
+ * o There is an MD5 option and we're not expecting one
+ */
+ if (tcp_v4_inbound_md5_hash(sk, skb))
+ goto discard;
+#endif
+
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+ rsk = sk;
goto reset;
+ }
TCP_CHECK_TIMER(sk);
return 0;
}
@@ -1018,20 +1571,24 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb))
+ if (tcp_child_process(sk, nsk, skb)) {
+ rsk = nsk;
goto reset;
+ }
return 0;
}
}
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+ rsk = sk;
goto reset;
+ }
TCP_CHECK_TIMER(sk);
return 0;
reset:
- tcp_v4_send_reset(skb);
+ tcp_v4_send_reset(rsk, skb);
discard:
kfree_skb(skb);
/* Be careful here. If this function gets more complicated and
@@ -1140,7 +1697,7 @@ no_tcp_socket:
bad_packet:
TCP_INC_STATS_BH(TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(skb);
+ tcp_v4_send_reset(NULL, skb);
}
discard_it:
@@ -1263,6 +1820,15 @@ struct inet_connection_sock_af_ops ipv4_specific = {
#endif
};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_calc_md5_hash,
+ .md5_add = tcp_v4_md5_add_func,
+ .md5_parse = tcp_v4_parse_md5_keys,
+};
+#endif
+
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1302,6 +1868,9 @@ static int tcp_v4_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv4_specific;
icsk->icsk_sync_mss = tcp_sync_mss;
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv4_specific;
+#endif
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1325,6 +1894,15 @@ int tcp_v4_destroy_sock(struct sock *sk)
/* Cleans up our, hopefully empty, out_of_order_queue. */
__skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_TCP_MD5SIG
+ /* Clean up the MD5 key list, if any */
+ if (tp->md5sig_info) {
+ tcp_v4_clear_md5_list(sk);
+ kfree(tp->md5sig_info);
+ tp->md5sig_info = NULL;
+ }
+#endif
+
#ifdef CONFIG_NET_DMA
/* Cleans up our sk_async_wait_queue */
__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1385,7 +1963,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
if (st->state == TCP_SEQ_STATE_OPENREQ) {
struct request_sock *req = cur;
- icsk = inet_csk(st->syn_wait_sk);
+ icsk = inet_csk(st->syn_wait_sk);
req = req->dl_next;
while (1) {
while (req) {
@@ -1395,7 +1973,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
}
req = req->dl_next;
}
- if (++st->sbucket >= TCP_SYNQ_HSIZE)
+ if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
break;
get_req:
req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
@@ -1543,7 +2121,7 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos)
while (rc && pos) {
rc = established_get_next(seq, rc);
--pos;
- }
+ }
return rc;
}
@@ -1672,7 +2250,7 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
afinfo->seq_fops->read = seq_read;
afinfo->seq_fops->llseek = seq_lseek;
afinfo->seq_fops->release = seq_release_private;
-
+
p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
if (p)
p->data = afinfo;
@@ -1686,7 +2264,7 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
if (!afinfo)
return;
proc_net_remove(afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
@@ -1721,8 +2299,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
struct inet_sock *inet = inet_sk(sp);
- unsigned int dest = inet->daddr;
- unsigned int src = inet->rcv_saddr;
+ __be32 dest = inet->daddr;
+ __be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
__u16 srcp = ntohs(inet->sport);
@@ -1744,7 +2322,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
"%08X %5d %8d %lu %d %p %u %u %u %u %d",
i, src, srcp, dest, destp, sp->sk_state,
tp->write_seq - tp->snd_una,
- (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+ sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
+ (tp->rcv_nxt - tp->copied_seq),
timer_active,
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1759,7 +2338,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}
-static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
+static void get_timewait4_sock(struct inet_timewait_sock *tw,
+ char *tmpbuf, int i)
{
__be32 dest, src;
__u16 destp, srcp;
@@ -1872,7 +2452,8 @@ struct proto tcp_prot = {
void __init tcp_v4_init(struct net_proto_family *ops)
{
- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
+ if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
+ IPPROTO_TCP) < 0)
panic("Failed to create the TCP control socket.\n");
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0163d982690..4a3889dd194 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -45,8 +45,7 @@ struct inet_timewait_death_row tcp_death_row = {
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&tcp_death_row),
.twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
- inet_twdr_twkill_work,
- &tcp_death_row),
+ inet_twdr_twkill_work),
/* Short-time timewait calendar */
.twcal_hand = -1,
@@ -306,6 +305,28 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_ipv6only = np->ipv6only;
}
#endif
+
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * The timewait bucket does not have the key DB from the
+ * sock structure. We just make a quick copy of the
+ * md5 key being used (if indeed we are using one)
+ * so the timewait ack generating code has the key.
+ */
+ do {
+ struct tcp_md5sig_key *key;
+ memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
+ tcptw->tw_md5_keylen = 0;
+ key = tp->af_specific->md5_lookup(sk, sk);
+ if (key != NULL) {
+ memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
+ tcptw->tw_md5_keylen = key->keylen;
+ if (tcp_alloc_md5sig_pool() == NULL)
+ BUG();
+ }
+ } while(0);
+#endif
+
/* Linkage updates. */
__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
@@ -329,14 +350,24 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- if (net_ratelimit())
- printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+ LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
}
tcp_update_metrics(sk);
tcp_done(sk);
}
+/*
+ * Destructor hook for TCP timewait sockets.
+ *
+ * With CONFIG_TCP_MD5SIG, a timewait socket that carries an MD5 key
+ * (tw_md5_keylen != 0) gives back its MD5 signature pool reference via
+ * tcp_put_md5sig_pool(). Without CONFIG_TCP_MD5SIG the body is empty.
+ */
+void tcp_twsk_destructor(struct sock *sk)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+ if (twsk->tw_md5_keylen)
+ tcp_put_md5sig_pool();
+#endif
+}
+
+EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
+
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -435,6 +466,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
+#ifdef CONFIG_TCP_MD5SIG
+ newtp->md5sig_info = NULL; /*XXX*/
+ if (newtp->af_specific->md5_lookup(sk, newsk))
+ newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
@@ -455,7 +491,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock **prev)
{
struct tcphdr *th = skb->h.th;
- u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+ __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -617,6 +653,30 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
req, NULL);
if (child == NULL)
goto listen_overflow;
+#ifdef CONFIG_TCP_MD5SIG
+ else {
+ /* Copy over the MD5 key from the original socket */
+ struct tcp_md5sig_key *key;
+ struct tcp_sock *tp = tcp_sk(sk);
+ key = tp->af_specific->md5_lookup(sk, child);
+ if (key != NULL) {
+ /*
+ * We're using one, so create a matching key on the
+ * newsk structure. If we fail to get memory then we
+ * end up not copying the key across. Shucks.
+ */
+ char *newkey = kmemdup(key->key, key->keylen,
+ GFP_ATOMIC);
+ if (newkey) {
+ if (!tcp_alloc_md5sig_pool())
+ BUG();
+ tp->af_specific->md5_add(child, child,
+ newkey,
+ key->keylen);
+ }
+ }
+ }
+#endif
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
@@ -633,7 +693,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
embryonic_reset:
NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
if (!(flg & TCP_FLAG_RST))
- req->rsk_ops->send_reset(skb);
+ req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
return NULL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ca406157724..32c1a972fa3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -270,7 +270,7 @@ static u16 tcp_select_window(struct sock *sk)
}
static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
- __u32 tstamp)
+ __u32 tstamp, __u8 **md5_hash)
{
if (tp->rx_opt.tstamp_ok) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
@@ -298,16 +298,29 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
tp->rx_opt.eff_sacks--;
}
}
+#ifdef CONFIG_TCP_MD5SIG
+ if (md5_hash) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ *md5_hash = (__u8 *)ptr;
+ }
+#endif
}
/* Construct a tcp options header for a SYN or SYN_ACK packet.
* If this is every changed make sure to change the definition of
* MAX_SYN_SIZE to match the new maximum number of options that you
* can generate.
+ *
+ * Note - that with the RFC2385 TCP option, we make room for the
+ * 16 byte MD5 hash. This will be filled in later, so the pointer for the
+ * location to be filled is passed back up.
*/
static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
int offer_wscale, int wscale, __u32 tstamp,
- __u32 ts_recent)
+ __u32 ts_recent, __u8 **md5_hash)
{
/* We always get an MSS option.
* The option bytes which will be seen in normal data
@@ -346,6 +359,20 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
(wscale));
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * If MD5 is enabled, then we set the option, and include the size
+ * (always 18). The actual MD5 hash is added just before the
+ * packet is sent.
+ */
+ if (md5_hash) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ *md5_hash = (__u8 *) ptr;
+ }
+#endif
}
/* This routine actually transmits TCP packets queued in by
@@ -366,6 +393,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
int tcp_header_size;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *md5;
+ __u8 *md5_hash_location;
+#endif
struct tcphdr *th;
int sysctl_flags;
int err;
@@ -424,9 +455,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * Are we doing MD5 on this segment? If so - make
+ * room for it.
+ */
+ md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (md5)
+ tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
th = (struct tcphdr *) skb_push(skb, tcp_header_size);
skb->h.th = th;
- skb_set_owner_w(skb, sk);
/* Build TCP header and checksum it. */
th->source = inet->sport;
@@ -461,13 +501,34 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rx_opt.rcv_wscale,
tcb->when,
- tp->rx_opt.ts_recent);
+ tp->rx_opt.ts_recent,
+
+#ifdef CONFIG_TCP_MD5SIG
+ md5 ? &md5_hash_location :
+#endif
+ NULL);
} else {
tcp_build_and_update_options((__be32 *)(th + 1),
- tp, tcb->when);
+ tp, tcb->when,
+#ifdef CONFIG_TCP_MD5SIG
+ md5 ? &md5_hash_location :
+#endif
+ NULL);
TCP_ECN_send(sk, tp, skb, tcp_header_size);
}
+#ifdef CONFIG_TCP_MD5SIG
+ /* Calculate the MD5 hash, as we have all we need now */
+ if (md5) {
+ tp->af_specific->calc_md5_hash(md5_hash_location,
+ md5,
+ sk, NULL, NULL,
+ skb->h.th,
+ sk->sk_protocol,
+ skb->len);
+ }
+#endif
+
icsk->icsk_af_ops->send_check(sk, skb->len, skb);
if (likely(tcb->flags & TCPCB_FLAG_ACK))
@@ -479,19 +540,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_INC_STATS(TCP_MIB_OUTSEGS);
- err = icsk->icsk_af_ops->queue_xmit(skb, 0);
+ err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0);
if (likely(err <= 0))
return err;
tcp_enter_cwr(sk);
- /* NET_XMIT_CN is special. It does not guarantee,
- * that this packet is lost. It tells that device
- * is about to start to drop packets or already
- * drops some packets of the same priority and
- * invokes us to send less aggressively.
- */
- return err == NET_XMIT_CN ? 0 : err;
+ return net_xmit_eval(err);
#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
@@ -847,6 +902,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
(tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+#ifdef CONFIG_TCP_MD5SIG
+ if (tp->af_specific->md5_lookup(sk, sk))
+ mss_now -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
xmit_size_goal = mss_now;
if (doing_tso) {
@@ -2040,6 +2100,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct tcphdr *th;
int tcp_header_size;
struct sk_buff *skb;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *md5;
+ __u8 *md5_hash_location;
+#endif
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (skb == NULL)
@@ -2055,6 +2119,13 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
(ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
/* SACK_PERM is in the place of NOP NOP of TS */
((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+
+#ifdef CONFIG_TCP_MD5SIG
+ /* Are we doing MD5 on this segment? If so - make room for it */
+ md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+ if (md5)
+ tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
+#endif
skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
memset(th, 0, sizeof(struct tcphdr));
@@ -2092,11 +2163,29 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
TCP_SKB_CB(skb)->when,
- req->ts_recent);
+ req->ts_recent,
+ (
+#ifdef CONFIG_TCP_MD5SIG
+ md5 ? &md5_hash_location :
+#endif
+ NULL)
+ );
skb->csum = 0;
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS(TCP_MIB_OUTSEGS);
+
+#ifdef CONFIG_TCP_MD5SIG
+ /* Okay, we have all we need - do the md5 hash if needed */
+ if (md5) {
+ tp->af_specific->calc_md5_hash(md5_hash_location,
+ md5,
+ NULL, dst, req,
+ skb->h.th, sk->sk_protocol,
+ skb->len);
+ }
+#endif
+
return skb;
}
@@ -2115,6 +2204,11 @@ static void tcp_connect_init(struct sock *sk)
tp->tcp_header_len = sizeof(struct tcphdr) +
(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+#ifdef CONFIG_TCP_MD5SIG
+ if (tp->af_specific->md5_lookup(sk, sk) != NULL)
+ tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 4be336f1788..f230eeecf09 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -156,6 +156,8 @@ static __init int tcpprobe_init(void)
init_waitqueue_head(&tcpw.wait);
spin_lock_init(&tcpw.lock);
tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock);
+ if (IS_ERR(tcpw.fifo))
+ return PTR_ERR(tcpw.fifo);
if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
goto err0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index fb09ade5897..3355c276b61 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -297,7 +297,7 @@ static void tcp_retransmit_timer(struct sock *sk)
if (net_ratelimit()) {
struct inet_sock *inet = inet_sk(sk);
printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
- NIPQUAD(inet->daddr), htons(inet->dport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport),
inet->num, tp->snd_una, tp->snd_nxt);
}
#endif
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a3b7aa015a2..ddc4bcc5785 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -42,8 +42,8 @@
* with V_PARAM_SHIFT bits to the right of the binary point.
*/
#define V_PARAM_SHIFT 1
-static int alpha = 1<<V_PARAM_SHIFT;
-static int beta = 3<<V_PARAM_SHIFT;
+static int alpha = 2<<V_PARAM_SHIFT;
+static int beta = 4<<V_PARAM_SHIFT;
static int gamma = 1<<V_PARAM_SHIFT;
module_param(alpha, int, 0644);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 865d75214a9..035915fc9ed 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -92,22 +92,16 @@
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/inet.h>
-#include <linux/ipv6.h>
#include <linux/netdevice.h>
-#include <net/snmp.h>
-#include <net/ip.h>
#include <net/tcp_states.h>
-#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <net/sock.h>
-#include <net/udp.h>
#include <net/icmp.h>
#include <net/route.h>
-#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include "udp_impl.h"
/*
* Snmp MIB for the UDP layer
@@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock);
static int udp_port_rover;
-static inline int udp_lport_inuse(u16 num)
+static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
{
struct sock *sk;
struct hlist_node *node;
- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+ sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
if (inet_sk(sk)->num == num)
return 1;
return 0;
}
/**
- * udp_get_port - common port lookup for IPv4 and IPv6
+ * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
*
* @sk: socket struct in question
* @snum: port number to look up
+ * @udptable: hash list table, must be of UDP_HTABLE_SIZE
+ * @port_rover: pointer to record of last unallocated port
* @saddr_comp: AF-dependent comparison of bound local IP addresses
*/
-int udp_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
+int __udp_lib_get_port(struct sock *sk, unsigned short snum,
+ struct hlist_head udptable[], int *port_rover,
+ int (*saddr_comp)(const struct sock *sk1,
+ const struct sock *sk2 ) )
{
struct hlist_node *node;
struct hlist_head *head;
@@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
if (snum == 0) {
int best_size_so_far, best, result, i;
- if (udp_port_rover > sysctl_local_port_range[1] ||
- udp_port_rover < sysctl_local_port_range[0])
- udp_port_rover = sysctl_local_port_range[0];
+ if (*port_rover > sysctl_local_port_range[1] ||
+ *port_rover < sysctl_local_port_range[0])
+ *port_rover = sysctl_local_port_range[0];
best_size_so_far = 32767;
- best = result = udp_port_rover;
+ best = result = *port_rover;
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
int size;
- head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
@@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
result = sysctl_local_port_range[0]
+ ((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
- if (!udp_lport_inuse(result))
+ if (! __udp_lib_lport_inuse(result, udptable))
break;
}
if (i >= (1 << 16) / UDP_HTABLE_SIZE)
goto fail;
gotit:
- udp_port_rover = snum = result;
+ *port_rover = snum = result;
} else {
- head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
sk_for_each(sk2, node, head)
if (inet_sk(sk2)->num == snum &&
@@ -195,12 +193,12 @@ gotit:
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (*saddr_cmp)(sk, sk2) )
+ (*saddr_comp)(sk, sk2) )
goto fail;
}
inet_sk(sk)->num = snum;
if (sk_unhashed(sk)) {
- head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
@@ -210,7 +208,13 @@ fail:
return error;
}
-static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*scmp)(const struct sock *, const struct sock *))
+{
+ return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
+}
+
+inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
-
-static void udp_v4_hash(struct sock *sk)
-{
- BUG();
-}
-
-static void udp_v4_unhash(struct sock *sk)
-{
- write_lock_bh(&udp_hash_lock);
- if (sk_del_node_init(sk)) {
- inet_sk(sk)->num = 0;
- sock_prot_dec_use(sk->sk_prot);
- }
- write_unlock_bh(&udp_hash_lock);
-}
-
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif)
+static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport,
+ int dif, struct hlist_head udptable[])
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
unsigned short hnum = ntohs(dport);
int badness = -1;
- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ read_lock(&udp_hash_lock);
+ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
if (inet->num == hnum && !ipv6_only_sock(sk)) {
@@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport,
}
}
}
- return result;
-}
-
-static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif)
-{
- struct sock *sk;
-
- read_lock(&udp_hash_lock);
- sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
- if (sk)
- sock_hold(sk);
+ if (result)
+ sock_hold(result);
read_unlock(&udp_hash_lock);
- return sk;
+ return result;
}
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
@@ -340,7 +320,7 @@ found:
* to find the appropriate port.
*/
-void udp_err(struct sk_buff *skb, u32 info)
+void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
{
struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
@@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info)
int harderr;
int err;
- sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
+ sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
+ skb->dev->ifindex, udptable );
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -405,6 +386,11 @@ out:
sock_put(sk);
}
+/*
+ * Error handler for plain UDP: runs the shared __udp4_lib_err() code
+ * against the udp_hash socket table.
+ */
+__inline__ void udp_err(struct sk_buff *skb, u32 info)
+{
+ /* void function: call the helper, do not "return" its (void) result */
+ __udp4_lib_err(skb, info, udp_hash);
+}
+
/*
* Throw away all pending data and cancel the corking. Socket is locked.
*/
@@ -419,16 +405,58 @@ static void udp_flush_pending_frames(struct sock *sk)
}
}
+/**
+ * udp4_hwcsum_outgoing - handle outgoing HW checksumming
+ * @sk: socket we are sending on
+ * @skb: sk_buff containing the filled-in UDP header
+ * (checksum field must be zeroed out)
+ * @src: source address fed into the pseudo-header checksum
+ * @dst: destination address fed into the pseudo-header checksum
+ * @len: length value fed into the pseudo-header checksum
+ *
+ * With a single skb on the write queue only the pseudo-header sum is
+ * stored (complemented) in uh->check and csum_offset is set. With more
+ * than one skb queued, the checksum is computed in software over all
+ * queued skbs and ip_summed of the first skb is reset to CHECKSUM_NONE.
+ */
+static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
+ __be32 src, __be32 dst, int len )
+{
+ unsigned int offset;
+ struct udphdr *uh = skb->h.uh;
+ __wsum csum = 0;
+
+ if (skb_queue_len(&sk->sk_write_queue) == 1) {
+ /*
+ * Only one fragment on the socket.
+ */
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
+ } else {
+ /*
+ * HW-checksum won't work as there are two or more
+ * fragments on the socket so that all csums of sk_buffs
+ * should be together
+ */
+ offset = skb->h.raw - skb->data;
+ skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* NOTE: from here on skb is reused as the queue-walk cursor */
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ csum = csum_add(csum, skb->csum);
+ }
+
+ uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
+ /* a computed checksum of 0 is replaced by CSUM_MANGLED_0 */
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ }
+}
+
/*
* Push out all pending data as one UDP datagram. Socket is locked.
*/
-static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
+static int udp_push_pending_frames(struct sock *sk)
{
+ struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
struct sk_buff *skb;
struct udphdr *uh;
int err = 0;
+ __wsum csum = 0;
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
@@ -443,52 +471,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
uh->len = htons(up->len);
uh->check = 0;
- if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
+ if (up->pcflag) /* UDP-Lite */
+ csum = udplite_csum_outgoing(sk, skb);
+
+ else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
+
skb->ip_summed = CHECKSUM_NONE;
goto send;
- }
- if (skb_queue_len(&sk->sk_write_queue) == 1) {
- /*
- * Only one fragment on the socket.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- skb->csum = offsetof(struct udphdr, check);
- uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
- up->len, IPPROTO_UDP, 0);
- } else {
- skb->csum = csum_partial((char *)uh,
- sizeof(struct udphdr), skb->csum);
- uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
- up->len, IPPROTO_UDP, skb->csum);
- if (uh->check == 0)
- uh->check = -1;
- }
- } else {
- unsigned int csum = 0;
- /*
- * HW-checksum won't work as there are two or more
- * fragments on the socket so that all csums of sk_buffs
- * should be together.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- int offset = (unsigned char *)uh - skb->data;
- skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
- skb->ip_summed = CHECKSUM_NONE;
- } else {
- skb->csum = csum_partial((char *)uh,
- sizeof(struct udphdr), skb->csum);
- }
+ udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
+ goto send;
+
+ } else /* `normal' UDP */
+ csum = udp_csum_outgoing(sk, skb);
+
+ /* add protocol-dependent pseudo-header */
+ uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
+ sk->sk_protocol, csum );
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
- skb_queue_walk(&sk->sk_write_queue, skb) {
- csum = csum_add(csum, skb->csum);
- }
- uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
- up->len, IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = -1;
- }
send:
err = ip_push_pending_frames(sk);
out:
@@ -497,12 +501,6 @@ out:
return err;
}
-
-static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base)
-{
- return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
-}
-
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
@@ -516,8 +514,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
__be32 daddr, faddr, saddr;
__be16 dport;
u8 tos;
- int err;
+ int err, is_udplite = up->pcflag;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -622,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
{ .daddr = faddr,
.saddr = saddr,
.tos = tos } },
- .proto = IPPROTO_UDP,
+ .proto = sk->sk_protocol,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
@@ -668,13 +667,14 @@ back_from_confirm:
do_append_data:
up->len += ulen;
- err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
- sizeof(struct udphdr), &ipc, rt,
+ getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
+ err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
+ sizeof(struct udphdr), &ipc, rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk);
else if (!corkreq)
- err = udp_push_pending_frames(sk, up);
+ err = udp_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
release_sock(sk);
@@ -684,7 +684,7 @@ out:
if (free)
kfree(ipc.opt);
if (!err) {
- UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
+ UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
return len;
}
/*
@@ -695,7 +695,7 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+ UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -707,8 +707,8 @@ do_confirm:
goto out;
}
-static int udp_sendpage(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
+int udp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
struct udp_sock *up = udp_sk(sk);
int ret;
@@ -747,7 +747,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset,
up->len += size;
if (!(up->corkflag || (flags&MSG_MORE)))
- ret = udp_push_pending_frames(sk, up);
+ ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
out:
@@ -795,29 +795,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return(0);
}
-static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
-{
- return __skb_checksum_complete(skb);
-}
-
-static __inline__ int udp_checksum_complete(struct sk_buff *skb)
-{
- return skb->ip_summed != CHECKSUM_UNNECESSARY &&
- __udp_checksum_complete(skb);
-}
-
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
-static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
- int copied, err;
+ int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
/*
* Check any passed addresses
@@ -839,15 +828,25 @@ try_again:
msg->msg_flags |= MSG_TRUNC;
}
- if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else if (msg->msg_flags&MSG_TRUNC) {
- if (__udp_checksum_complete(skb))
+ /*
+ * Decide whether to checksum and/or copy data.
+ *
+ * UDP: checksum may have been computed in HW,
+ * (re-)compute it if message is truncated.
+ * UDP-Lite: always needs to checksum, no HW support.
+ */
+ copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
+
+ if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
+ if (__udp_lib_checksum_complete(skb))
goto csum_copy_err;
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else {
+ copy_only = 1;
+ }
+
+ if (copy_only)
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied );
+ else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
@@ -880,7 +879,7 @@ out:
return err;
csum_copy_err:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
skb_kill_datagram(sk, skb, flags);
@@ -912,11 +911,6 @@ int udp_disconnect(struct sock *sk, int flags)
return 0;
}
-static void udp_close(struct sock *sk, long timeout)
-{
- sk_common_release(sk);
-}
-
/* return:
* 1 if the UDP system should process it
* 0 if we should drop this packet
@@ -928,23 +922,32 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
return 1;
#else
struct udp_sock *up = udp_sk(sk);
- struct udphdr *uh = skb->h.uh;
+ struct udphdr *uh;
struct iphdr *iph;
int iphlen, len;
- __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
- __be32 *udpdata32 = (__be32 *)udpdata;
+ __u8 *udpdata;
+ __be32 *udpdata32;
__u16 encap_type = up->encap_type;
/* if we're overly short, let UDP handle it */
- if (udpdata > skb->tail)
+ len = skb->len - sizeof(struct udphdr);
+ if (len <= 0)
return 1;
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
- len = skb->tail - udpdata;
+ /* If this is a paged skb, make sure we pull up
+ * whatever data we need to look at. */
+ if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
+ return 1;
+
+ /* Now we can get the pointers */
+ uh = skb->h.uh;
+ udpdata = (__u8 *)uh + sizeof(struct udphdr);
+ udpdata32 = (__be32 *)udpdata;
switch (encap_type) {
default:
@@ -1013,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
-static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
@@ -1021,10 +1024,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
/*
* Charge it to the socket, dropping if the queue is full.
*/
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- return -1;
- }
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto drop;
nf_reset(skb);
if (up->encap_type) {
@@ -1048,31 +1049,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if (ret < 0) {
/* process the ESP packet */
ret = xfrm4_rcv_encap(skb, up->encap_type);
- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
return -ret;
}
/* FALLTHROUGH -- it's a UDP Packet */
}
- if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
- if (__udp_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- kfree_skb(skb);
- return -1;
+ /*
+ * UDP-Lite specific tests, ignored on UDP sockets
+ */
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+
+ /*
+ * MIB statistics other than incrementing the error count are
+ * disabled for the following two types of errors: these depend
+ * on the application settings, not on the functioning of the
+ * protocol stack as such.
+ *
+ * RFC 3828 here recommends (sec 3.3): "There should also be a
+ * way ... to ... at least let the receiving application block
+ * delivery of packets with coverage values less than a value
+ * provided by the application."
+ */
+ if (up->pcrlen == 0) { /* full coverage was set */
+ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
+ "%d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
+ goto drop;
}
+ /* The next case involves violating the min. coverage requested
+ * by the receiver. This is subtle: if receiver wants x and x is
+ * greater than the buffersize/MTU then receiver will complain
+ * that it wants x while sender emits packets of smaller size y.
+ * Therefore the above ...()->partial_cov statement is essential.
+ */
+ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ LIMIT_NETDEBUG(KERN_WARNING
+ "UDPLITE: coverage %d too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ goto drop;
+ }
+ }
+
+ if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
+ if (__udp_lib_checksum_complete(skb))
+ goto drop;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
- UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- kfree_skb(skb);
- return -1;
+ UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+ goto drop;
}
- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+
+ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
return 0;
+
+drop:
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+ kfree_skb(skb);
+ return -1;
}
/*
@@ -1081,14 +1119,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
- __be32 saddr, __be32 daddr)
+static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
+ struct udphdr *uh,
+ __be32 saddr, __be32 daddr,
+ struct hlist_head udptable[])
{
struct sock *sk;
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
@@ -1122,65 +1162,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
* Otherwise, csum completion requires checksumming packet body,
* including udp header and folding it to skb->csum.
*/
-static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, __be32 saddr, __be32 daddr)
+static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
{
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
+ if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ skb->len, IPPROTO_UDP, skb->csum ))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+ skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
+ skb->nh.iph->daddr,
+ skb->len, IPPROTO_UDP, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
+
+ /* UDP = UDP-Lite with a non-partial checksum coverage */
+ UDP_SKB_CB(skb)->partial_cov = 0;
}
/*
* All we need to do is get the socket, and then do a checksum.
*/
-int udp_rcv(struct sk_buff *skb)
+int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
+ int is_udplite)
{
struct sock *sk;
- struct udphdr *uh;
+ struct udphdr *uh = skb->h.uh;
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
__be32 saddr = skb->nh.iph->saddr;
__be32 daddr = skb->nh.iph->daddr;
- int len = skb->len;
/*
- * Validate the packet and the UDP length.
+ * Validate the packet.
*/
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto no_header;
-
- uh = skb->h.uh;
+ goto drop; /* No space for header. */
ulen = ntohs(uh->len);
-
- if (ulen > len || ulen < sizeof(*uh))
+ if (ulen > skb->len)
goto short_packet;
- if (pskb_trim_rcsum(skb, ulen))
- goto short_packet;
+ if(! is_udplite ) { /* UDP validates ulen. */
+
+ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
+ goto short_packet;
- udp_checksum_init(skb, uh, ulen, saddr, daddr);
+ udp4_csum_init(skb, uh);
+
+ } else { /* UDP-Lite validates cscov. */
+ if (udplite4_csum_init(skb, uh))
+ goto csum_error;
+ }
if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
+ return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
- sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
+ sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
+ skb->dev->ifindex, udptable );
if (sk != NULL) {
int ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
- * it it wants the return to be -protocol, or 0
+ * it wants the return to be -protocol, or 0
*/
if (ret > 0)
return -ret;
@@ -1192,10 +1242,10 @@ int udp_rcv(struct sk_buff *skb)
nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
- if (udp_checksum_complete(skb))
+ if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(UDP_MIB_NOPORTS);
+ UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1206,36 +1256,40 @@ int udp_rcv(struct sk_buff *skb)
return(0);
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+ is_udplite? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
ulen,
- len,
+ skb->len,
NIPQUAD(daddr),
ntohs(uh->dest));
-no_header:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- kfree_skb(skb);
- return(0);
+ goto drop;
csum_error:
/*
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+ is_udplite? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
NIPQUAD(daddr),
ntohs(uh->dest),
ulen);
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return(0);
}
-static int udp_destroy_sock(struct sock *sk)
+__inline__ int udp_rcv(struct sk_buff *skb)
+{
+ return __udp4_lib_rcv(skb, udp_hash, 0);
+}
+
+int udp_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_flush_pending_frames(sk);
@@ -1246,8 +1300,9 @@ static int udp_destroy_sock(struct sock *sk)
/*
* Socket option code for UDP
*/
-static int do_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen,
+ int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
int val;
@@ -1266,7 +1321,7 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
} else {
up->corkflag = 0;
lock_sock(sk);
- udp_push_pending_frames(sk, up);
+ (*push_pending_frames)(sk);
release_sock(sk);
}
break;
@@ -1284,6 +1339,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
}
break;
+ /*
+ * UDP-Lite's partial checksum coverage (RFC 3828).
+ */
+ /* The sender sets actual checksum coverage length via this option.
+ * The case coverage > packet length is handled by send module. */
+ case UDPLITE_SEND_CSCOV:
+ if (!up->pcflag) /* Disable the option on UDP sockets */
+ return -ENOPROTOOPT;
+ if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
+ val = 8;
+ up->pcslen = val;
+ up->pcflag |= UDPLITE_SEND_CC;
+ break;
+
+ /* The receiver specifies a minimum checksum coverage value. To make
+ * sense, this should be set to at least 8 (as done below). If zero is
+ * used, this again means full checksum coverage. */
+ case UDPLITE_RECV_CSCOV:
+ if (!up->pcflag) /* Disable the option on UDP sockets */
+ return -ENOPROTOOPT;
+ if (val != 0 && val < 8) /* Avoid silly minimal values. */
+ val = 8;
+ up->pcrlen = val;
+ up->pcflag |= UDPLITE_RECV_CC;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -1292,26 +1373,28 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
return err;
}
-static int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+int udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
{
- if (level != SOL_UDP)
- return ip_setsockopt(sk, level, optname, optval, optlen);
- return do_udp_setsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_push_pending_frames);
+ return ip_setsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
-static int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+int compat_udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
{
- if (level != SOL_UDP)
- return compat_ip_setsockopt(sk, level, optname, optval, optlen);
- return do_udp_setsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_push_pending_frames);
+ return compat_ip_setsockopt(sk, level, optname, optval, optlen);
}
#endif
-static int do_udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
{
struct udp_sock *up = udp_sk(sk);
int val, len;
@@ -1333,6 +1416,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
val = up->encap_type;
break;
+ /* The following two cannot be changed on UDP sockets, the return is
+ * always 0 (which corresponds to the full checksum coverage of UDP). */
+ case UDPLITE_SEND_CSCOV:
+ val = up->pcslen;
+ break;
+
+ case UDPLITE_RECV_CSCOV:
+ val = up->pcrlen;
+ break;
+
default:
return -ENOPROTOOPT;
};
@@ -1344,21 +1437,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
return 0;
}
-static int udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+int udp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
{
- if (level != SOL_UDP)
- return ip_getsockopt(sk, level, optname, optval, optlen);
- return do_udp_getsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return ip_getsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
-static int compat_udp_getsockopt(struct sock *sk, int level, int optname,
+int compat_udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- if (level != SOL_UDP)
- return compat_ip_getsockopt(sk, level, optname, optval, optlen);
- return do_udp_getsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return compat_ip_getsockopt(sk, level, optname, optval, optlen);
}
#endif
/**
@@ -1378,7 +1471,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
-
+ int is_lite = IS_UDPLITE(sk);
+
/* Check for false positives due to checksum errors */
if ( (mask & POLLRDNORM) &&
!(file->f_flags & O_NONBLOCK) &&
@@ -1388,8 +1482,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL) {
- if (udp_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+ if (udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
__skb_unlink(skb, rcvq);
kfree_skb(skb);
} else {
@@ -1411,7 +1505,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
- .close = udp_close,
+ .close = udp_lib_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
@@ -1422,8 +1516,8 @@ struct proto udp_prot = {
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
.backlog_rcv = udp_queue_rcv_skb,
- .hash = udp_v4_hash,
- .unhash = udp_v4_unhash,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
@@ -1442,7 +1536,7 @@ static struct sock *udp_get_first(struct seq_file *seq)
for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
struct hlist_node *node;
- sk_for_each(sk, node, &udp_hash[state->bucket]) {
+ sk_for_each(sk, node, state->hashtable + state->bucket) {
if (sk->sk_family == state->family)
goto found;
}
@@ -1463,7 +1557,7 @@ try_again:
} while (sk && sk->sk_family != state->family);
if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
- sk = sk_head(&udp_hash[state->bucket]);
+ sk = sk_head(state->hashtable + state->bucket);
goto try_again;
}
return sk;
@@ -1513,6 +1607,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
if (!s)
goto out;
s->family = afinfo->family;
+ s->hashtable = afinfo->hashtable;
s->seq_ops.start = udp_seq_start;
s->seq_ops.next = udp_seq_next;
s->seq_ops.show = afinfo->seq_show;
@@ -1579,7 +1674,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
atomic_read(&sp->sk_refcnt), sp);
}
-static int udp4_seq_show(struct seq_file *seq, void *v)
+int udp4_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-127s\n",
@@ -1602,6 +1697,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = {
.owner = THIS_MODULE,
.name = "udp",
.family = AF_INET,
+ .hashtable = udp_hash,
.seq_show = udp4_seq_show,
.seq_fops = &udp4_seq_fops,
};
@@ -1624,6 +1720,8 @@ EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
+EXPORT_SYMBOL(udp_lib_getsockopt);
+EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
new file mode 100644
index 00000000000..f6f4277ba6d
--- /dev/null
+++ b/net/ipv4/udp_impl.h
@@ -0,0 +1,38 @@
+#ifndef _UDP4_IMPL_H
+#define _UDP4_IMPL_H
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/protocol.h>
+#include <net/inet_common.h>
+
+extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
+extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
+
+extern int __udp_lib_get_port(struct sock *sk, unsigned short snum,
+ struct hlist_head udptable[], int *port_rover,
+ int (*)(const struct sock*,const struct sock*));
+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
+
+
+extern int udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen);
+extern int udp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+
+#ifdef CONFIG_COMPAT
+extern int compat_udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen);
+extern int compat_udp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+#endif
+extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
+extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
+extern int udp_destroy_sock(struct sock *sk);
+
+#ifdef CONFIG_PROC_FS
+extern int udp4_seq_show(struct seq_file *seq, void *v);
+#endif
+#endif /* _UDP4_IMPL_H */
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
new file mode 100644
index 00000000000..b28fe1edf98
--- /dev/null
+++ b/net/ipv4/udplite.c
@@ -0,0 +1,119 @@
+/*
+ * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
+ *
+ * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
+ *
+ * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ *
+ * Changes:
+ * Fixes:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "udp_impl.h"
+DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
+
+struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
+static int udplite_port_rover;
+
+int udplite_get_port(struct sock *sk, unsigned short p,
+ int (*c)(const struct sock *, const struct sock *))
+{
+ return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c);
+}
+
+static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
+{
+ return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
+static int udplite_rcv(struct sk_buff *skb)
+{
+ return __udp4_lib_rcv(skb, udplite_hash, 1);
+}
+
+static void udplite_err(struct sk_buff *skb, u32 info) /* .err_handler of udplite_protocol */
+{
+ __udp4_lib_err(skb, info, udplite_hash); /* void function: no value to return */
+}
+
+static struct net_protocol udplite_protocol = {
+ .handler = udplite_rcv,
+ .err_handler = udplite_err,
+ .no_policy = 1,
+};
+
+struct proto udplite_prot = {
+ .name = "UDP-Lite",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udplite_sk_init,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .backlog_rcv = udp_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .get_port = udplite_v4_get_port,
+ .obj_size = sizeof(struct udp_sock),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
+#endif
+};
+
+static struct inet_protosw udplite4_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDPLITE,
+ .prot = &udplite_prot,
+ .ops = &inet_dgram_ops,
+ .capability = -1,
+ .no_check = 0, /* must checksum (RFC 3828) */
+ .flags = INET_PROTOSW_PERMANENT,
+};
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations udplite4_seq_fops;
+static struct udp_seq_afinfo udplite4_seq_afinfo = {
+ .owner = THIS_MODULE,
+ .name = "udplite",
+ .family = AF_INET,
+ .hashtable = udplite_hash,
+ .seq_show = udp4_seq_show,
+ .seq_fops = &udplite4_seq_fops,
+};
+#endif
+
+void __init udplite4_register(void)
+{
+ if (proto_register(&udplite_prot, 1))
+ goto out_register_err;
+
+ if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0)
+ goto out_unregister_proto;
+
+ inet_register_protosw(&udplite4_protosw);
+
+#ifdef CONFIG_PROC_FS
+ if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
+ printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
+#endif
+ return;
+
+out_unregister_proto:
+ proto_unregister(&udplite_prot);
+out_register_err:
+ printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__);
+}
+
+EXPORT_SYMBOL(udplite_hash);
+EXPORT_SYMBOL(udplite_prot);
+EXPORT_SYMBOL(udplite_get_port);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1bed0cdf53e..fb9f69c616f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -72,8 +72,8 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
struct dst_entry *dst, *dst_prev;
struct rtable *rt0 = (struct rtable*)(*dst_p);
struct rtable *rt = rt0;
- u32 remote = fl->fl4_dst;
- u32 local = fl->fl4_src;
+ __be32 remote = fl->fl4_dst;
+ __be32 local = fl->fl4_src;
struct flowi fl_tunnel = {
.nl_u = {
.ip4_u = {
@@ -199,11 +199,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
switch (iph->protocol) {
case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
- u16 *ports = (u16 *)xprth;
+ __be16 *ports = (__be16 *)xprth;
fl->fl_ip_sport = ports[0];
fl->fl_ip_dport = ports[1];
@@ -273,6 +274,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt.idev))
in_dev_put(xdst->u.rt.idev);
+ if (likely(xdst->u.rt.peer))
+ inet_putpeer(xdst->u.rt.peer);
xfrm_dst_destroy(xdst);
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 6e48f52e197..deb4101a2a8 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -196,10 +196,3 @@ config IPV6_SUBTREES
If unsure, say N.
-config IPV6_ROUTE_FWMARK
- bool "IPv6: use netfilter MARK value as routing key"
- depends on IPV6_MULTIPLE_TABLES && NETFILTER
- ---help---
- If you say Y here, you will be able to specify different routes for
- packets with different mark values (see iptables(8), MARK target).
-
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index addcc011bc0..8bacda109b7 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,8 +5,8 @@
obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
- route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
- protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
+ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b312a5f7a75..9b0a9064315 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -232,7 +232,7 @@ static inline unsigned ipv6_addr_scope2type(unsigned scope)
int __ipv6_addr_type(const struct in6_addr *addr)
{
- u32 st;
+ __be32 st;
st = addr->s6_addr32[0];
@@ -1164,7 +1164,7 @@ record_it:
int ipv6_get_saddr(struct dst_entry *dst,
struct in6_addr *daddr, struct in6_addr *saddr)
{
- return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr);
+ return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
}
@@ -3098,10 +3098,9 @@ static inline int rt_scope(int ifa_scope)
static inline int inet6_ifaddr_msgsize(void)
{
- return nlmsg_total_size(sizeof(struct ifaddrmsg) +
- nla_total_size(16) +
- nla_total_size(sizeof(struct ifa_cacheinfo)) +
- 128);
+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(16) /* IFA_ADDRESS */
+ + nla_total_size(sizeof(struct ifa_cacheinfo));
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
@@ -3329,10 +3328,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, RTM_NEWADDR, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout_ifa;
- }
+ /* failure implies BUG in inet6_ifaddr_msgsize() */
+ BUG_ON(err < 0);
err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
errout_ifa:
@@ -3351,10 +3348,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
goto errout;
err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in inet6_ifaddr_msgsize() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
errout:
@@ -3365,6 +3360,8 @@ errout:
static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
__s32 *array, int bytes)
{
+ BUG_ON(bytes < (DEVCONF_MAX * 4));
+
memset(array, 0, bytes);
array[DEVCONF_FORWARDING] = cnf->forwarding;
array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
@@ -3397,80 +3394,76 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
}
-/* Maximum length of ifinfomsg attributes */
-#define INET6_IFINFO_RTA_SPACE \
- RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \
- RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ + \
- RTA_SPACE(sizeof(u32)) /* MTU */ + \
- RTA_SPACE(sizeof(int)) /* LINK */ + \
- RTA_SPACE(0) /* PROTINFO */ + \
- RTA_SPACE(sizeof(u32)) /* FLAGS */ + \
- RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \
- RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */
+static inline size_t inet6_if_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size( /* IFLA_PROTINFO */
+ nla_total_size(4) /* IFLA_INET6_FLAGS */
+ + nla_total_size(sizeof(struct ifla_cacheinfo))
+ + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ );
+}
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 pid, u32 seq, int event, unsigned int flags)
{
- struct net_device *dev = idev->dev;
- __s32 *array = NULL;
- struct ifinfomsg *r;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
- struct rtattr *subattr;
- __u32 mtu = dev->mtu;
- struct ifla_cacheinfo ci;
-
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
- r = NLMSG_DATA(nlh);
- r->ifi_family = AF_INET6;
- r->__ifi_pad = 0;
- r->ifi_type = dev->type;
- r->ifi_index = dev->ifindex;
- r->ifi_flags = dev_get_flags(dev);
- r->ifi_change = 0;
-
- RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+ struct net_device *dev = idev->dev;
+ struct nlattr *conf;
+ struct ifinfomsg *hdr;
+ struct nlmsghdr *nlh;
+ void *protoinfo;
+ struct ifla_cacheinfo ci;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ifi_family = AF_INET6;
+ hdr->__ifi_pad = 0;
+ hdr->ifi_type = dev->type;
+ hdr->ifi_index = dev->ifindex;
+ hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_change = 0;
+
+ NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
if (dev->addr_len)
- RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
- RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+ NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
if (dev->ifindex != dev->iflink)
- RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
-
- subattr = (struct rtattr*)skb->tail;
+ NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
- RTA_PUT(skb, IFLA_PROTINFO, 0, NULL);
+ protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+ if (protoinfo == NULL)
+ goto nla_put_failure;
- /* return the device flags */
- RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags);
+ NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
- /* return interface cacheinfo */
ci.max_reasm_len = IPV6_MAXPLEN;
ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
+ TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
ci.reachable_time = idev->nd_parms->reachable_time;
ci.retrans_time = idev->nd_parms->retrans_time;
- RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
- /* return the device sysctl params */
- if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL)
- goto rtattr_failure;
- ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array));
- RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array);
+ NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+ conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (conf == NULL)
+ goto nla_put_failure;
+ ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf));
/* XXX - Statistics/MC not implemented */
- subattr->rta_len = skb->tail - (u8*)subattr;
- nlh->nlmsg_len = skb->tail - b;
- kfree(array);
- return skb->len;
+ nla_nest_end(skb, protoinfo);
+ return nlmsg_end(skb, nlh);
-nlmsg_failure:
-rtattr_failure:
- kfree(array);
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3501,18 +3494,15 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
{
struct sk_buff *skb;
- int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
int err = -ENOBUFS;
- skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in inet6_if_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
errout:
@@ -3520,22 +3510,26 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
}
-/* Maximum length of prefix_cacheinfo attributes */
-#define INET6_PREFIX_RTA_SPACE \
- RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \
- RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */
+static inline size_t inet6_prefix_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct prefixmsg))
+ + nla_total_size(sizeof(struct in6_addr))
+ + nla_total_size(sizeof(struct prefix_cacheinfo));
+}
static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
- struct prefix_info *pinfo, u32 pid, u32 seq,
- int event, unsigned int flags)
+ struct prefix_info *pinfo, u32 pid, u32 seq,
+ int event, unsigned int flags)
{
- struct prefixmsg *pmsg;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ struct prefixmsg *pmsg;
+ struct nlmsghdr *nlh;
struct prefix_cacheinfo ci;
- nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
- pmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+ if (nlh == NULL)
+ return -ENOBUFS;
+
+ pmsg = nlmsg_data(nlh);
pmsg->prefix_family = AF_INET6;
pmsg->prefix_pad1 = 0;
pmsg->prefix_pad2 = 0;
@@ -3543,44 +3537,37 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
pmsg->prefix_len = pinfo->prefix_len;
pmsg->prefix_type = pinfo->type;
pmsg->prefix_pad3 = 0;
-
pmsg->prefix_flags = 0;
if (pinfo->onlink)
pmsg->prefix_flags |= IF_PREFIX_ONLINK;
if (pinfo->autoconf)
pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
- RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
+ NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
ci.preferred_time = ntohl(pinfo->prefered);
ci.valid_time = ntohl(pinfo->valid);
- RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
+ NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
+ return nlmsg_end(skb, nlh);
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
+nla_put_failure:
+ return nlmsg_cancel(skb, nlh);
}
static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo)
{
struct sk_buff *skb;
- int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
int err = -ENOBUFS;
- skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in inet6_prefix_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
errout:
@@ -3669,8 +3656,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
int __user *name, int nlen,
void __user *oldval,
size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
int *valp = table->data;
int new;
@@ -3982,10 +3968,9 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
struct addrconf_sysctl_table *t;
char *dev_name = NULL;
- t = kmalloc(sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
return;
- memcpy(t, &addrconf_sysctl, sizeof(*t));
for (i=0; t->addrconf_vars[i].data; i++) {
t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
t->addrconf_vars[i].de = NULL;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 858cae29581..e5cd83b2205 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,6 +49,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
+#include <net/udplite.h>
#include <net/tcp.h>
#include <net/ipip.h>
#include <net/protocol.h>
@@ -221,7 +222,7 @@ lookup_protocol:
* the user to assign a number at socket
* creation time automatically shares.
*/
- inet->sport = ntohs(inet->num);
+ inet->sport = htons(inet->num);
sk->sk_prot->hash(sk);
}
if (sk->sk_prot->init) {
@@ -341,7 +342,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
- inet->sport = ntohs(inet->num);
+ inet->sport = htons(inet->num);
inet->dport = 0;
inet->daddr = 0;
out:
@@ -678,7 +679,7 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
+ ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
np->rxopt.bits.osrcrt)) ||
@@ -719,10 +720,8 @@ snmp6_mib_free(void *ptr[2])
{
if (ptr == NULL)
return;
- if (ptr[0])
- free_percpu(ptr[0]);
- if (ptr[1])
- free_percpu(ptr[1]);
+ free_percpu(ptr[0]);
+ free_percpu(ptr[1]);
ptr[0] = ptr[1] = NULL;
}
@@ -737,8 +736,13 @@ static int __init init_ipv6_mibs(void)
if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
__alignof__(struct udp_mib)) < 0)
goto err_udp_mib;
+ if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udplite_mib;
return 0;
+err_udplite_mib:
+ snmp6_mib_free((void **)udp_stats_in6);
err_udp_mib:
snmp6_mib_free((void **)icmpv6_statistics);
err_icmp_mib:
@@ -753,6 +757,7 @@ static void cleanup_ipv6_mibs(void)
snmp6_mib_free((void **)ipv6_statistics);
snmp6_mib_free((void **)icmpv6_statistics);
snmp6_mib_free((void **)udp_stats_in6);
+ snmp6_mib_free((void **)udplite_stats_in6);
}
static int __init inet6_init(void)
@@ -780,10 +785,14 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_tcp_proto;
- err = proto_register(&rawv6_prot, 1);
+ err = proto_register(&udplitev6_prot, 1);
if (err)
goto out_unregister_udp_proto;
+ err = proto_register(&rawv6_prot, 1);
+ if (err)
+ goto out_unregister_udplite_proto;
+
/* Register the socket-side information for inet6_create. */
for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
@@ -837,6 +846,8 @@ static int __init inet6_init(void)
goto proc_tcp6_fail;
if (udp6_proc_init())
goto proc_udp6_fail;
+ if (udplite6_proc_init())
+ goto proc_udplite6_fail;
if (ipv6_misc_proc_init())
goto proc_misc6_fail;
@@ -862,6 +873,7 @@ static int __init inet6_init(void)
/* Init v6 transport protocols. */
udpv6_init();
+ udplitev6_init();
tcpv6_init();
ipv6_packet_init();
@@ -879,6 +891,8 @@ proc_if6_fail:
proc_anycast6_fail:
ipv6_misc_proc_exit();
proc_misc6_fail:
+ udplite6_proc_exit();
+proc_udplite6_fail:
udp6_proc_exit();
proc_udp6_fail:
tcp6_proc_exit();
@@ -902,6 +916,8 @@ out_unregister_sock:
sock_unregister(PF_INET6);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
+out_unregister_udplite_proto:
+ proto_unregister(&udplitev6_prot);
out_unregister_udp_proto:
proto_unregister(&udpv6_prot);
out_unregister_tcp_proto:
@@ -919,6 +935,7 @@ static void __exit inet6_exit(void)
ac6_proc_exit();
ipv6_misc_proc_exit();
udp6_proc_exit();
+ udplite6_proc_exit();
tcp6_proc_exit();
raw6_proc_exit();
#endif
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index b0d83e8e425..12c5a4dec09 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -354,10 +354,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+ tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
if (!tmp_hdr)
goto out;
- memcpy(tmp_hdr, skb->nh.raw, hdr_len);
if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
goto free_out;
skb->nh.ipv6h->priority = 0;
@@ -397,7 +396,7 @@ out:
}
static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 7206747022f..5c94fea90e9 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -207,7 +207,7 @@ out:
}
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
- u16 port, u32 info, u8 *payload)
+ __be16 port, u32 info, u8 *payload)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
@@ -318,13 +318,13 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
ipv6_addr_copy(&sin->sin6_addr,
(struct in6_addr *)(skb->nh.raw + serr->addr_offset));
if (np->sndflow)
- sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+ sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
ipv6_addr_set(&sin->sin6_addr, 0, 0,
htonl(0xffff),
- *(u32*)(skb->nh.raw + serr->addr_offset));
+ *(__be32*)(skb->nh.raw + serr->addr_offset));
}
}
@@ -397,12 +397,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
}
if (np->rxopt.bits.rxtclass) {
- int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff;
+ int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
- if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
- u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+ if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
+ __be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
}
@@ -560,12 +560,12 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
}
if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
- if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+ if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
err = -EINVAL;
goto exit_f;
}
}
- fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
+ fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
break;
case IPV6_2292HOPOPTS:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e78680a9985..25dcf69cd80 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -256,7 +256,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 88c96b10684..0711f92d6a1 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -284,10 +284,12 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
#ifdef CONFIG_IPV6_MIP6
__u16 dstbuf;
#endif
+ struct dst_entry *dst;
if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
!pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -298,7 +300,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
dstbuf = opt->dst1;
#endif
+ dst = dst_clone(skb->dst);
if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+ dst_release(dst);
skb = *skbp;
skb->h.raw += ((skb->h.raw[1]+1)<<3);
opt = IP6CB(skb);
@@ -310,7 +314,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
return 1;
}
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ dst_release(dst);
return -1;
}
@@ -365,7 +370,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
!pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -374,7 +380,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
skb->pkt_type != PACKET_HOST) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -388,7 +395,8 @@ looped_back:
* processed by own
*/
if (!addr) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -410,7 +418,8 @@ looped_back:
switch (hdr->type) {
case IPV6_SRCRT_TYPE_0:
if (hdr->hdrlen & 0x01) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
return -1;
}
@@ -419,14 +428,16 @@ looped_back:
case IPV6_SRCRT_TYPE_2:
/* Silently discard invalid RTH type 2 */
if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
break;
#endif
default:
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
return -1;
}
@@ -439,7 +450,8 @@ looped_back:
n = hdr->hdrlen >> 1;
if (hdr->segments_left > n) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
return -1;
}
@@ -449,12 +461,14 @@ looped_back:
*/
if (skb_cloned(skb)) {
struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- kfree_skb(skb);
/* the copy is a forwarded packet */
if (skb2 == NULL) {
- IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
return -1;
}
+ kfree_skb(skb);
*skbp = skb = skb2;
opt = IP6CB(skb2);
hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
@@ -475,12 +489,14 @@ looped_back:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&skb->nh.ipv6h->saddr,
IPPROTO_ROUTING) < 0) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
if (!ipv6_chk_home_addr(addr)) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -491,7 +507,8 @@ looped_back:
}
if (ipv6_addr_is_multicast(addr)) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -510,7 +527,8 @@ looped_back:
if (skb->dst->dev->flags&IFF_LOOPBACK) {
if (skb->nh.ipv6h->hop_limit <= 1) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0, skb->dev);
kfree_skb(skb);
@@ -632,24 +650,25 @@ static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
skb->nh.raw[optoff+1]);
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
+ pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
if (pkt_len <= IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return 0;
}
if (skb->nh.ipv6h->payload_len) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return 0;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 315bc1fbec3..21cbbbddaf4 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -77,7 +77,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
if (hp == NULL)
return -1;
if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off, *fp;
+ __be16 _frag_off, *fp;
fp = skb_header_pointer(skb,
start+offsetof(struct frag_hdr,
frag_off),
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 1896ecb5289..0862809ffcf 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -25,10 +25,6 @@ struct fib6_rule
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- u32 fwmark;
- u32 fwmask;
-#endif
u8 tclass;
};
@@ -67,7 +63,7 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
fib_rule_put(arg.rule);
if (arg.result)
- return (struct dst_entry *) arg.result;
+ return arg.result;
dst_hold(&ip6_null_entry.u.dst);
return &ip6_null_entry.u.dst;
@@ -130,22 +126,13 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
return 0;
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask)
- return 0;
-#endif
-
return 1;
}
static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
- [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
- [FRA_PRIORITY] = { .type = NLA_U32 },
+ FRA_GENERIC_POLICY,
[FRA_SRC] = { .len = sizeof(struct in6_addr) },
[FRA_DST] = { .len = sizeof(struct in6_addr) },
- [FRA_FWMARK] = { .type = NLA_U32 },
- [FRA_FWMASK] = { .type = NLA_U32 },
- [FRA_TABLE] = { .type = NLA_U32 },
};
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
@@ -155,8 +142,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
- if (frh->src_len > 128 || frh->dst_len > 128 ||
- (frh->tos & ~IPV6_FLOWINFO_MASK))
+ if (frh->src_len > 128 || frh->dst_len > 128)
goto errout;
if (rule->action == FR_ACT_TO_TBL) {
@@ -177,23 +163,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
sizeof(struct in6_addr));
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- if (tb[FRA_FWMARK]) {
- rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
- if (rule6->fwmark) {
- /*
- * if the mark value is non-zero,
- * all bits are compared by default
- * unless a mask is explicitly specified.
- */
- rule6->fwmask = 0xFFFFFFFF;
- }
- }
-
- if (tb[FRA_FWMASK])
- rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]);
-#endif
-
rule6->src.plen = frh->src_len;
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
@@ -225,14 +194,6 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
return 0;
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
- return 0;
-
- if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK])))
- return 0;
-#endif
-
return 1;
}
@@ -254,14 +215,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
&rule6->src.addr);
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- if (rule6->fwmark)
- NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
-
- if (rule6->fwmask || rule6->fwmark)
- NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
-#endif
-
return 0;
nla_put_failure:
@@ -278,6 +231,12 @@ static u32 fib6_rule_default_pref(void)
return 0x3FFF;
}
+static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+{
+ return nla_total_size(16) /* dst */
+ + nla_total_size(16); /* src */
+}
+
static struct fib_rules_ops fib6_rules_ops = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
@@ -287,6 +246,7 @@ static struct fib_rules_ops fib6_rules_ops = {
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.default_pref = fib6_rule_default_pref,
+ .nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
.rules_list = &fib6_rules,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4ec876066b3..3dcc4b7f41b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -177,7 +177,8 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
*/
dst = ip6_route_output(sk, fl);
if (dst->error) {
- IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTNOROUTES);
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = 1;
} else {
@@ -233,7 +234,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
len, fl->proto,
skb->csum);
} else {
- u32 tmp_csum = 0;
+ __wsum tmp_csum = 0;
skb_queue_walk(&sk->sk_write_queue, skb) {
tmp_csum = csum_add(tmp_csum, skb->csum);
@@ -241,13 +242,11 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
tmp_csum = csum_partial((char *)icmp6h,
sizeof(struct icmp6hdr), tmp_csum);
- tmp_csum = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- len, fl->proto, tmp_csum);
- icmp6h->icmp6_cksum = tmp_csum;
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
+ &fl->fl6_dst,
+ len, fl->proto,
+ tmp_csum);
}
- if (icmp6h->icmp6_cksum == 0)
- icmp6h->icmp6_cksum = -1;
ip6_push_pending_frames(sk);
out:
return err;
@@ -263,7 +262,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
{
struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
struct sk_buff *org_skb = msg->skb;
- __u32 csum = 0;
+ __wsum csum = 0;
csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
to, len, csum);
@@ -555,7 +554,7 @@ out:
icmpv6_xmit_unlock();
}
-static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
+static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
{
struct in6_addr *saddr, *daddr;
struct inet6_protocol *ipprot;
@@ -637,8 +636,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
break;
/* fall through */
case CHECKSUM_NONE:
- skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_ICMPV6, 0);
+ skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
+ IPPROTO_ICMPV6, 0));
if (__skb_checksum_complete(skb)) {
LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
NIP6(*saddr), NIP6(*daddr));
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 827f41d1478..c700302ad51 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -52,20 +52,20 @@ EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
/*
* request_sock (formerly open request) hash tables.
*/
-static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
const u32 rnd, const u16 synq_hsize)
{
- u32 a = raddr->s6_addr32[0];
- u32 b = raddr->s6_addr32[1];
- u32 c = raddr->s6_addr32[2];
+ u32 a = (__force u32)raddr->s6_addr32[0];
+ u32 b = (__force u32)raddr->s6_addr32[1];
+ u32 c = (__force u32)raddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += rnd;
__jhash_mix(a, b, c);
- a += raddr->s6_addr32[3];
- b += (u32)rport;
+ a += (__force u32)raddr->s6_addr32[3];
+ b += (__force u32)rport;
__jhash_mix(a, b, c);
return c & (synq_hsize - 1);
@@ -73,7 +73,7 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
struct request_sock *inet6_csk_search_req(const struct sock *sk,
struct request_sock ***prevp,
- const __u16 rport,
+ const __be16 rport,
const struct in6_addr *raddr,
const struct in6_addr *laddr,
const int iif)
@@ -139,9 +139,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
-int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+int inet6_csk_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi fl;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 8accd1fbeed..b7e5bae0e34 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(__inet6_hash);
*/
struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
- const u16 sport,
+ const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
const int dif)
@@ -146,8 +146,8 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
- const struct in6_addr *saddr, const u16 sport,
- const struct in6_addr *daddr, const u16 dport,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
const int dif)
{
struct sock *sk;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8fcae7a6510..96d8310ae9c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -50,7 +50,7 @@
struct rt6_statistics rt6_stats;
-static kmem_cache_t * fib6_node_kmem __read_mostly;
+static struct kmem_cache * fib6_node_kmem __read_mostly;
enum fib_walk_state_t
{
@@ -139,9 +139,9 @@ static __inline__ u32 fib6_new_sernum(void)
* test bit
*/
-static __inline__ int addr_bit_set(void *token, int fn_bit)
+static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
{
- __u32 *addr = token;
+ __be32 *addr = token;
return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
}
@@ -150,7 +150,7 @@ static __inline__ struct fib6_node * node_alloc(void)
{
struct fib6_node *fn;
- if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
+ if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL)
memset(fn, 0, sizeof(struct fib6_node));
return fn;
@@ -169,7 +169,6 @@ static __inline__ void rt6_release(struct rt6_info *rt)
static struct fib6_table fib6_main_tbl = {
.tb6_id = RT6_TABLE_MAIN,
- .tb6_lock = RW_LOCK_UNLOCKED,
.tb6_root = {
.leaf = &ip6_null_entry,
.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
@@ -187,6 +186,12 @@ static void fib6_link_table(struct fib6_table *tb)
{
unsigned int h;
+ /*
+ * Initialize table lock at a single place to give lockdep a key,
+ * tables aren't visible prior to being linked to the list.
+ */
+ rwlock_init(&tb->tb6_lock);
+
h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
/*
@@ -199,7 +204,6 @@ static void fib6_link_table(struct fib6_table *tb)
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
static struct fib6_table fib6_local_tbl = {
.tb6_id = RT6_TABLE_LOCAL,
- .tb6_lock = RW_LOCK_UNLOCKED,
.tb6_root = {
.leaf = &ip6_null_entry,
.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
@@ -213,7 +217,6 @@ static struct fib6_table *fib6_alloc_table(u32 id)
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table != NULL) {
table->tb6_id = id;
- table->tb6_lock = RW_LOCK_UNLOCKED;
table->tb6_root.leaf = &ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
}
@@ -431,7 +434,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
struct fib6_node *pn = NULL;
struct rt6key *key;
int bit;
- int dir = 0;
+ __be32 dir = 0;
__u32 sernum = fib6_new_sernum();
RT6_TRACE("fib6_add_1\n");
@@ -826,7 +829,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
struct lookup_args *args)
{
struct fib6_node *fn;
- int dir;
+ __be32 dir;
if (unlikely(args->offset == 0))
return NULL;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1d672b0547f..624fae251f4 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(ip6_fl_lock);
static DEFINE_RWLOCK(ip6_sk_fl_lock);
-static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
+static __inline__ struct ip6_flowlabel * __fl_lookup(__be32 label)
{
struct ip6_flowlabel *fl;
@@ -72,7 +72,7 @@ static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
return NULL;
}
-static struct ip6_flowlabel * fl_lookup(u32 label)
+static struct ip6_flowlabel * fl_lookup(__be32 label)
{
struct ip6_flowlabel *fl;
@@ -153,7 +153,7 @@ static void ip6_fl_gc(unsigned long dummy)
write_unlock(&ip6_fl_lock);
}
-static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
+static int fl_intern(struct ip6_flowlabel *fl, __be32 label)
{
fl->label = label & IPV6_FLOWLABEL_MASK;
@@ -182,7 +182,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
/* Socket flowlabel lists */
-struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
+struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
{
struct ipv6_fl_socklist *sfl;
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -330,8 +330,10 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *
fl->share = freq->flr_share;
addr_type = ipv6_addr_type(&freq->flr_dst);
if ((addr_type&IPV6_ADDR_MAPPED)
- || addr_type == IPV6_ADDR_ANY)
+ || addr_type == IPV6_ADDR_ANY) {
+ err = -EINVAL;
goto done;
+ }
ipv6_addr_copy(&fl->dst, &freq->flr_dst);
atomic_set(&fl->users, 1);
switch (fl->share) {
@@ -587,6 +589,8 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
while (!fl) {
if (++state->bucket <= FL_HASH_MASK)
fl = fl_ht[state->bucket];
+ else
+ break;
}
return fl;
}
@@ -623,9 +627,13 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
read_unlock_bh(&ip6_fl_lock);
}
-static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
+static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
- while(fl) {
+ if (v == SEQ_START_TOKEN)
+ seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
+ "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
+ else {
+ struct ip6_flowlabel *fl = v;
seq_printf(seq,
"%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n",
(unsigned)ntohl(fl->label),
@@ -636,17 +644,7 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
(long)(fl->expires - jiffies)/HZ,
NIP6(fl->dst),
fl->opt ? fl->opt->opt_nflen : 0);
- fl = fl->next;
}
-}
-
-static int ip6fl_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
- "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
- else
- ip6fl_fl_seq_show(seq, v);
return 0;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6b8e6d76a58..ad0b8abcdf4 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -60,14 +60,22 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
{
struct ipv6hdr *hdr;
u32 pkt_len;
+ struct inet6_dev *idev;
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop;
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ rcu_read_lock();
- IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+ idev = __in6_dev_get(skb->dev);
+
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
+ rcu_read_unlock();
goto out;
}
@@ -84,7 +92,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
* arrived via the sending interface (ethX), because of the
* nature of scoping architecture. --yoshfuji
*/
- IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
+ IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex;
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
@@ -104,7 +112,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
goto truncated;
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
hdr = skb->nh.ipv6h;
@@ -112,17 +120,21 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->nexthdr == NEXTHDR_HOP) {
if (ipv6_parse_hopopts(&skb) < 0) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
+ rcu_read_unlock();
return 0;
}
}
+ rcu_read_unlock();
+
return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
truncated:
- IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
err:
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
drop:
+ rcu_read_unlock();
kfree_skb(skb);
out:
return 0;
@@ -140,6 +152,7 @@ static inline int ip6_input_finish(struct sk_buff *skb)
unsigned int nhoff;
int nexthdr;
u8 hash;
+ struct inet6_dev *idev;
/*
* Parse extension headers
@@ -147,6 +160,7 @@ static inline int ip6_input_finish(struct sk_buff *skb)
rcu_read_lock();
resubmit:
+ idev = ip6_dst_idev(skb->dst);
if (!pskb_pull(skb, skb->h.raw - skb->data))
goto discard;
nhoff = IP6CB(skb)->nhoff;
@@ -185,24 +199,24 @@ resubmit:
if (ret > 0)
goto resubmit;
else if (ret == 0)
- IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
} else {
if (!raw_sk) {
if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff,
skb->dev);
}
} else
- IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
kfree_skb(skb);
}
rcu_read_unlock();
return 0;
discard:
- IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
kfree_skb(skb);
return 0;
@@ -219,7 +233,7 @@ int ip6_mc_input(struct sk_buff *skb)
struct ipv6hdr *hdr;
int deliver;
- IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
hdr = skb->nh.ipv6h;
deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 66716911962..7b7bd44fbf4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -72,23 +72,14 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
static inline int ip6_output_finish(struct sk_buff *skb)
{
-
struct dst_entry *dst = skb->dst;
- struct hh_cache *hh = dst->hh;
-
- if (hh) {
- int hh_alen;
-
- read_lock_bh(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(hh->hh_len);
- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
- read_unlock_bh(&hh->hh_lock);
- skb_push(skb, hh->hh_len);
- return hh->hh_output(skb);
- } else if (dst->neighbour)
+
+ if (dst->hh)
+ return neigh_hh_output(dst->hh, skb);
+ else if (dst->neighbour)
return dst->neighbour->output(skb);
- IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
@@ -118,6 +109,7 @@ static int ip6_output2(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
+ struct inet6_dev *idev = ip6_dst_idev(skb->dst);
if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
@@ -133,13 +125,13 @@ static int ip6_output2(struct sk_buff *skb)
ip6_dev_loopback_xmit);
if (skb->nh.ipv6h->hop_limit == 0) {
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
}
- IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
}
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
@@ -182,12 +174,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
if (skb_headroom(skb) < head_room) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- kfree_skb(skb);
- skb = skb2;
- if (skb == NULL) {
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ if (skb2 == NULL) {
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
return -ENOBUFS;
}
+ kfree_skb(skb);
+ skb = skb2;
if (sk)
skb_set_owner_w(skb, sk);
}
@@ -217,7 +211,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
if (tclass < 0)
tclass = 0;
- *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
+ *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -230,7 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_OUTREQUESTS);
return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
dst_output);
}
@@ -239,7 +234,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
- IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -267,7 +262,7 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
skb->nh.ipv6h = hdr;
- *(u32*)hdr = htonl(0x60000000);
+ *(__be32*)hdr = htonl(0x60000000);
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
@@ -373,7 +368,7 @@ int ip6_forward(struct sk_buff *skb)
goto error;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -406,7 +401,7 @@ int ip6_forward(struct sk_buff *skb)
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0, skb->dev);
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -419,13 +414,13 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb->dst;
@@ -464,14 +459,14 @@ int ip6_forward(struct sk_buff *skb)
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
- IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
- IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
@@ -481,11 +476,11 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
- IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
error:
- IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -499,12 +494,12 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
dst_release(to->dst);
to->dst = dst_clone(from->dst);
to->dev = from->dev;
+ to->mark = from->mark;
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
- to->nfmark = from->nfmark;
/* Connection association is same as pre-frag packet */
nf_conntrack_put(to->nfct);
to->nfct = from->nfct;
@@ -571,7 +566,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
- u32 frag_id = 0;
+ __be32 frag_id = 0;
int ptr, offset = 0, err=0;
u8 *prevhdr, nexthdr = 0;
@@ -620,14 +615,13 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb_shinfo(skb)->frag_list = NULL;
/* BUILD HEADER */
- tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+ *prevhdr = NEXTHDR_FRAGMENT;
+ tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
if (!tmp_hdr) {
- IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
return -ENOMEM;
}
- *prevhdr = NEXTHDR_FRAGMENT;
- memcpy(tmp_hdr, skb->nh.raw, hlen);
__skb_pull(skb, hlen);
fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
skb->nh.raw = __skb_push(skb, hlen);
@@ -643,7 +637,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
-
+
+ dst_hold(&rt->u.dst);
for (;;) {
/* Prepare header of the next frame,
@@ -667,7 +662,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
err = output(skb);
if(!err)
- IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -680,7 +675,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
kfree(tmp_hdr);
if (err == 0) {
- IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+ IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
+ dst_release(&rt->u.dst);
return 0;
}
@@ -690,7 +686,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
frag = skb;
}
- IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
+ dst_release(&rt->u.dst);
return err;
}
@@ -723,7 +720,8 @@ slow_path:
if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
- IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
@@ -784,15 +782,17 @@ slow_path:
if (err)
goto fail;
- IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
}
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_FRAGOKS);
kfree_skb(skb);
- IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
return err;
fail:
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
- IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
return err;
}
@@ -1265,7 +1265,7 @@ alloc_new_skb:
return 0;
error:
inet->cork.length -= length;
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -1311,7 +1311,7 @@ int ip6_push_pending_frames(struct sock *sk)
skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
- *(u32*)hdr = fl->fl6_flowlabel |
+ *(__be32*)hdr = fl->fl6_flowlabel |
htonl(0x60000000 | ((int)np->cork.tclass << 20));
if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
@@ -1326,7 +1326,7 @@ int ip6_push_pending_frames(struct sock *sk)
skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst);
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
if (err) {
if (err > 0)
@@ -1357,7 +1357,8 @@ void ip6_flush_pending_frames(struct sock *sk)
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 84d7ebdb9d2..8d918348f5b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -66,7 +66,7 @@ MODULE_LICENSE("GPL");
#define HASH_SIZE 32
-#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
+#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
(addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
(HASH_SIZE - 1))
@@ -215,11 +215,10 @@ ip6ip6_tnl_unlink(struct ip6_tnl *t)
* Create tunnel matching given parameters.
*
* Return:
- * 0 on success
+ * created tunnel or NULL
**/
-static int
-ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
+static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
@@ -236,11 +235,11 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
break;
}
if (i == IP6_TNL_MAX)
- return -ENOBUFS;
+ goto failed;
}
dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
if (dev == NULL)
- return -ENOMEM;
+ goto failed;
t = netdev_priv(dev);
dev->init = ip6ip6_tnl_dev_init;
@@ -248,13 +247,13 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
if ((err = register_netdevice(dev)) < 0) {
free_netdev(dev);
- return err;
+ goto failed;
}
dev_hold(dev);
-
ip6ip6_tnl_link(t);
- *pt = t;
- return 0;
+ return t;
+failed:
+ return NULL;
}
/**
@@ -268,32 +267,23 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
* tunnel device is created and registered for use.
*
* Return:
- * 0 if tunnel located or created,
- * -EINVAL if parameters incorrect,
- * -ENODEV if no matching tunnel available
+ * matching tunnel or NULL
**/
-static int
-ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
+static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
struct ip6_tnl *t;
- if (p->proto != IPPROTO_IPV6)
- return -EINVAL;
-
for (t = *ip6ip6_bucket(p); t; t = t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
- *pt = t;
- return (create ? -EEXIST : 0);
- }
+ ipv6_addr_equal(remote, &t->parms.raddr))
+ return t;
}
if (!create)
- return -ENODEV;
-
- return ip6_tnl_create(p, pt);
+ return NULL;
+ return ip6_tnl_create(p);
}
/**
@@ -391,7 +381,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
struct ip6_tnl *t;
@@ -434,12 +424,9 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PARAMPROB:
- /* ignore if parameter problem not caused by a tunnel
- encapsulation limit sub-option */
- if (code != ICMPV6_HDR_FIELD) {
- break;
- }
- teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+ teli = 0;
+ if (code == ICMPV6_HDR_FIELD)
+ teli = parse_tlv_tnl_enc_lim(skb, skb->data);
if (teli && teli == ntohl(info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -451,6 +438,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
"tunnel!\n", t->parms.name);
rel_msg = 1;
}
+ } else if (net_ratelimit()) {
+ printk(KERN_WARNING
+ "%s: Recipient unable to parse tunneled "
+ "packet!\n ", t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -470,6 +461,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
struct rt6_info *rt;
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
if (!skb2)
goto out;
@@ -504,6 +496,27 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
IP6_ECN_set_ce(inner_iph);
}
+static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ret = 0;
+
+ if (p->flags & IP6_TNL_F_CAP_RCV) {
+ struct net_device *ldev = NULL;
+
+ if (p->link)
+ ldev = dev_get_by_index(p->link);
+
+ if ((ipv6_addr_is_multicast(&p->laddr) ||
+ likely(ipv6_chk_addr(&p->laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(&p->raddr, NULL, 0)))
+ ret = 1;
+
+ if (ldev)
+ dev_put(ldev);
+ }
+ return ret;
+}
/**
* ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -528,7 +541,7 @@ ip6ip6_rcv(struct sk_buff *skb)
goto discard;
}
- if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
+ if (!ip6_tnl_rcv_ctl(t)) {
t->stat.rx_dropped++;
read_unlock(&ip6ip6_lock);
goto discard;
@@ -542,6 +555,7 @@ ip6ip6_rcv(struct sk_buff *skb)
skb->dev = t->dev;
dst_release(skb->dst);
skb->dst = NULL;
+ nf_reset(skb);
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
ip6ip6_ecn_decapsulate(ipv6h, skb);
@@ -559,31 +573,23 @@ discard:
return 0;
}
-static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
-{
- struct ipv6_tlv_tnl_enc_lim *tel;
- struct ipv6_txoptions *opt;
- __u8 *raw;
-
- int opt_len = sizeof(*opt) + 8;
-
- if (!(opt = kzalloc(opt_len, GFP_ATOMIC))) {
- return NULL;
- }
- opt->tot_len = opt_len;
- opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
- opt->opt_nflen = 8;
+struct ipv6_tel_txoption {
+ struct ipv6_txoptions ops;
+ __u8 dst_opt[8];
+};
- tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
- tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
- tel->length = 1;
- tel->encap_limit = encap_limit;
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+ memset(opt, 0, sizeof(struct ipv6_tel_txoption));
- raw = (__u8 *) opt->dst0opt;
- raw[5] = IPV6_TLV_PADN;
- raw[6] = 1;
+ opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+ opt->dst_opt[3] = 1;
+ opt->dst_opt[4] = encap_limit;
+ opt->dst_opt[5] = IPV6_TLV_PADN;
+ opt->dst_opt[6] = 1;
- return opt;
+ opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.opt_nflen = 8;
}
/**
@@ -606,6 +612,34 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ret = 0;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ struct net_device *ldev = NULL;
+
+ if (p->link)
+ ldev = dev_get_by_index(p->link);
+
+ if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0)))
+ printk(KERN_WARNING
+ "%s xmit: Local address not yet configured!\n",
+ p->name);
+ else if (!ipv6_addr_is_multicast(&p->raddr) &&
+ unlikely(ipv6_chk_addr(&p->raddr, NULL, 0)))
+ printk(KERN_WARNING
+ "%s xmit: Routing loop! "
+ "Remote address found on this node!\n",
+ p->name);
+ else
+ ret = 1;
+ if (ldev)
+ dev_put(ldev);
+ }
+ return ret;
+}
/**
* ip6ip6_tnl_xmit - encapsulate packet and send
* @skb: the outgoing socket buffer
@@ -625,8 +659,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->stat;
struct ipv6hdr *ipv6h = skb->nh.ipv6h;
- struct ipv6_txoptions *opt = NULL;
int encap_limit = -1;
+ struct ipv6_tel_txoption opt;
__u16 offset;
struct flowi fl;
struct dst_entry *dst;
@@ -643,10 +677,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
}
if (skb->protocol != htons(ETH_P_IPV6) ||
- !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
- ip6ip6_tnl_addr_conflict(t, ipv6h)) {
+ !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
goto tx_err;
- }
+
if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
@@ -656,20 +689,17 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
}
encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- }
+
memcpy(&fl, &t->fl, sizeof (fl));
proto = fl.proto;
dsfield = ipv6_get_dsfield(ipv6h);
if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
- fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
-
- if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
- goto tx_err;
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
@@ -691,7 +721,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err_dst_release;
}
mtu = dst_mtu(dst) - sizeof (*ipv6h);
- if (opt) {
+ if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
}
@@ -729,12 +759,13 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
skb->h.raw = skb->nh.raw;
- if (opt)
- ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
-
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
ipv6h = skb->nh.ipv6h;
- *(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+ *(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
dsfield = INET_ECN_encapsulate(0, dsfield);
ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
@@ -747,7 +778,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
- if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
+ if (net_xmit_eval(err) == 0) {
stats->tx_bytes += pkt_len;
stats->tx_packets++;
} else {
@@ -755,9 +786,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
stats->tx_aborted_errors++;
}
ip6_tnl_dst_store(t, dst);
-
- kfree(opt);
-
t->recursion--;
return 0;
tx_err_link_failure:
@@ -765,7 +793,6 @@ tx_err_link_failure:
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
- kfree(opt);
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
@@ -777,39 +804,19 @@ tx_err:
static void ip6_tnl_set_cap(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
- struct in6_addr *laddr = &p->laddr;
- struct in6_addr *raddr = &p->raddr;
- int ltype = ipv6_addr_type(laddr);
- int rtype = ipv6_addr_type(raddr);
+ int ltype = ipv6_addr_type(&p->laddr);
+ int rtype = ipv6_addr_type(&p->raddr);
p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
- if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY &&
- ((ltype|rtype) &
- (IPV6_ADDR_UNICAST|
- IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL|
- IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) {
- struct net_device *ldev = NULL;
- int l_ok = 1;
- int r_ok = 1;
-
- if (p->link)
- ldev = dev_get_by_index(p->link);
-
- if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0))
- l_ok = 0;
-
- if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0))
- r_ok = 0;
-
- if (l_ok && r_ok) {
- if (ltype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_XMIT;
- if (rtype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_RCV;
- }
- if (ldev)
- dev_put(ldev);
+ if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+ (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+ if (ltype&IPV6_ADDR_UNICAST)
+ p->flags |= IP6_TNL_F_CAP_XMIT;
+ if (rtype&IPV6_ADDR_UNICAST)
+ p->flags |= IP6_TNL_F_CAP_RCV;
}
}
@@ -843,8 +850,11 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
dev->iflink = p->link;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
- p->link, 0);
+ p->link, strict);
if (rt == NULL)
return;
@@ -919,26 +929,20 @@ static int
ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
- int create;
struct ip6_tnl_parm p;
struct ip6_tnl *t = NULL;
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6ip6_fb_tnl_dev) {
- if (copy_from_user(&p,
- ifr->ifr_ifru.ifru_data,
- sizeof (p))) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
err = -EFAULT;
break;
}
- if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
- t = netdev_priv(dev);
- else if (err)
- break;
- } else
+ t = ip6ip6_tnl_locate(&p, 0);
+ }
+ if (t == NULL)
t = netdev_priv(dev);
-
memcpy(&p, &t->parms, sizeof (p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
@@ -947,35 +951,36 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- create = (cmd == SIOCADDTUNNEL);
if (!capable(CAP_NET_ADMIN))
break;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
- err = -EFAULT;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
- }
- if (!create && dev != ip6ip6_fb_tnl_dev) {
- t = netdev_priv(dev);
- }
- if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
+ err = -EINVAL;
+ if (p.proto != IPPROTO_IPV6)
break;
- }
- if (cmd == SIOCCHGTUNNEL) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
+ t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
+ if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else
+ t = netdev_priv(dev);
+
ip6ip6_tnl_unlink(t);
err = ip6ip6_tnl_change(t, &p);
ip6ip6_tnl_link(t);
netdev_state_change(dev);
}
- if (copy_to_user(ifr->ifr_ifru.ifru_data,
- &t->parms, sizeof (p))) {
- err = -EFAULT;
- } else {
+ if (t) {
err = 0;
- }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+ err = -EFAULT;
+
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
break;
case SIOCDELTUNNEL:
err = -EPERM;
@@ -983,22 +988,18 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
if (dev == ip6ip6_fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
- sizeof (p))) {
- err = -EFAULT;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
- }
- err = ip6ip6_tnl_locate(&p, &t, 0);
- if (err)
+ err = -ENOENT;
+ if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL)
break;
- if (t == netdev_priv(ip6ip6_fb_tnl_dev)) {
- err = -EPERM;
+ err = -EPERM;
+ if (t->dev == ip6ip6_fb_tnl_dev)
break;
- }
- } else {
- t = netdev_priv(dev);
+ dev = t->dev;
}
- err = unregister_netdevice(t->dev);
+ err = unregister_netdevice(dev);
break;
default:
err = -EINVAL;
@@ -1149,6 +1150,20 @@ fail:
return err;
}
+static void __exit ip6ip6_destroy_tunnels(void)
+{
+ int h;
+ struct ip6_tnl *t;
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ while ((t = tnls_r_l[h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+
+ t = tnls_wc[0];
+ unregister_netdevice(t->dev);
+}
+
/**
* ip6_tunnel_cleanup - free resources and unregister protocol
**/
@@ -1158,7 +1173,9 @@ static void __exit ip6_tunnel_cleanup(void)
if (xfrm6_tunnel_deregister(&ip6ip6_handler))
printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
- unregister_netdev(ip6ip6_fb_tnl_dev);
+ rtnl_lock();
+ ip6ip6_destroy_tunnels();
+ rtnl_unlock();
}
module_init(ip6_tunnel_init);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 71f59f18ede..511730b67e9 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -176,7 +176,7 @@ out_ok:
}
static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
__be32 spi;
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index de6b91981b3..352690e2ab8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -51,6 +51,7 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/udp.h>
+#include <net/udplite.h>
#include <net/xfrm.h>
#include <asm/uaccess.h>
@@ -239,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
struct sk_buff *pktopt;
if (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_UDPLITE &&
sk->sk_protocol != IPPROTO_TCP)
break;
@@ -276,11 +278,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_family = PF_INET;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
+ struct proto *prot = &udp_prot;
+
+ if (sk->sk_protocol == IPPROTO_UDPLITE)
+ prot = &udplite_prot;
local_bh_disable();
sock_prot_dec_use(sk->sk_prot);
- sock_prot_inc_use(&udp_prot);
+ sock_prot_inc_use(prot);
local_bh_enable();
- sk->sk_prot = &udp_prot;
+ sk->sk_prot = prot;
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
@@ -813,6 +819,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_ADDRFORM:
if (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_UDPLITE &&
sk->sk_protocol != IPPROTO_TCP)
return -EINVAL;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -971,12 +978,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_UNICAST_HOPS:
- val = np->hop_limit;
- break;
-
case IPV6_MULTICAST_HOPS:
- val = np->mcast_hops;
+ {
+ struct dst_entry *dst;
+
+ if (optname == IPV6_UNICAST_HOPS)
+ val = np->hop_limit;
+ else
+ val = np->mcast_hops;
+
+ dst = sk_dst_get(sk);
+ if (dst) {
+ if (val < 0)
+ val = dst_metric(dst, RTAX_HOPLIMIT);
+ if (val < 0)
+ val = ipv6_get_hoplimit(dst->dev);
+ dst_release(dst);
+ }
+ if (val < 0)
+ val = ipv6_devconf.hop_limit;
break;
+ }
case IPV6_MULTICAST_LOOP:
val = np->mc_loop;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3b114e3fa2f..a1c231a04ac 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -83,7 +83,7 @@
struct mld2_grec {
__u8 grec_type;
__u8 grec_auxwords;
- __u16 grec_nsrcs;
+ __be16 grec_nsrcs;
struct in6_addr grec_mca;
struct in6_addr grec_src[0];
};
@@ -91,18 +91,18 @@ struct mld2_grec {
struct mld2_report {
__u8 type;
__u8 resv1;
- __u16 csum;
- __u16 resv2;
- __u16 ngrec;
+ __sum16 csum;
+ __be16 resv2;
+ __be16 ngrec;
struct mld2_grec grec[0];
};
struct mld2_query {
__u8 type;
__u8 code;
- __u16 csum;
- __u16 mrc;
- __u16 resv1;
+ __sum16 csum;
+ __be16 mrc;
+ __be16 resv1;
struct in6_addr mca;
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 qrv:3,
@@ -116,7 +116,7 @@ struct mld2_query {
#error "Please fix <asm/byteorder.h>"
#endif
__u8 qqic;
- __u16 nsrcs;
+ __be16 nsrcs;
struct in6_addr srcs[0];
};
@@ -1465,7 +1465,7 @@ static void mld_sendpack(struct sk_buff *skb)
struct inet6_dev *idev = in6_dev_get(skb->dev);
int err;
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
sizeof(struct ipv6hdr);
mldlen = skb->tail - skb->h.raw;
@@ -1477,9 +1477,9 @@ static void mld_sendpack(struct sk_buff *skb)
mld_dev_queue_xmit);
if (!err) {
ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
- IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
} else
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
if (likely(idev != NULL))
in6_dev_put(idev);
@@ -1763,7 +1763,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ rcu_read_lock();
+ IP6_INC_STATS(__in6_dev_get(dev),
+ IPSTATS_MIB_OUTREQUESTS);
+ rcu_read_unlock();
snd_addr = addr;
if (type == ICMPV6_MGM_REDUCTION) {
snd_addr = &all_routers;
@@ -1777,7 +1780,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
if (skb == NULL) {
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_lock();
+ IP6_INC_STATS(__in6_dev_get(dev),
+ IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
return;
}
@@ -1816,9 +1822,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
else
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES);
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
- IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
} else
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
if (likely(idev != NULL))
in6_dev_put(idev);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7ccdc8fc5a3..be7dd7db65d 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -262,10 +262,10 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
sel.proto = fl->proto;
sel.dport = xfrm_flowi_dport(fl);
if (sel.dport)
- sel.dport_mask = ~((__u16)0);
+ sel.dport_mask = htons(~0);
sel.sport = xfrm_flowi_sport(fl);
if (sel.sport)
- sel.sport_mask = ~((__u16)0);
+ sel.sport_mask = htons(~0);
sel.ifindex = fl->oif;
err = km_report(IPPROTO_DSTOPTS, &sel,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 41a8a5f0660..6a9f616de37 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -472,7 +472,9 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
inc_opt = 0;
}
- skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ skb = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + LL_RESERVED_SPACE(dev)),
1, &err);
if (skb == NULL) {
@@ -513,7 +515,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
skb->dst = dst;
idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
if (!err) {
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
@@ -561,7 +563,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
if (send_llinfo)
len += ndisc_opt_addr_space(dev);
- skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ skb = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + LL_RESERVED_SPACE(dev)),
1, &err);
if (skb == NULL) {
ND_PRINTK0(KERN_ERR
@@ -597,7 +601,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
/* send it! */
skb->dst = dst;
idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
if (!err) {
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
@@ -636,7 +640,9 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
if (dev->addr_len)
len += ndisc_opt_addr_space(dev);
- skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ skb = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + LL_RESERVED_SPACE(dev)),
1, &err);
if (skb == NULL) {
ND_PRINTK0(KERN_ERR
@@ -670,7 +676,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
/* send it! */
skb->dst = dst;
idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
if (!err) {
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
@@ -1261,10 +1267,11 @@ skip_defrtr:
}
if (ndopts.nd_opts_mtu) {
+ __be32 n;
u32 mtu;
- memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
- mtu = ntohl(mtu);
+ memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
+ mtu = ntohl(n);
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK2(KERN_WARNING
@@ -1446,7 +1453,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
rd_len &= ~0x7;
len += rd_len;
- buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ buff = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + LL_RESERVED_SPACE(dev)),
1, &err);
if (buff == NULL) {
ND_PRINTK0(KERN_ERR
@@ -1504,7 +1513,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
buff->dst = dst;
idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
if (!err) {
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS);
@@ -1658,8 +1667,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
int nlen, void __user *oldval,
size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- void **context)
+ void __user *newval, size_t newlen)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
@@ -1672,14 +1680,12 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
switch (ctl->ctl_name) {
case NET_NEIGH_REACHABLE_TIME:
ret = sysctl_jiffies(ctl, name, nlen,
- oldval, oldlenp, newval, newlen,
- context);
+ oldval, oldlenp, newval, newlen);
break;
case NET_NEIGH_RETRANS_TIME_MS:
case NET_NEIGH_REACHABLE_TIME_MS:
ret = sysctl_ms_jiffies(ctl, name, nlen,
- oldval, oldlenp, newval, newlen,
- context);
+ oldval, oldlenp, newval, newlen);
break;
default:
ret = 0;
@@ -1742,6 +1748,7 @@ int __init ndisc_init(struct net_proto_family *ops)
void ndisc_cleanup(void)
{
+ unregister_netdevice_notifier(&ndisc_netdev_notifier);
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 580b1aba672..f6294e5bcb3 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -31,7 +31,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
#endif
if (dst->error) {
- IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
return -EINVAL;
@@ -80,11 +80,11 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
return 0;
}
-unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
struct ipv6hdr *ip6h = skb->nh.ipv6h;
- unsigned int csum = 0;
+ __sum16 csum = 0;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -100,12 +100,13 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
}
/* fall through */
case CHECKSUM_NONE:
- skb->csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->csum = ~csum_unfold(
+ csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
skb->len - dataoff,
protocol,
csum_sub(0,
skb_checksum(skb, 0,
- dataoff, 0)));
+ dataoff, 0))));
csum = __skb_checksum_complete(skb);
}
return csum;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4bc4e5b3379..fc3e5eb4bc3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -6,7 +6,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
config NF_CONNTRACK_IPV6
- tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
+ tristate "IPv6 connection tracking support (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
---help---
Connection tracking keeps a record of what packets have passed
@@ -40,7 +40,7 @@ config IP6_NF_QUEUE
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_IPTABLES
- tristate "IP6 tables support (required for filtering/masq/NAT)"
+ tristate "IP6 tables support (required for filtering)"
depends on NETFILTER_XTABLES
help
ip6tables is a general, extensible packet identification framework.
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 9510c24ca8d..d4d9f182441 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -241,7 +241,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
pmsg->data_len = data_len;
pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
- pmsg->mark = entry->skb->nfmark;
+ pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -349,9 +349,10 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
if (v->data_len < sizeof(*user_iph))
return 0;
diff = v->data_len - e->skb->len;
- if (diff < 0)
- skb_trim(e->skb, v->data_len);
- else if (diff > 0) {
+ if (diff < 0) {
+ if (pskb_trim(e->skb, v->data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
@@ -619,6 +620,7 @@ static ctl_table ipq_root_table[] = {
{ .ctl_name = 0 }
};
+#ifdef CONFIG_PROC_FS
static int
ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
@@ -652,6 +654,7 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
len = 0;
return len;
}
+#endif /* CONFIG_PROC_FS */
static struct nf_queue_handler nfqh = {
.name = "ip6_queue",
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4ab368fa0b8..99502c5da4c 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -111,7 +111,7 @@ ip6_packet_match(const struct sk_buff *skb,
const char *outdev,
const struct ip6t_ip6 *ip6info,
unsigned int *protoff,
- int *fragoff)
+ int *fragoff, int *hotdrop)
{
size_t i;
unsigned long ret;
@@ -169,9 +169,11 @@ ip6_packet_match(const struct sk_buff *skb,
unsigned short _frag_off;
protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
- if (protohdr < 0)
+ if (protohdr < 0) {
+ if (_frag_off == 0)
+ *hotdrop = 1;
return 0;
-
+ }
*fragoff = _frag_off;
dprintf("Packet protocol %hi ?= %s%hi.\n",
@@ -290,7 +292,7 @@ ip6t_do_table(struct sk_buff **pskb,
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
- &protoff, &offset)) {
+ &protoff, &offset, &hotdrop)) {
struct ip6t_entry_target *t;
if (IP6T_MATCH_ITERATE(e, do_match,
@@ -411,6 +413,7 @@ mark_source_chains(struct xt_table_info *newinfo,
unsigned int pos = newinfo->hook_entry[hook];
struct ip6t_entry *e
= (struct ip6t_entry *)(entry0 + pos);
+ int visited = e->comefrom & (1 << hook);
if (!(valid_hooks & (1 << hook)))
continue;
@@ -431,13 +434,20 @@ mark_source_chains(struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
/* Unconditional return/END. */
- if (e->target_offset == sizeof(struct ip6t_entry)
+ if ((e->target_offset == sizeof(struct ip6t_entry)
&& (strcmp(t->target.u.user.name,
IP6T_STANDARD_TARGET) == 0)
&& t->verdict < 0
- && unconditional(&e->ipv6)) {
+ && unconditional(&e->ipv6)) || visited) {
unsigned int oldpos, size;
+ if (t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
/* Return: backtrack through the last
big jump. */
do {
@@ -475,6 +485,13 @@ mark_source_chains(struct xt_table_info *newinfo,
if (strcmp(t->target.u.user.name,
IP6T_STANDARD_TARGET) == 0
&& newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct ip6t_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -507,27 +524,6 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
}
static inline int
-standard_check(const struct ip6t_entry_target *t,
- unsigned int max_offset)
-{
- struct ip6t_standard_target *targ = (void *)t;
-
- /* Check standard info. */
- if (targ->verdict >= 0
- && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
- duprintf("ip6t_standard_check: bad verdict (%i)\n",
- targ->verdict);
- return 0;
- }
- if (targ->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
- targ->verdict);
- return 0;
- }
- return 1;
-}
-
-static inline int
check_match(struct ip6t_entry_match *m,
const char *name,
const struct ip6t_ip6 *ipv6,
@@ -584,12 +580,19 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
return -EINVAL;
}
+ if (e->target_offset + sizeof(struct ip6t_entry_target) >
+ e->next_offset)
+ return -EINVAL;
+
j = 0;
ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
if (ret != 0)
goto cleanup_matches;
t = ip6t_get_target(e);
+ ret = -EINVAL;
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ goto cleanup_matches;
target = try_then_request_module(xt_find_target(AF_INET6,
t->u.user.name,
t->u.user.revision),
@@ -607,12 +610,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
if (ret)
goto err;
- if (t->u.kernel.target == &ip6t_standard_target) {
- if (!standard_check(t, size)) {
- ret = -EINVAL;
- goto err;
- }
- } else if (t->u.kernel.target->checkentry
+ if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(name, e, target, t->data,
e->comefrom)) {
duprintf("ip_tables: check failed for `%s'.\n",
@@ -759,7 +757,7 @@ translate_table(const char *name,
if (ret != 0) {
IP6T_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ cleanup_entry, &i);
return ret;
}
@@ -769,7 +767,7 @@ translate_table(const char *name,
memcpy(newinfo->entries[i], entry0, newinfo->size);
}
- return ret;
+ return 0;
}
/* Gets counters. */
@@ -1438,6 +1436,9 @@ static void __exit ip6_tables_fini(void)
* If target header is found, its offset is set in *offset and return protocol
* number. Otherwise, return -1.
*
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
* Note that non-1st fragment is special case that "the protocol number
* of last header" is "next header" field in Fragment header. In this case,
* *offset is meaningless and fragment offset is stored in *fragoff if fragoff
@@ -1461,32 +1462,33 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
if (target < 0)
break;
- return -1;
+ return -ENOENT;
}
hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return -1;
+ return -EBADMSG;
if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off, *fp;
+ unsigned short _frag_off;
+ __be16 *fp;
fp = skb_header_pointer(skb,
start+offsetof(struct frag_hdr,
frag_off),
sizeof(_frag_off),
&_frag_off);
if (fp == NULL)
- return -1;
+ return -EBADMSG;
_frag_off = ntohs(*fp) & ~0x7;
if (_frag_off) {
if (target < 0 &&
((!ipv6_ext_hdr(hp->nexthdr)) ||
- nexthdr == NEXTHDR_NONE)) {
+ hp->nexthdr == NEXTHDR_NONE)) {
if (fragoff)
*fragoff = _frag_off;
return hp->nexthdr;
}
- return -1;
+ return -ENOENT;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cf537d3018..33b1faa90d7 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -69,9 +69,9 @@ static void dump_packet(const struct nf_loginfo *info,
/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
- (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20,
+ (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
ih->hop_limit,
- (ntohl(*(u_int32_t *)ih) & 0x000fffff));
+ (ntohl(*(__be32 *)ih) & 0x000fffff));
fragment = 0;
ptr = ip6hoff + sizeof(struct ipv6hdr);
@@ -270,11 +270,15 @@ static void dump_packet(const struct nf_loginfo *info,
}
break;
}
- case IPPROTO_UDP: {
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE: {
struct udphdr _udph, *uh;
- /* Max length: 10 "PROTO=UDP " */
- printk("PROTO=UDP ");
+ if (currenthdr == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ printk("PROTO=UDP " );
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ printk("PROTO=UDPLITE ");
if (fragment)
break;
@@ -436,13 +440,8 @@ ip6t_log_target(struct sk_buff **pskb,
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- if (loginfo->logflags & IP6T_LOG_NFLOG)
- nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
- "%s", loginfo->prefix);
- else
- ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
- loginfo->prefix);
-
+ ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
+ loginfo->prefix);
return IP6T_CONTINUE;
}
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index ec1b1608156..46486645eb7 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -54,9 +54,14 @@ match(const struct sk_buff *skb,
const struct ip6t_ah *ahinfo = matchinfo;
unsigned int ptr;
unsigned int hdrlen = 0;
+ int err;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ *hotdrop = 1;
return 0;
+ }
ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
if (ah == NULL) {
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 78d9c8b9e28..cd22eaaccdc 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -52,9 +52,14 @@ match(const struct sk_buff *skb,
struct frag_hdr _frag, *fh;
const struct ip6t_frag *fraginfo = matchinfo;
unsigned int ptr;
+ int err;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ *hotdrop = 1;
return 0;
+ }
fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
if (fh == NULL) {
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d32a205e3af..3f25babe044 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -65,9 +65,14 @@ match(const struct sk_buff *skb,
u8 _opttype, *tp = NULL;
u8 _optlen, *lp = NULL;
unsigned int optlen;
+ int err;
- if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0)
+ err = ipv6_find_hdr(skb, &ptr, match->data, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ *hotdrop = 1;
return 0;
+ }
oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
if (oh == NULL) {
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index bcb2e168a5b..54d7d14134f 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -58,9 +58,14 @@ match(const struct sk_buff *skb,
unsigned int hdrlen = 0;
unsigned int ret = 0;
struct in6_addr *ap, _addr;
+ int err;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ *hotdrop = 1;
return 0;
+ }
rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
if (rh == NULL) {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 386ea260e76..6250e86a6dd 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -149,11 +149,10 @@ ip6t_local_hook(unsigned int hook,
int (*okfn)(struct sk_buff *))
{
- unsigned long nfmark;
unsigned int ret;
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
- u_int32_t flowlabel;
+ u_int32_t flowlabel, mark;
#if 0
/* root is playing with raw sockets. */
@@ -165,10 +164,10 @@ ip6t_local_hook(unsigned int hook,
}
#endif
- /* save source/dest address, nfmark, hoplimit, flowlabel, priority, */
+ /* save source/dest address, mark, hoplimit, flowlabel, priority, */
memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
- nfmark = (*pskb)->nfmark;
+ mark = (*pskb)->mark;
hop_limit = (*pskb)->nh.ipv6h->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either */
@@ -179,7 +178,7 @@ ip6t_local_hook(unsigned int hook,
if (ret != NF_DROP && ret != NF_STOLEN
&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
|| memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
- || (*pskb)->nfmark != nfmark
+ || (*pskb)->mark != mark
|| (*pskb)->nh.ipv6h->hop_limit != hop_limit))
return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index e5e53fff9e3..a20615ffccf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -33,7 +33,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -43,8 +43,6 @@
#define DEBUGP(format, args...)
#endif
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-
static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -211,11 +209,6 @@ out:
return nf_conntrack_confirm(pskb);
}
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
- struct net_device *in,
- struct net_device *out,
- int (*okfn)(struct sk_buff *));
static unsigned int ipv6_defrag(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
@@ -331,26 +324,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = {
};
#ifdef CONFIG_SYSCTL
-
-/* From nf_conntrack_proto_icmpv6.c */
-extern unsigned int nf_ct_icmpv6_timeout;
-
-/* From nf_conntrack_reasm.c */
-extern unsigned int nf_ct_frag6_timeout;
-extern unsigned int nf_ct_frag6_low_thresh;
-extern unsigned int nf_ct_frag6_high_thresh;
-
-static struct ctl_table_header *nf_ct_ipv6_sysctl_header;
-
-static ctl_table nf_ct_sysctl_table[] = {
- {
- .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
- .procname = "nf_conntrack_icmpv6_timeout",
- .data = &nf_ct_icmpv6_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
+static ctl_table nf_ct_ipv6_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT,
.procname = "nf_conntrack_frag6_timeout",
@@ -377,26 +351,6 @@ static ctl_table nf_ct_sysctl_table[] = {
},
{ .ctl_name = 0 }
};
-
-static ctl_table nf_ct_netfilter_table[] = {
- {
- .ctl_name = NET_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = nf_ct_sysctl_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = nf_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
#endif
#if defined(CONFIG_NF_CT_NETLINK) || \
@@ -454,16 +408,14 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.tuple_to_nfattr = ipv6_tuple_to_nfattr,
.nfattr_to_tuple = ipv6_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_path = nf_net_netfilter_sysctl_path,
+ .ctl_table = nf_ct_ipv6_sysctl_table,
+#endif
.get_features = ipv6_get_features,
.me = THIS_MODULE,
};
-extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
@@ -479,19 +431,19 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
printk("nf_conntrack_ipv6: can't initialize frag6.\n");
return ret;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register tcp.\n");
goto cleanup_frag6;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register udp.\n");
goto cleanup_tcp;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
if (ret < 0) {
printk("nf_conntrack_ipv6: can't register icmpv6.\n");
goto cleanup_udp;
@@ -510,28 +462,16 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
"hook.\n");
goto cleanup_ipv6;
}
-#ifdef CONFIG_SYSCTL
- nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
- if (nf_ct_ipv6_sysctl_header == NULL) {
- printk("nf_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto cleanup_hooks;
- }
-#endif
return ret;
-#ifdef CONFIG_SYSCTL
- cleanup_hooks:
- nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
-#endif
cleanup_ipv6:
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
cleanup_icmpv6:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
cleanup_udp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
cleanup_tcp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
cleanup_frag6:
nf_ct_frag6_cleanup();
return ret;
@@ -540,14 +480,11 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
-#endif
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
nf_ct_frag6_cleanup();
}
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 34d447208ff..3905cacc69a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -29,11 +29,11 @@
#include <linux/seq_file.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
-unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
#if 0
#define DEBUGP printk
@@ -142,9 +142,6 @@ static int icmpv6_new(struct nf_conn *conntrack,
return 1;
}
-extern int
-nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
static int
icmpv6_error_message(struct sk_buff *skb,
unsigned int icmp6off,
@@ -155,7 +152,7 @@ icmpv6_error_message(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h;
struct icmp6hdr _hdr, *hp;
unsigned int inip6off;
- struct nf_conntrack_protocol *inproto;
+ struct nf_conntrack_l4proto *inproto;
u_int8_t inprotonum;
unsigned int inprotoff;
@@ -185,7 +182,7 @@ icmpv6_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
- inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
+ inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -290,7 +287,7 @@ static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
tuple->dst.u.icmp.code =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
tuple->src.u.icmp.id =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
+ *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
if (tuple->dst.u.icmp.type < 128
|| tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
@@ -301,10 +298,27 @@ static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
}
#endif
-struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *icmpv6_sysctl_header;
+static struct ctl_table icmpv6_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
+ .procname = "nf_conntrack_icmpv6_timeout",
+ .data = &nf_ct_icmpv6_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l3proto = PF_INET6,
- .proto = IPPROTO_ICMPV6,
+ .l4proto = IPPROTO_ICMPV6,
.name = "icmpv6",
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
@@ -318,6 +332,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
.tuple_to_nfattr = icmpv6_tuple_to_nfattr,
.nfattr_to_tuple = icmpv6_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_header = &icmpv6_sysctl_header,
+ .ctl_table = icmpv6_sysctl_table,
+#endif
};
-EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
+EXPORT_SYMBOL(nf_conntrack_l4proto_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index bf93c1ea6be..37e5fca923a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -72,7 +72,7 @@ struct nf_ct_frag6_queue
struct hlist_node list;
struct list_head lru_list; /* lru list member */
- __u32 id; /* fragment id */
+ __be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
@@ -115,28 +115,28 @@ static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
write_unlock(&nf_ct_frag6_lock);
}
-static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
{
u32 a, b, c;
- a = saddr->s6_addr32[0];
- b = saddr->s6_addr32[1];
- c = saddr->s6_addr32[2];
+ a = (__force u32)saddr->s6_addr32[0];
+ b = (__force u32)saddr->s6_addr32[1];
+ c = (__force u32)saddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += nf_ct_frag6_hash_rnd;
__jhash_mix(a, b, c);
- a += saddr->s6_addr32[3];
- b += daddr->s6_addr32[0];
- c += daddr->s6_addr32[1];
+ a += (__force u32)saddr->s6_addr32[3];
+ b += (__force u32)daddr->s6_addr32[0];
+ c += (__force u32)daddr->s6_addr32[1];
__jhash_mix(a, b, c);
- a += daddr->s6_addr32[2];
- b += daddr->s6_addr32[3];
- c += id;
+ a += (__force u32)daddr->s6_addr32[2];
+ b += (__force u32)daddr->s6_addr32[3];
+ c += (__force u32)id;
__jhash_mix(a, b, c);
return c & (FRAG6Q_HASHSZ - 1);
@@ -338,7 +338,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
static struct nf_ct_frag6_queue *
-nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
+nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst)
{
struct nf_ct_frag6_queue *fq;
@@ -366,7 +366,7 @@ oom:
}
static __inline__ struct nf_ct_frag6_queue *
-fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
{
struct nf_ct_frag6_queue *fq;
struct hlist_node *n;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index efee7a6301a..35249d8487b 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -49,6 +49,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
fold_prot_inuse(&tcpv6_prot));
seq_printf(seq, "UDP6: inuse %d\n",
fold_prot_inuse(&udpv6_prot));
+ seq_printf(seq, "UDPLITE6: inuse %d\n",
+ fold_prot_inuse(&udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
fold_prot_inuse(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
@@ -133,6 +135,14 @@ static struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_SENTINEL
};
+static struct snmp_mib snmp6_udplite6_list[] = {
+ SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS),
+ SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
+ SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
+ SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_SENTINEL
+};
+
static unsigned long
fold_field(void *mib[], int offt)
{
@@ -161,11 +171,13 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
if (idev) {
seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
+ snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list);
snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
} else {
snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+ snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
}
return 0;
}
@@ -281,6 +293,9 @@ int snmp6_alloc_dev(struct inet6_dev *idev)
if (!idev || !idev->dev)
return -EINVAL;
+ if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip;
if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
__alignof__(struct icmpv6_mib)) < 0)
goto err_icmp;
@@ -288,12 +303,15 @@ int snmp6_alloc_dev(struct inet6_dev *idev)
return 0;
err_icmp:
+ snmp6_mib_free((void **)idev->stats.ipv6);
+err_ip:
return err;
}
int snmp6_free_dev(struct inet6_dev *idev)
{
snmp6_mib_free((void **)idev->stats.icmpv6);
+ snmp6_mib_free((void **)idev->stats.ipv6);
return 0;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d09329ca326..4ae1b19ada5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -220,7 +220,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
- __u32 v4addr = 0;
+ __be32 v4addr = 0;
int addr_type;
int err;
@@ -290,7 +290,7 @@ out:
void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
- int type, int code, int offset, u32 info)
+ int type, int code, int offset, __be32 info)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -370,9 +370,9 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
&skb->nh.ipv6h->daddr,
- skb->len, inet->num, 0);
+ skb->len, inet->num, 0));
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
@@ -479,8 +479,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
int offset;
int len;
int total_len;
- u32 tmp_csum;
- u16 csum;
+ __wsum tmp_csum;
+ __sum16 csum;
if (!rp->checksum)
goto send;
@@ -530,16 +530,15 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
/* in case cksum was not initialized */
if (unlikely(csum))
- tmp_csum = csum_sub(tmp_csum, csum);
+ tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
- tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+ csum = csum_ipv6_magic(&fl->fl6_src,
&fl->fl6_dst,
total_len, fl->proto, tmp_csum);
- if (tmp_csum == 0)
- tmp_csum = -1;
+ if (csum == 0 && fl->proto == IPPROTO_UDP)
+ csum = CSUM_MANGLED_0;
- csum = tmp_csum;
if (skb_store_bits(skb, offset, &csum, 2))
BUG();
@@ -586,7 +585,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
if (err)
goto error_fault;
- IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
dst_output);
if (err > 0)
@@ -600,11 +599,11 @@ error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
- IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err;
}
-static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
{
struct iovec *iov;
u8 __user *type = NULL;
@@ -616,7 +615,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
int i;
if (!msg->msg_iov)
- return;
+ return 0;
for (i = 0; i < msg->msg_iovlen; i++) {
iov = &msg->msg_iov[i];
@@ -638,8 +637,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
code = iov->iov_base;
if (type && code) {
- get_user(fl->fl_icmp_type, type);
- get_user(fl->fl_icmp_code, code);
+ if (get_user(fl->fl_icmp_type, type) ||
+ get_user(fl->fl_icmp_code, code))
+ return -EFAULT;
probed = 1;
}
break;
@@ -650,7 +650,8 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
/* check if type field is readable or not. */
if (iov->iov_len > 2 - len) {
u8 __user *p = iov->iov_base;
- get_user(fl->fl_mh_type, &p[2 - len]);
+ if (get_user(fl->fl_mh_type, &p[2 - len]))
+ return -EFAULT;
probed = 1;
} else
len += iov->iov_len;
@@ -664,6 +665,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
if (probed)
break;
}
+ return 0;
}
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -787,7 +789,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = ipv6_fixup_options(&opt_space, opt);
fl.proto = proto;
- rawv6_probe_proto_opt(&fl, msg);
+ err = rawv6_probe_proto_opt(&fl, msg);
+ if (err)
+ goto out;
ipv6_addr_copy(&fl.fl6_dst, daddr);
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
@@ -850,7 +854,8 @@ back_from_confirm:
}
done:
dst_release(dst);
- release_sock(sk);
+ if (!inet->hdrincl)
+ release_sock(sk);
out:
fl6_sock_release(flowlabel);
return err<0?err:len;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f39bbedd132..6f9a9046510 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -47,6 +47,7 @@
#include <net/snmp.h>
#include <net/ipv6.h>
+#include <net/ip6_route.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
@@ -76,7 +77,7 @@ struct frag_queue
struct hlist_node list;
struct list_head lru_list; /* lru list member */
- __u32 id; /* fragment id */
+ __be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
@@ -124,28 +125,28 @@ static __inline__ void fq_unlink(struct frag_queue *fq)
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
*/
-static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
{
u32 a, b, c;
- a = saddr->s6_addr32[0];
- b = saddr->s6_addr32[1];
- c = saddr->s6_addr32[2];
+ a = (__force u32)saddr->s6_addr32[0];
+ b = (__force u32)saddr->s6_addr32[1];
+ c = (__force u32)saddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += ip6_frag_hash_rnd;
__jhash_mix(a, b, c);
- a += saddr->s6_addr32[3];
- b += daddr->s6_addr32[0];
- c += daddr->s6_addr32[1];
+ a += (__force u32)saddr->s6_addr32[3];
+ b += (__force u32)daddr->s6_addr32[0];
+ c += (__force u32)daddr->s6_addr32[1];
__jhash_mix(a, b, c);
- a += daddr->s6_addr32[2];
- b += daddr->s6_addr32[3];
- c += id;
+ a += (__force u32)daddr->s6_addr32[2];
+ b += (__force u32)daddr->s6_addr32[3];
+ c += (__force u32)id;
__jhash_mix(a, b, c);
return c & (IP6Q_HASHSZ - 1);
@@ -257,7 +258,7 @@ static __inline__ void fq_kill(struct frag_queue *fq)
}
}
-static void ip6_evictor(void)
+static void ip6_evictor(struct inet6_dev *idev)
{
struct frag_queue *fq;
struct list_head *tmp;
@@ -284,14 +285,14 @@ static void ip6_evictor(void)
spin_unlock(&fq->lock);
fq_put(fq, &work);
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
}
}
static void ip6_frag_expire(unsigned long data)
{
struct frag_queue *fq = (struct frag_queue *) data;
- struct net_device *dev;
+ struct net_device *dev = NULL;
spin_lock(&fq->lock);
@@ -300,17 +301,19 @@ static void ip6_frag_expire(unsigned long data)
fq_kill(fq);
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ dev = dev_get_by_index(fq->iif);
+ if (!dev)
+ goto out;
+
+ rcu_read_lock();
+ IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+ IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
if (!(fq->last_in&FIRST_IN) || !fq->fragments)
goto out;
- dev = dev_get_by_index(fq->iif);
- if (!dev)
- goto out;
-
/*
But use as source device on which LAST ARRIVED
segment was received. And do not use fq->dev
@@ -318,8 +321,9 @@ static void ip6_frag_expire(unsigned long data)
*/
fq->fragments->dev = dev;
icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
- dev_put(dev);
out:
+ if (dev)
+ dev_put(dev);
spin_unlock(&fq->lock);
fq_put(fq, NULL);
}
@@ -366,7 +370,8 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
static struct frag_queue *
-ip6_frag_create(u32 id, struct in6_addr *src, struct in6_addr *dst)
+ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+ struct inet6_dev *idev)
{
struct frag_queue *fq;
@@ -386,12 +391,13 @@ ip6_frag_create(u32 id, struct in6_addr *src, struct in6_addr *dst)
return ip6_frag_intern(fq);
oom:
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
return NULL;
}
static __inline__ struct frag_queue *
-fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+ struct inet6_dev *idev)
{
struct frag_queue *fq;
struct hlist_node *n;
@@ -410,7 +416,7 @@ fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
}
read_unlock(&ip6_frag_lock);
- return ip6_frag_create(id, src, dst);
+ return ip6_frag_create(id, src, dst, idev);
}
@@ -428,7 +434,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
return;
}
@@ -455,7 +462,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
- IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, payload_len));
return;
@@ -571,7 +579,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return;
err:
- IP6_INC_STATS(IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
}
@@ -665,7 +673,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+ rcu_read_lock();
+ IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ rcu_read_unlock();
fq->fragments = NULL;
return 1;
@@ -677,7 +687,9 @@ out_oom:
if (net_ratelimit())
printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
out_fail:
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ rcu_read_lock();
+ IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ rcu_read_unlock();
return -1;
}
@@ -691,16 +703,16 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
hdr = skb->nh.ipv6h;
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len==0) {
- IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
return -1;
}
if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
- IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
return -1;
}
@@ -711,16 +723,17 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
if (!(fhdr->frag_off & htons(0xFFF9))) {
/* It is not a fragmented frame */
skb->h.raw += sizeof(struct frag_hdr);
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
return 1;
}
if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
- ip6_evictor();
+ ip6_evictor(ip6_dst_idev(skb->dst));
- if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) {
+ if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
+ ip6_dst_idev(skb->dst))) != NULL) {
int ret = -1;
spin_lock(&fq->lock);
@@ -736,7 +749,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
return ret;
}
- IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c953466b7af..8c3d56871b5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -330,6 +330,8 @@ static int inline rt6_check_neigh(struct rt6_info *rt)
read_lock_bh(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
m = 2;
+ else if (!(neigh->nud_state & NUD_FAILED))
+ m = 1;
read_unlock_bh(&neigh->lock);
}
return m;
@@ -347,9 +349,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
n = rt6_check_neigh(rt);
- if (n > 1)
- m |= 16;
- else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
+ if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
return -1;
return m;
}
@@ -380,10 +380,11 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
continue;
if (m > mpri) {
- rt6_probe(match);
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(match);
match = rt;
mpri = m;
- } else {
+ } else if (strict & RT6_LOOKUP_F_REACHABLE) {
rt6_probe(rt);
}
}
@@ -439,7 +440,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
if (pref == ICMPV6_ROUTER_PREF_INVALID)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
- lifetime = htonl(rinfo->lifetime);
+ lifetime = ntohl(rinfo->lifetime);
if (lifetime == 0xffffffff) {
/* infinity */
} else if (lifetime > 0x7fffffff/HZ) {
@@ -493,7 +494,7 @@ do { \
goto out; \
pn = fn->parent; \
if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
- fn = fib6_lookup(pn->subtree, NULL, saddr); \
+ fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
else \
fn = pn; \
if (fn->fn_flags & RTN_RTINFO) \
@@ -636,7 +637,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_LOOKUP_F_REACHABLE;
+ int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -710,12 +711,10 @@ void ip6_route_input(struct sk_buff *skb)
.ip6_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
- .fwmark = skb->nfmark,
-#endif
- .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+ .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
},
},
+ .mark = skb->mark,
.proto = iph->nexthdr,
};
@@ -733,7 +732,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_LOOKUP_F_REACHABLE;
+ int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -941,7 +940,7 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
fib6_force_start_gc();
out:
- return (struct dst_entry *)rt;
+ return &rt->u.dst;
}
int ndisc_dst_gc(int *more)
@@ -1224,7 +1223,7 @@ out:
if (idev)
in6_dev_put(idev);
if (rt)
- dst_free((struct dst_entry *) rt);
+ dst_free(&rt->u.dst);
return err;
}
@@ -1750,9 +1749,9 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
{
int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
- IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
- IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
kfree_skb(skb);
return 0;
@@ -1823,7 +1822,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_flags |= RTF_LOCAL;
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
- dst_free((struct dst_entry *) rt);
+ dst_free(&rt->u.dst);
return ERR_PTR(-ENOMEM);
}
@@ -2007,6 +2006,20 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return ip6_route_add(&cfg);
}
+static inline size_t rt6_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(16) /* RTA_SRC */
+ + nla_total_size(16) /* RTA_DST */
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + nla_total_size(16) /* RTA_PREFSRC */
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(4) /* RTA_IIF */
+ + nla_total_size(4) /* RTA_OIF */
+ + nla_total_size(4) /* RTA_PRIORITY */
+ + nla_total_size(sizeof(struct rta_cacheinfo));
+}
+
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 pid, u32 seq,
@@ -2014,7 +2027,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- struct rta_cacheinfo ci;
+ long expires;
u32 table;
if (prefix) { /* user wants prefix routes only */
@@ -2088,18 +2101,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
- ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
- if (rt->rt6i_expires)
- ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
- else
- ci.rta_expires = 0;
- ci.rta_used = rt->u.dst.__use;
- ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
- ci.rta_error = rt->u.dst.error;
- ci.rta_id = 0;
- ci.rta_ts = 0;
- ci.rta_tsage = 0;
- NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
+ expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
+ if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
+ expires, rt->u.dst.error) < 0)
+ goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2201,7 +2207,6 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
struct sk_buff *skb;
u32 pid = 0, seq = 0;
struct nlmsghdr *nlh = NULL;
- int payload = sizeof(struct rtmsg) + 256;
int err = -ENOBUFS;
if (info) {
@@ -2211,15 +2216,13 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
seq = nlh->nlmsg_seq;
}
- skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
if (skb == NULL)
goto errout;
err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in rt6_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
errout:
@@ -2247,7 +2250,6 @@ struct rt6_proc_arg
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
- int i;
if (arg->skip < arg->offset / RT6_INFO_LEN) {
arg->skip++;
@@ -2257,38 +2259,28 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
if (arg->len >= arg->length)
return 0;
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_dst.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT " %02x ",
+ NIP6(rt->rt6i_dst.addr),
rt->rt6i_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_src.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT " %02x ",
+ NIP6(rt->rt6i_src.addr),
rt->rt6i_src.plen);
#else
- sprintf(arg->buffer + arg->len,
- "00000000000000000000000000000000 00 ");
- arg->len += 36;
+ arg->len += sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000 00 ");
#endif
if (rt->rt6i_nexthop) {
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_nexthop->primary_key[i]);
- arg->len += 2;
- }
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT,
+ NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
} else {
- sprintf(arg->buffer + arg->len,
- "00000000000000000000000000000000");
- arg->len += 32;
+ arg->len += sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000");
}
arg->len += sprintf(arg->buffer + arg->len,
" %08x %08x %08x %08x %8s\n",
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b481a4d780c..77b7b091143 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -60,7 +60,7 @@
*/
#define HASH_SIZE 16
-#define HASH(addr) ((addr^(addr>>4))&0xF)
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
@@ -76,7 +76,7 @@ static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunne
static DEFINE_RWLOCK(ipip6_lock);
-static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
+static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
@@ -102,8 +102,8 @@ static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
{
- u32 remote = t->parms.iph.daddr;
- u32 local = t->parms.iph.saddr;
+ __be32 remote = t->parms.iph.daddr;
+ __be32 local = t->parms.iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -144,8 +144,8 @@ static void ipip6_tunnel_link(struct ip_tunnel *t)
static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
- u32 remote = parms->iph.daddr;
- u32 local = parms->iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
unsigned h = 0;
@@ -405,9 +405,9 @@ out:
/* Returns the embedded IPv4 address if the IPv6 address
comes from 6to4 (RFC 3056) addr space */
-static inline u32 try_6to4(struct in6_addr *v6dst)
+static inline __be32 try_6to4(struct in6_addr *v6dst)
{
- u32 dst = 0;
+ __be32 dst = 0;
if (v6dst->s6_addr16[0] == htons(0x2002)) {
/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
@@ -432,7 +432,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device *tdev; /* Device to other host */
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
- u32 dst = tiph->daddr;
+ __be32 dst = tiph->daddr;
int mtu;
struct in6_addr *addr6;
int addr_type;
@@ -809,7 +809,7 @@ static void __exit sit_destroy_tunnels(void)
}
}
-void __exit sit_cleanup(void)
+static void __exit sit_cleanup(void)
{
inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
@@ -819,7 +819,7 @@ void __exit sit_cleanup(void)
rtnl_unlock();
}
-int __init sit_init(void)
+static int __init sit_init(void)
{
int err;
@@ -854,3 +854,4 @@ int __init sit_init(void)
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS("sit0");
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c2a7c0cafe..c25e930c2c6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -66,10 +66,13 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
/* Socket used for sending RSTs and ACKs */
static struct socket *tcp6_socket;
-static void tcp_v6_send_reset(struct sk_buff *skb);
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void tcp_v6_send_check(struct sock *sk, int len,
struct sk_buff *skb);
@@ -78,6 +81,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
+static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+#endif
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
@@ -98,27 +105,20 @@ static void tcp_v6_hash(struct sock *sk)
}
}
-static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
+static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
struct in6_addr *saddr,
struct in6_addr *daddr,
- unsigned long base)
+ __wsum base)
{
return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
-static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
- if (skb->protocol == htons(ETH_P_IPV6)) {
- return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
- skb->h.th->dest,
- skb->h.th->source);
- } else {
- return secure_tcp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- skb->h.th->dest,
- skb->h.th->source);
- }
+ return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
+ skb->nh.ipv6h->saddr.s6_addr32,
+ skb->h.th->dest,
+ skb->h.th->source);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -215,6 +215,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_af_ops = &ipv6_mapped;
sk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
@@ -222,6 +225,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
sk->sk_backlog_rcv = tcp_v6_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_specific;
+#endif
goto failure;
} else {
ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
@@ -310,7 +316,7 @@ failure:
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
@@ -509,8 +515,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
err = ip6_xmit(sk, skb, &fl, opt, 0);
- if (err == NET_XMIT_CN)
- err = 0;
+ err = net_xmit_eval(err);
}
done:
@@ -526,7 +531,396 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet6_rsk(req)->pktopts);
}
-static struct request_sock_ops tcp6_request_sock_ops = {
+#ifdef CONFIG_TCP_MD5SIG
+/*
+ * Look up the MD5 signature key configured on @sk for the IPv6 peer @addr.
+ * Returns the matching entry of the socket's keys6 table, or NULL when the
+ * socket carries no MD5 state or has no key for that address.
+ */
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+						   struct in6_addr *addr)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_info *md5;
+	int idx;
+
+	BUG_ON(tp == NULL);
+
+	md5 = tp->md5sig_info;
+	if (md5 == NULL || md5->entries6 == 0)
+		return NULL;
+
+	for (idx = 0; idx < md5->entries6; idx++) {
+		struct tcp6_md5sig_key *cand = &md5->keys6[idx];
+
+		if (!ipv6_addr_cmp(&cand->addr, addr))
+			return (struct tcp_md5sig_key *)cand;
+	}
+	return NULL;
+}
+
+/* Key lookup keyed by the IPv6 destination address of @addr_sk. */
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
+						struct sock *addr_sk)
+{
+	struct in6_addr *peer = &inet6_sk(addr_sk)->daddr;
+
+	return tcp_v6_md5_do_lookup(sk, peer);
+}
+
+/* Key lookup keyed by the remote address recorded in request sock @req. */
+static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
+						      struct request_sock *req)
+{
+	struct in6_addr *peer = &inet6_rsk(req)->rmt_addr;
+
+	return tcp_v6_md5_do_lookup(sk, peer);
+}
+
+/*
+ * Install, or replace, the MD5 key used for IPv6 peer @peer on @sk.
+ *
+ * Ownership of @newkey passes to this function: it is stored in the key
+ * table on success and kfree()d on every failure path. All allocations
+ * use GFP_ATOMIC. Returns 0 on success or -ENOMEM.
+ */
+static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
+			     char *newkey, u8 newkeylen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_info *md5;
+	struct tcp6_md5sig_key *key;
+	struct tcp6_md5sig_key *keys;
+	int took_pool = 0;
+
+	key = (struct tcp6_md5sig_key *)tcp_v6_md5_do_lookup(sk, peer);
+	if (key) {
+		/* Existing entry for this peer: only swap the key material. */
+		kfree(key->key);
+		key->key = newkey;
+		key->keylen = newkeylen;
+		return 0;
+	}
+
+	if (!tp->md5sig_info) {
+		tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
+		if (!tp->md5sig_info) {
+			kfree(newkey);
+			return -ENOMEM;
+		}
+	}
+	md5 = tp->md5sig_info;
+
+	/*
+	 * Take the hash pool reference only when the IPv6 list goes from
+	 * empty to non-empty: tcp_v6_md5_do_del() and
+	 * tcp_v6_clear_md5_list() drop exactly one reference when the list
+	 * becomes empty, so one reference per added key would never be
+	 * fully released.
+	 * NOTE(review): assumes tcp_alloc_md5sig_pool() returns NULL on
+	 * failure and is reference counted - confirm in net/ipv4/tcp.c.
+	 */
+	if (md5->entries6 == 0) {
+		if (tcp_alloc_md5sig_pool() == NULL) {
+			kfree(newkey);
+			return -ENOMEM;
+		}
+		took_pool = 1;
+	}
+
+	/* Grow the table by one slot when it is full. */
+	if (md5->alloced6 == md5->entries6) {
+		keys = kmalloc((sizeof (md5->keys6[0]) *
+				(md5->entries6 + 1)), GFP_ATOMIC);
+		if (!keys) {
+			/* Undo only the reference taken by this call. */
+			if (took_pool)
+				tcp_free_md5sig_pool();
+			kfree(newkey);
+			return -ENOMEM;
+		}
+
+		if (md5->entries6)
+			memmove(keys, md5->keys6,
+				(sizeof (md5->keys6[0]) * md5->entries6));
+
+		kfree(md5->keys6);
+		md5->keys6 = keys;
+		md5->alloced6++;
+	}
+
+	ipv6_addr_copy(&md5->keys6[md5->entries6].addr, peer);
+	md5->keys6[md5->entries6].key = newkey;
+	md5->keys6[md5->entries6].keylen = newkeylen;
+	md5->entries6++;
+
+	return 0;
+}
+
+/* af_ops hook: add @newkey for the IPv6 destination address of @addr_sk. */
+static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
+			       u8 *newkey, __u8 newkeylen)
+{
+	struct in6_addr *peer = &inet6_sk(addr_sk)->daddr;
+
+	return tcp_v6_md5_do_add(sk, peer, newkey, newkeylen);
+}
+
+/*
+ * Remove the MD5 key configured on @sk for IPv6 peer @peer.
+ *
+ * Returns 0 when a key was removed and -ENOENT when the socket has no
+ * MD5 state or no key for that peer. When the last IPv6 key goes away
+ * the table itself is freed and the hash pool reference is dropped.
+ */
+static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_info *md5 = tp->md5sig_info;
+	int i;
+
+	if (!md5)
+		return -ENOENT;
+
+	for (i = 0; i < md5->entries6; i++) {
+		if (ipv6_addr_cmp(&md5->keys6[i].addr, peer) != 0)
+			continue;
+
+		/* Free the key */
+		kfree(md5->keys6[i].key);
+		md5->entries6--;
+
+		if (md5->entries6 == 0) {
+			kfree(md5->keys6);
+			md5->keys6 = NULL;
+			/*
+			 * Keep the capacity in step with the freed table,
+			 * otherwise the next add sees alloced6 != entries6,
+			 * skips the allocation and writes through NULL.
+			 */
+			md5->alloced6 = 0;
+
+			tcp_free_md5sig_pool();
+		} else if (md5->entries6 != i) {
+			/* shrink the database: close the gap at slot i */
+			memmove(&md5->keys6[i],
+				&md5->keys6[i+1],
+				(md5->entries6 - i)
+				* sizeof (md5->keys6[0]));
+		}
+
+		/* A key was removed: report success in both cases. */
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Drop every MD5 key held by @sk, for both the IPv6 and the IPv4 key
+ * tables, and free the tables. The caller must have checked that
+ * md5sig_info is set; the info structure itself is not freed here.
+ */
+static void tcp_v6_clear_md5_list (struct sock *sk)
+{
+	struct tcp_md5sig_info *md5 = tcp_sk(sk)->md5sig_info;
+	int n;
+
+	/* IPv6 peers */
+	if (md5->entries6) {
+		for (n = 0; n < md5->entries6; n++)
+			kfree(md5->keys6[n].key);
+		md5->entries6 = 0;
+		tcp_free_md5sig_pool();
+	}
+	kfree(md5->keys6);
+	md5->keys6 = NULL;
+	md5->alloced6 = 0;
+
+	/* IPv4 (mapped) peers */
+	if (md5->entries4) {
+		for (n = 0; n < md5->entries4; n++)
+			kfree(md5->keys4[n].key);
+		md5->entries4 = 0;
+		tcp_free_md5sig_pool();
+	}
+	kfree(md5->keys4);
+	md5->keys4 = NULL;
+	md5->alloced4 = 0;
+}
+
+/*
+ * Handle a struct tcp_md5sig passed from user space in @optval.
+ *
+ * A zero key length deletes the key for the given peer; otherwise the
+ * key is copied and added. V4-mapped peer addresses are routed to the
+ * IPv4 helpers using the low 32 bits of the address. Returns 0 or a
+ * negative errno (-EINVAL, -EFAULT, -ENOENT, -ENOMEM).
+ */
+static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
+				  int optlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig cmd;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+	struct in6_addr *peer = &sin6->sin6_addr;
+	u8 *newkey;
+
+	if (optlen < sizeof(cmd))
+		return -EINVAL;
+	if (copy_from_user(&cmd, optval, sizeof(cmd)))
+		return -EFAULT;
+	if (sin6->sin6_family != AF_INET6)
+		return -EINVAL;
+
+	if (cmd.tcpm_keylen == 0) {
+		/* Empty key: this is a delete request. */
+		if (tp->md5sig_info == NULL)
+			return -ENOENT;
+		if (ipv6_addr_type(peer) & IPV6_ADDR_MAPPED)
+			return tcp_v4_md5_do_del(sk, peer->s6_addr32[3]);
+		return tcp_v6_md5_do_del(sk, peer);
+	}
+
+	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+		return -EINVAL;
+
+	/* Process context: the info block may be allocated with GFP_KERNEL. */
+	if (tp->md5sig_info == NULL) {
+		tp->md5sig_info = kzalloc(sizeof(struct tcp_md5sig_info),
+					  GFP_KERNEL);
+		if (tp->md5sig_info == NULL)
+			return -ENOMEM;
+	}
+
+	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+	if (newkey == NULL)
+		return -ENOMEM;
+
+	if (ipv6_addr_type(peer) & IPV6_ADDR_MAPPED)
+		return tcp_v4_md5_do_add(sk, peer->s6_addr32[3],
+					 newkey, cmd.tcpm_keylen);
+
+	return tcp_v6_md5_do_add(sk, peer, newkey, cmd.tcpm_keylen);
+}
+
+/*
+ * Compute the TCP MD5 signature of a segment into @md5_hash (16 bytes).
+ *
+ * The digest covers, in order: the IPv6 pseudo-header built from @saddr,
+ * @daddr, @tcplen and @protocol; the fixed TCP header @th with its
+ * checksum field temporarily zeroed; the segment data following the
+ * options; and the shared key. @th->check is restored before returning
+ * on every path that modified it.
+ *
+ * Returns 0 on success. On failure @md5_hash is zero-filled and 1 is
+ * returned, so callers that test the result (the inbound check) can
+ * tell a failed computation from a mismatch; callers that ignore the
+ * result are unaffected.
+ */
+static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+				   struct in6_addr *saddr,
+				   struct in6_addr *daddr,
+				   struct tcphdr *th, int protocol,
+				   int tcplen)
+{
+	struct scatterlist sg[4];
+	int data_len;
+	int block = 0;
+	__sum16 cksum;
+	struct tcp_md5sig_pool *hp;
+	struct tcp6_pseudohdr *bp;
+	struct hash_desc *desc;
+	int err;
+	unsigned int nbytes = 0;
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp) {
+		printk(KERN_WARNING "%s(): hash pool not found...\n", __FUNCTION__);
+		goto clear_hash_noput;
+	}
+	bp = &hp->md5_blk.ip6;
+	desc = &hp->md5_desc;
+
+	/* 1. TCP pseudo-header (RFC2460) */
+	ipv6_addr_copy(&bp->saddr, saddr);
+	ipv6_addr_copy(&bp->daddr, daddr);
+	bp->len = htonl(tcplen);
+	bp->protocol = htonl(protocol);
+
+	sg_set_buf(&sg[block++], bp, sizeof(*bp));
+	nbytes += sizeof(*bp);
+
+	/* 2. TCP header, excluding options */
+	cksum = th->check;
+	th->check = 0;
+	sg_set_buf(&sg[block++], th, sizeof(*th));
+	nbytes += sizeof(*th);
+
+	/*
+	 * 3. TCP segment data (if any). A signed length keeps a tcplen
+	 * shorter than the header from wrapping into a huge data length.
+	 */
+	data_len = tcplen - (th->doff << 2);
+	if (data_len > 0) {
+		u8 *data = (u8 *)th + (th->doff << 2);
+		sg_set_buf(&sg[block++], data, data_len);
+		nbytes += data_len;
+	}
+
+	/* 4. shared key */
+	sg_set_buf(&sg[block++], key->key, key->keylen);
+	nbytes += key->keylen;
+
+	/* Now store the hash into the packet */
+	err = crypto_hash_init(desc);
+	if (err) {
+		printk(KERN_WARNING "%s(): hash_init failed\n", __FUNCTION__);
+		goto clear_hash;
+	}
+	err = crypto_hash_update(desc, sg, nbytes);
+	if (err) {
+		printk(KERN_WARNING "%s(): hash_update failed\n", __FUNCTION__);
+		goto clear_hash;
+	}
+	err = crypto_hash_final(desc, md5_hash);
+	if (err) {
+		printk(KERN_WARNING "%s(): hash_final failed\n", __FUNCTION__);
+		goto clear_hash;
+	}
+
+	/* Reset header, and free up the crypto */
+	tcp_put_md5sig_pool();
+	th->check = cksum;
+	return 0;
+
+clear_hash:
+	tcp_put_md5sig_pool();
+	/* The checksum field was zeroed above; do not leave it clobbered. */
+	th->check = cksum;
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
+}
+
+/*
+ * af_ops hook: compute the MD5 signature for an outgoing segment.
+ * The address pair comes from @sk when one is given, otherwise from the
+ * request sock @req. @dst is not used by this implementation.
+ */
+static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+				struct sock *sk,
+				struct dst_entry *dst,
+				struct request_sock *req,
+				struct tcphdr *th, int protocol,
+				int tcplen)
+{
+	if (sk)
+		return tcp_v6_do_calc_md5_hash(md5_hash, key,
+					       &inet6_sk(sk)->saddr,
+					       &inet6_sk(sk)->daddr,
+					       th, protocol, tcplen);
+
+	return tcp_v6_do_calc_md5_hash(md5_hash, key,
+				       &inet6_rsk(req)->loc_addr,
+				       &inet6_rsk(req)->rmt_addr,
+				       th, protocol, tcplen);
+}
+
+/*
+ * Validate the TCP MD5 signature option of an incoming IPv6 segment.
+ *
+ * Returns 0 when the segment may be processed and 1 when it must be
+ * dropped. A segment is dropped when a key is configured for the sender
+ * but no (well-formed) signature is present, when a signature is present
+ * but no key is configured, or when the signature does not verify.
+ */
+static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
+{
+	__u8 *hash_location = NULL;
+	struct tcp_md5sig_key *hash_expected;
+	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct tcphdr *th = skb->h.th;
+	int length = (th->doff << 2) - sizeof (*th);
+	int genhash;
+	u8 *ptr;
+	u8 newhash[16];
+
+	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+
+	/* If the TCP option is too short, we can short cut */
+	if (length < TCPOLEN_MD5SIG)
+		return hash_expected ? 1 : 0;
+
+	/* parse options */
+	ptr = (u8*)(th + 1);
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch(opcode) {
+		case TCPOPT_EOL:
+			goto done_opts;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			/* No room left for a length byte: stop parsing. */
+			if (length < 2)
+				goto done_opts;
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length)
+				goto done_opts;
+			if (opcode == TCPOPT_MD5SIG) {
+				/*
+				 * Only a full-size option holds the 16
+				 * digest bytes compared below; anything
+				 * else is treated as "no signature" so the
+				 * compare cannot run past the options.
+				 */
+				if (opsize == TCPOLEN_MD5SIG)
+					hash_location = ptr;
+				goto done_opts;
+			}
+		}
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+done_opts:
+	/* do we have a hash as expected? */
+	if (!hash_expected) {
+		if (!hash_location)
+			return 0;
+		if (net_ratelimit()) {
+			printk(KERN_INFO "MD5 Hash NOT expected but found "
+			       "(" NIP6_FMT ", %u)->"
+			       "(" NIP6_FMT ", %u)\n",
+			       NIP6(ip6h->saddr), ntohs(th->source),
+			       NIP6(ip6h->daddr), ntohs(th->dest));
+		}
+		return 1;
+	}
+
+	if (!hash_location) {
+		if (net_ratelimit()) {
+			printk(KERN_INFO "MD5 Hash expected but NOT found "
+			       "(" NIP6_FMT ", %u)->"
+			       "(" NIP6_FMT ", %u)\n",
+			       NIP6(ip6h->saddr), ntohs(th->source),
+			       NIP6(ip6h->daddr), ntohs(th->dest));
+		}
+		return 1;
+	}
+
+	/* check the signature */
+	genhash = tcp_v6_do_calc_md5_hash(newhash,
+					  hash_expected,
+					  &ip6h->saddr, &ip6h->daddr,
+					  th, sk->sk_protocol,
+					  skb->len);
+	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		if (net_ratelimit()) {
+			printk(KERN_INFO "MD5 Hash %s for "
+			       "(" NIP6_FMT ", %u)->"
+			       "(" NIP6_FMT ", %u)\n",
+			       genhash ? "failed" : "mismatch",
+			       NIP6(ip6h->saddr), ntohs(th->source),
+			       NIP6(ip6h->daddr), ntohs(th->dest));
+		}
+		return 1;
+	}
+	return 0;
+}
+#endif
+
+static struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
.rtx_syn_ack = tcp_v6_send_synack,
@@ -535,9 +929,16 @@ static struct request_sock_ops tcp6_request_sock_ops = {
.send_reset = tcp_v6_send_reset
};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { /* MD5 hooks attached to IPv6 request socks in tcp_v6_conn_request() */
+.md5_lookup = tcp_v6_reqsk_md5_lookup, /* looks the key up by the request's rmt_addr */
+};
+#endif
+
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
+ .twsk_destructor= tcp_twsk_destructor,
};
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
@@ -547,7 +948,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
- skb->csum = offsetof(struct tcphdr, check);
+ skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
csum_partial((char *)th, th->doff<<2,
@@ -569,16 +970,20 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
th->check = 0;
th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
IPPROTO_TCP, 0);
- skb->csum = offsetof(struct tcphdr, check);
+ skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
-static void tcp_v6_send_reset(struct sk_buff *skb)
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = skb->h.th, *t1;
struct sk_buff *buff;
struct flowi fl;
+ int tot_len = sizeof(*th);
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
if (th->rst)
return;
@@ -586,25 +991,35 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
return;
+#ifdef CONFIG_TCP_MD5SIG
+ if (sk)
+ key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr);
+ else
+ key = NULL;
+
+ if (key)
+ tot_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
/*
* We need to grab some memory, and put together an RST,
* and then put it into the queue to be sent.
*/
- buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
+ buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (buff == NULL)
return;
- skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
+ skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
- t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
+ t1 = (struct tcphdr *) skb_push(buff, tot_len);
/* Swap the send and the receive. */
memset(t1, 0, sizeof(*t1));
t1->dest = th->source;
t1->source = th->dest;
- t1->doff = sizeof(*t1)/4;
+ t1->doff = tot_len / 4;
t1->rst = 1;
if(th->ack) {
@@ -615,6 +1030,22 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
+ skb->len - (th->doff<<2));
}
+#ifdef CONFIG_TCP_MD5SIG
+ if (key) {
+ __be32 *opt = (__be32*)(t1 + 1);
+ opt[0] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ tcp_v6_do_calc_md5_hash((__u8*)&opt[1],
+ key,
+ &skb->nh.ipv6h->daddr,
+ &skb->nh.ipv6h->saddr,
+ t1, IPPROTO_TCP,
+ tot_len);
+ }
+#endif
+
buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
memset(&fl, 0, sizeof(fl));
@@ -645,15 +1076,37 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
kfree_skb(buff);
}
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
+ struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
struct tcphdr *th = skb->h.th, *t1;
struct sk_buff *buff;
struct flowi fl;
int tot_len = sizeof(struct tcphdr);
+ __be32 *topt;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key tw_key;
+#endif
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (!tw && skb->sk) {
+ key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr);
+ } else if (tw && tw->tw_md5_keylen) {
+ tw_key.key = tw->tw_md5_key;
+ tw_key.keylen = tw->tw_md5_keylen;
+ key = &tw_key;
+ } else {
+ key = NULL;
+ }
+#endif
if (ts)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
+#ifdef CONFIG_TCP_MD5SIG
+ if (key)
+ tot_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
@@ -673,15 +1126,29 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
t1->ack_seq = htonl(ack);
t1->ack = 1;
t1->window = htons(win);
+
+ topt = (__be32 *)(t1 + 1);
if (ts) {
- u32 *ptr = (u32*)(t1 + 1);
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tcp_time_stamp);
- *ptr = htonl(ts);
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+ *topt++ = htonl(tcp_time_stamp);
+ *topt = htonl(ts);
}
+#ifdef CONFIG_TCP_MD5SIG
+ if (key) {
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+ tcp_v6_do_calc_md5_hash((__u8 *)topt,
+ key,
+ &skb->nh.ipv6h->daddr,
+ &skb->nh.ipv6h->saddr,
+ t1, IPPROTO_TCP,
+ tot_len);
+ }
+#endif
+
buff->csum = csum_partial((char *)t1, tot_len, 0);
memset(&fl, 0, sizeof(fl));
@@ -712,9 +1179,9 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
- const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcptw->tw_ts_recent);
@@ -723,7 +1190,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
+ tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
@@ -794,6 +1261,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
+#ifdef CONFIG_TCP_MD5SIG
+ tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
+#endif
+
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
@@ -822,7 +1293,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->iif = inet6_iif(skb);
if (isn == 0)
- isn = tcp_v6_init_sequence(sk,skb);
+ isn = tcp_v6_init_sequence(skb);
tcp_rsk(req)->snt_isn = isn;
@@ -852,6 +1323,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *newtp;
struct sock *newsk;
struct ipv6_txoptions *opt;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -882,6 +1356,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
+
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
@@ -1016,6 +1494,21 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+#ifdef CONFIG_TCP_MD5SIG
+ /* Copy over the MD5 key from the original socket */
+ if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+ /* We're using one, so create a matching key
+ * on the newsk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
+ if (newkey != NULL)
+ tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr,
+ newkey, key->keylen);
+ }
+#endif
+
__inet6_hash(&tcp_hashinfo, newsk);
inet_inherit_port(&tcp_hashinfo, sk, newsk);
@@ -1031,7 +1524,7 @@ out:
return NULL;
}
-static int tcp_v6_checksum_init(struct sk_buff *skb)
+static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
@@ -1041,8 +1534,8 @@ static int tcp_v6_checksum_init(struct sk_buff *skb)
}
}
- skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, 0);
+ skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr, 0));
if (skb->len <= 76) {
return __skb_checksum_complete(skb);
@@ -1075,6 +1568,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
+#ifdef CONFIG_TCP_MD5SIG
+ if (tcp_v6_inbound_md5_hash (sk, skb))
+ goto discard;
+#endif
+
if (sk_filter(sk, skb))
goto discard;
@@ -1140,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v6_send_reset(skb);
+ tcp_v6_send_reset(sk, skb);
discard:
if (opt_skb)
__kfree_skb(opt_skb);
@@ -1265,7 +1763,7 @@ no_tcp_socket:
bad_packet:
TCP_INC_STATS_BH(TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(skb);
+ tcp_v6_send_reset(NULL, skb);
}
discard_it:
@@ -1344,6 +1842,15 @@ static struct inet_connection_sock_af_ops ipv6_specific = {
#endif
};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* MD5 hooks installed alongside ipv6_specific (native IPv6 peers) */
+.md5_lookup = tcp_v6_md5_lookup, /* key lookup by the peer socket's IPv6 daddr */
+.calc_md5_hash = tcp_v6_calc_md5_hash, /* signature over the IPv6 pseudo-header */
+.md5_add = tcp_v6_md5_add_func, /* stores the key in the keys6 table */
+.md5_parse = tcp_v6_parse_md5_keys, /* parses struct tcp_md5sig from user space */
+};
+#endif
+
/*
* TCP over IPv4 via INET6 API
*/
@@ -1366,6 +1873,15 @@ static struct inet_connection_sock_af_ops ipv6_mapped = {
#endif
};
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { /* MD5 hooks installed alongside ipv6_mapped (IPv4 via the INET6 API) */
+.md5_lookup = tcp_v4_md5_lookup, /* lookup uses the IPv4 helper */
+.calc_md5_hash = tcp_v4_calc_md5_hash, /* hashing uses the IPv4 helper */
+.md5_add = tcp_v6_md5_add_func, /* NOTE(review): add still goes through the IPv6 helper - confirm intended */
+.md5_parse = tcp_v6_parse_md5_keys, /* parser dispatches v4-mapped addresses to the IPv4 add/del helpers */
+};
+#endif
+
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1405,6 +1921,10 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_specific;
+#endif
+
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1415,6 +1935,11 @@ static int tcp_v6_init_sock(struct sock *sk)
static int tcp_v6_destroy_sock(struct sock *sk)
{
+#ifdef CONFIG_TCP_MD5SIG
+ /* Clean up the MD5 key list */
+ if (tcp_sk(sk)->md5sig_info)
+ tcp_v6_clear_md5_list(sk);
+#endif
tcp_v4_destroy_sock(sk);
return inet6_destroy_sock(sk);
}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 0ef9a35798d..918d07dd121 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -104,7 +104,7 @@ drop:
}
static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct xfrm6_tunnel *handler;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e0c3934a7e4..f52a5c3cc0a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -38,26 +38,18 @@
#include <linux/skbuff.h>
#include <asm/uaccess.h>
-#include <net/sock.h>
-#include <net/snmp.h>
-
-#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
-#include <net/addrconf.h>
-#include <net/ip.h>
-#include <net/udp.h>
#include <net/raw.h>
-#include <net/inet_common.h>
#include <net/tcp_states.h>
-
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include "udp_impl.h"
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
@@ -66,23 +58,9 @@ static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
}
-static void udp_v6_hash(struct sock *sk)
-{
- BUG();
-}
-
-static void udp_v6_unhash(struct sock *sk)
-{
- write_lock_bh(&udp_hash_lock);
- if (sk_del_node_init(sk)) {
- inet_sk(sk)->num = 0;
- sock_prot_dec_use(sk->sk_prot);
- }
- write_unlock_bh(&udp_hash_lock);
-}
-
-static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
- struct in6_addr *daddr, u16 dport, int dif)
+static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
+ struct in6_addr *daddr, __be16 dport,
+ int dif, struct hlist_head udptable[])
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
@@ -90,7 +68,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
if (inet->num == hnum && sk->sk_family == PF_INET6) {
@@ -132,20 +110,11 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
}
/*
- *
- */
-
-static void udpv6_close(struct sock *sk, long timeout)
-{
- sk_common_release(sk);
-}
-
-/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
-static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len,
int noblock, int flags, int *addr_len)
{
@@ -153,7 +122,7 @@ static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
size_t copied;
- int err;
+ int err, copy_only, is_udplite = IS_UDPLITE(sk);
if (addr_len)
*addr_len=sizeof(struct sockaddr_in6);
@@ -172,15 +141,21 @@ try_again:
msg->msg_flags |= MSG_TRUNC;
}
- if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else if (msg->msg_flags&MSG_TRUNC) {
- if (__skb_checksum_complete(skb))
+ /*
+ * Decide whether to checksum and/or copy data.
+ */
+ copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
+
+ if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
+ if (__udp_lib_checksum_complete(skb))
goto csum_copy_err;
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else {
+ copy_only = 1;
+ }
+
+ if (copy_only)
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied );
+ else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
@@ -231,26 +206,26 @@ csum_copy_err:
skb_kill_datagram(sk, skb, flags);
if (flags & MSG_DONTWAIT) {
- UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
+ UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
return -EAGAIN;
}
goto try_again;
}
-static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __be32 info,
+ struct hlist_head udptable[] )
{
struct ipv6_pinfo *np;
struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
- struct net_device *dev = skb->dev;
struct in6_addr *saddr = &hdr->saddr;
struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
- sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex);
-
+ sk = __udp6_lib_lookup(daddr, uh->dest,
+ saddr, uh->source, inet6_iif(skb), udptable);
if (sk == NULL)
return;
@@ -271,36 +246,60 @@ out:
sock_put(sk);
}
-static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+/*
+ * ICMPv6 error handler for plain UDP: forwards to the shared
+ * __udp6_lib_err() using the UDP hash table (udp_hash).
+ */
+static __inline__ void udpv6_err(struct sk_buff *skb,
+				 struct inet6_skb_parm *opt, int type,
+				 int code, int offset, __be32 info )
+{
+	/* Plain call: a void function must not return an expression. */
+	__udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
+}
+
+int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
+ struct udp_sock *up = udp_sk(sk);
int rc;
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- return -1;
- }
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto drop;
- if (skb_checksum_complete(skb)) {
- UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
- kfree_skb(skb);
- return 0;
+ /*
+ * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
+ */
+ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+
+ if (up->pcrlen == 0) { /* full coverage was set */
+ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
+ " %d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
+ goto drop;
+ }
+ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
+ "too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ goto drop;
+ }
}
+ if (udp_lib_checksum_complete(skb))
+ goto drop;
+
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
- UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
- UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
- kfree_skb(skb);
- return 0;
+ UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+ goto drop;
}
- UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+ UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
return 0;
+drop:
+ UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+ kfree_skb(skb);
+ return -1;
}
static struct sock *udp_v6_mcast_next(struct sock *sk,
- u16 loc_port, struct in6_addr *loc_addr,
- u16 rmt_port, struct in6_addr *rmt_addr,
+ __be16 loc_port, struct in6_addr *loc_addr,
+ __be16 rmt_port, struct in6_addr *rmt_addr,
int dif)
{
struct hlist_node *node;
@@ -339,16 +338,16 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static void udpv6_mcast_deliver(struct udphdr *uh,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct sk_buff *skb)
+static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
+ struct in6_addr *daddr, struct hlist_head udptable[])
{
struct sock *sk, *sk2;
+ const struct udphdr *uh = skb->h.uh;
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
- dif = skb->dev->ifindex;
+ sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ dif = inet6_iif(skb);
sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk) {
kfree_skb(skb);
@@ -365,9 +364,35 @@ static void udpv6_mcast_deliver(struct udphdr *uh,
udpv6_queue_rcv_skb(sk, skb);
out:
read_unlock(&udp_hash_lock);
+ return 0;
+}
+
+/*
+ * Prepare checksum state for a received UDP/IPv6 datagram.
+ *
+ * Returns 1 when the datagram carries a zero checksum (logged, caller
+ * discards it). Otherwise marks the skb CHECKSUM_UNNECESSARY when a
+ * CHECKSUM_COMPLETE value already verifies against the pseudo-header,
+ * or seeds skb->csum with the pseudo-header sum for later verification,
+ * clears the partial-coverage flag and returns 0.
+ */
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
+{
+	struct in6_addr *src = &skb->nh.ipv6h->saddr;
+	struct in6_addr *dst = &skb->nh.ipv6h->daddr;
+
+	if (uh->check == 0) {
+		/* RFC 2460 section 8.1 says that we SHOULD log
+		   this error. Well, it is reasonable.
+		 */
+		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+		return 1;
+	}
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
+	    !csum_ipv6_magic(src, dst, skb->len, IPPROTO_UDP, skb->csum))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+		skb->csum = ~csum_unfold(csum_ipv6_magic(src, dst, skb->len,
+							 IPPROTO_UDP, 0));
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	return 0;
+}
-static int udpv6_rcv(struct sk_buff **pskb)
+int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
+ int is_udplite)
{
struct sk_buff *skb = *pskb;
struct sock *sk;
@@ -384,44 +409,39 @@ static int udpv6_rcv(struct sk_buff **pskb)
uh = skb->h.uh;
ulen = ntohs(uh->len);
+ if (ulen > skb->len)
+ goto short_packet;
- /* Check for jumbo payload */
- if (ulen == 0)
- ulen = skb->len;
+ if(! is_udplite ) { /* UDP validates ulen. */
- if (ulen > skb->len || ulen < sizeof(*uh))
- goto short_packet;
+ /* Check for jumbo payload */
+ if (ulen == 0)
+ ulen = skb->len;
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- goto discard;
- }
+ if (ulen < sizeof(*uh))
+ goto short_packet;
- if (ulen < skb->len) {
- if (pskb_trim_rcsum(skb, ulen))
- goto discard;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- uh = skb->h.uh;
- }
+ if (ulen < skb->len) {
+ if (pskb_trim_rcsum(skb, ulen))
+ goto short_packet;
+ saddr = &skb->nh.ipv6h->saddr;
+ daddr = &skb->nh.ipv6h->daddr;
+ uh = skb->h.uh;
+ }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (udp6_csum_init(skb, uh))
+ goto discard;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
+ } else { /* UDP-Lite validates cscov. */
+ if (udplite6_csum_init(skb, uh))
+ goto discard;
+ }
/*
* Multicast receive code
*/
- if (ipv6_addr_is_multicast(daddr)) {
- udpv6_mcast_deliver(uh, saddr, daddr, skb);
- return 0;
- }
+ if (ipv6_addr_is_multicast(daddr))
+ return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable);
/* Unicast */
@@ -429,15 +449,16 @@ static int udpv6_rcv(struct sk_buff **pskb)
* check socket cache ... must talk to Alan about his plans
* for sock caches... i'll skip this for now.
*/
- sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+ sk = __udp6_lib_lookup(saddr, uh->source,
+ daddr, uh->dest, inet6_iif(skb), udptable);
if (sk == NULL) {
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- if (skb_checksum_complete(skb))
+ if (udp_lib_checksum_complete(skb))
goto discard;
- UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
+ UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
@@ -452,14 +473,20 @@ static int udpv6_rcv(struct sk_buff **pskb)
return(0);
short_packet:
- if (net_ratelimit())
- printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len);
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
+ is_udplite? "-Lite" : "", ulen, skb->len);
discard:
- UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
+ UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return(0);
}
+
+/* Receive entry point for plain UDP: shared receive path, UDP hash table. */
+static __inline__ int udpv6_rcv(struct sk_buff **pskb)
+{
+	const int is_udplite = 0;
+
+	return __udp6_lib_rcv(pskb, udp_hash, is_udplite);
+}
+
/*
* Throw away all pending data and cancel the corking. Socket is locked.
*/
@@ -478,13 +505,15 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
* Sending
*/
-static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
+static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udphdr *uh;
+ struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
int err = 0;
+ __wsum csum = 0;
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
@@ -499,35 +528,17 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
uh->len = htons(up->len);
uh->check = 0;
- if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
- skb->ip_summed = CHECKSUM_NONE;
- goto send;
- }
-
- if (skb_queue_len(&sk->sk_write_queue) == 1) {
- skb->csum = csum_partial((char *)uh,
- sizeof(struct udphdr), skb->csum);
- uh->check = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- up->len, fl->proto, skb->csum);
- } else {
- u32 tmp_csum = 0;
+ if (up->pcflag)
+ csum = udplite_csum_outgoing(sk, skb);
+ else
+ csum = udp_csum_outgoing(sk, skb);
- skb_queue_walk(&sk->sk_write_queue, skb) {
- tmp_csum = csum_add(tmp_csum, skb->csum);
- }
- tmp_csum = csum_partial((char *)uh,
- sizeof(struct udphdr), tmp_csum);
- tmp_csum = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- up->len, fl->proto, tmp_csum);
- uh->check = tmp_csum;
-
- }
+ /* add protocol-dependent pseudo-header */
+ uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
+ up->len, fl->proto, csum );
if (uh->check == 0)
- uh->check = -1;
+ uh->check = CSUM_MANGLED_0;
-send:
err = ip6_push_pending_frames(sk);
out:
up->len = 0;
@@ -535,7 +546,7 @@ out:
return err;
}
-static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
@@ -555,6 +566,8 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
int connected = 0;
+ int is_udplite = up->pcflag;
+ int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
/* destination address check */
if (sin6) {
@@ -695,7 +708,7 @@ do_udp_sendmsg:
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
- fl.proto = IPPROTO_UDP;
+ fl.proto = sk->sk_protocol;
ipv6_addr_copy(&fl.fl6_dst, daddr);
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
@@ -762,14 +775,15 @@ back_from_confirm:
do_append_data:
up->len += ulen;
- err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
+ getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
+ err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl,
(struct rt6_info*)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
- err = udp_v6_push_pending_frames(sk, up);
+ err = udp_v6_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
@@ -794,7 +808,7 @@ do_append_data:
out:
fl6_sock_release(flowlabel);
if (!err) {
- UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
+ UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
return len;
}
/*
@@ -805,7 +819,7 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+ UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -817,7 +831,7 @@ do_confirm:
goto out;
}
-static int udpv6_destroy_sock(struct sock *sk)
+int udpv6_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_v6_flush_pending_frames(sk);
@@ -831,119 +845,41 @@ static int udpv6_destroy_sock(struct sock *sk)
/*
* Socket option code for UDP
*/
-static int do_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
-{
- struct udp_sock *up = udp_sk(sk);
- int val;
- int err = 0;
-
- if(optlen<sizeof(int))
- return -EINVAL;
-
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
-
- switch(optname) {
- case UDP_CORK:
- if (val != 0) {
- up->corkflag = 1;
- } else {
- up->corkflag = 0;
- lock_sock(sk);
- udp_v6_push_pending_frames(sk, up);
- release_sock(sk);
- }
- break;
-
- case UDP_ENCAP:
- switch (val) {
- case 0:
- up->encap_type = val;
- break;
- default:
- err = -ENOPROTOOPT;
- break;
- }
- break;
-
- default:
- err = -ENOPROTOOPT;
- break;
- };
-
- return err;
-}
-
-static int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
{
- if (level != SOL_UDP)
- return ipv6_setsockopt(sk, level, optname, optval, optlen);
- return do_udpv6_setsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_v6_push_pending_frames);
+ return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
-static int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
{
- if (level != SOL_UDP)
- return compat_ipv6_setsockopt(sk, level, optname,
- optval, optlen);
- return do_udpv6_setsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_v6_push_pending_frames);
+ return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
}
#endif
-static int do_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct udp_sock *up = udp_sk(sk);
- int val, len;
-
- if(get_user(len,optlen))
- return -EFAULT;
-
- len = min_t(unsigned int, len, sizeof(int));
-
- if(len < 0)
- return -EINVAL;
-
- switch(optname) {
- case UDP_CORK:
- val = up->corkflag;
- break;
-
- case UDP_ENCAP:
- val = up->encap_type;
- break;
-
- default:
- return -ENOPROTOOPT;
- };
-
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval, &val,len))
- return -EFAULT;
- return 0;
-}
-
-static int udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
{
- if (level != SOL_UDP)
- return ipv6_getsockopt(sk, level, optname, optval, optlen);
- return do_udpv6_getsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
-static int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
{
- if (level != SOL_UDP)
- return compat_ipv6_getsockopt(sk, level, optname,
- optval, optlen);
- return do_udpv6_getsockopt(sk, level, optname, optval, optlen);
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
}
#endif
@@ -984,7 +920,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
atomic_read(&sp->sk_refcnt), sp);
}
-static int udp6_seq_show(struct seq_file *seq, void *v)
+int udp6_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_printf(seq,
@@ -1003,6 +939,7 @@ static struct udp_seq_afinfo udp6_seq_afinfo = {
.owner = THIS_MODULE,
.name = "udp6",
.family = AF_INET6,
+ .hashtable = udp_hash,
.seq_show = udp6_seq_show,
.seq_fops = &udp6_seq_fops,
};
@@ -1022,7 +959,7 @@ void udp6_proc_exit(void) {
struct proto udpv6_prot = {
.name = "UDPv6",
.owner = THIS_MODULE,
- .close = udpv6_close,
+ .close = udp_lib_close,
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
@@ -1032,8 +969,8 @@ struct proto udpv6_prot = {
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.backlog_rcv = udpv6_queue_rcv_skb,
- .hash = udp_v6_hash,
- .unhash = udp_v6_unhash,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
new file mode 100644
index 00000000000..ec987889912
--- /dev/null
+++ b/net/ipv6/udp_impl.h
@@ -0,0 +1,34 @@
+#ifndef _UDP6_IMPL_H
+#define _UDP6_IMPL_H
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+
+extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
+extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
+ int , int , int , __be32 , struct hlist_head []);
+
+extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen);
+#ifdef CONFIG_COMPAT
+extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen);
+extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+#endif
+extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len);
+extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len);
+extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
+extern int udpv6_destroy_sock(struct sock *sk);
+
+#ifdef CONFIG_PROC_FS
+extern int udp6_seq_show(struct seq_file *seq, void *v);
+#endif
+#endif /* _UDP6_IMPL_H */
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
new file mode 100644
index 00000000000..629f97162fb
--- /dev/null
+++ b/net/ipv6/udplite.c
@@ -0,0 +1,105 @@
+/*
+ * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6.
+ * See also net/ipv4/udplite.c
+ *
+ * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $
+ *
+ * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ *
+ * Changes:
+ * Fixes:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "udp_impl.h"
+
+DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
+
+static int udplitev6_rcv(struct sk_buff **pskb)
+{
+ return __udp6_lib_rcv(pskb, udplite_hash, 1);
+}
+
+static void udplitev6_err(struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ int type, int code, int offset, __be32 info)
+{
+ return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
+}
+
+static struct inet6_protocol udplitev6_protocol = {
+ .handler = udplitev6_rcv,
+ .err_handler = udplitev6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static int udplite_v6_get_port(struct sock *sk, unsigned short snum)
+{
+ return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal);
+}
+
+struct proto udplitev6_prot = {
+ .name = "UDPLITEv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udplite_sk_init,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .backlog_rcv = udpv6_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .get_port = udplite_v6_get_port,
+ .obj_size = sizeof(struct udp6_sock),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
+#endif
+};
+
+static struct inet_protosw udplite6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDPLITE,
+ .prot = &udplitev6_prot,
+ .ops = &inet6_dgram_ops,
+ .capability = -1,
+ .no_check = 0,
+ .flags = INET_PROTOSW_PERMANENT,
+};
+
+void __init udplitev6_init(void)
+{
+ if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0)
+ printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__);
+
+ inet6_register_protosw(&udplite6_protosw);
+}
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations udplite6_seq_fops;
+static struct udp_seq_afinfo udplite6_seq_afinfo = {
+ .owner = THIS_MODULE,
+ .name = "udplite6",
+ .family = AF_INET6,
+ .hashtable = udplite_hash,
+ .seq_show = udp6_seq_show,
+ .seq_fops = &udplite6_seq_fops,
+};
+
+int __init udplite6_proc_init(void)
+{
+ return udp_proc_register(&udplite6_seq_afinfo);
+}
+
+void udplite6_proc_exit(void)
+{
+ udp_proc_unregister(&udplite6_seq_afinfo);
+}
+#endif
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d400f8fae12..8dffd4daae9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -274,11 +274,12 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
break;
case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
- u16 *ports = (u16 *)exthdr;
+ __be16 *ports = (__be16 *)exthdr;
fl->fl_ip_sport = ports[0];
fl->fl_ip_dport = ports[1];
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 7af227bb155..12e426b9aac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -50,7 +50,7 @@ static u32 xfrm6_tunnel_spi;
#define XFRM6_TUNNEL_SPI_MIN 1
#define XFRM6_TUNNEL_SPI_MAX 0xffffffff
-static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
+static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
@@ -62,7 +62,7 @@ static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
{
unsigned h;
- h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
+ h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
h ^= h >> 16;
h ^= h >> 8;
h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
@@ -126,7 +126,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
return NULL;
}
-u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
+__be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
{
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
@@ -135,7 +135,7 @@ u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
x6spi = __xfrm6_tunnel_spi_lookup(saddr);
spi = x6spi ? x6spi->spi : 0;
read_unlock_bh(&xfrm6_tunnel_spi_lock);
- return spi;
+ return htonl(spi);
}
EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
@@ -180,7 +180,7 @@ try_next_2:;
spi = 0;
goto out;
alloc_spi:
- x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
+ x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC);
if (!x6spi)
goto out;
@@ -196,7 +196,7 @@ out:
return spi;
}
-u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
+__be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
{
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
@@ -210,7 +210,7 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
spi = __xfrm6_tunnel_alloc_spi(saddr);
write_unlock_bh(&xfrm6_tunnel_spi_lock);
- return spi;
+ return htonl(spi);
}
EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
@@ -265,7 +265,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
/* xfrm6_tunnel native err handling */
switch (type) {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index bef3f61569f..76c661566df 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -83,13 +83,13 @@ DEFINE_SPINLOCK(ipx_interfaces_lock);
struct ipx_interface *ipx_primary_net;
struct ipx_interface *ipx_internal_net;
-extern int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc,
+extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
unsigned char *node);
extern void ipxrtr_del_routes(struct ipx_interface *intrfc);
extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
struct iovec *iov, int len, int noblock);
extern int ipxrtr_route_skb(struct sk_buff *skb);
-extern struct ipx_route *ipxrtr_lookup(__u32 net);
+extern struct ipx_route *ipxrtr_lookup(__be32 net);
extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
#undef IPX_REFCNT_DEBUG
@@ -177,7 +177,7 @@ static void ipxitf_clear_primary_net(void)
}
static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev,
- unsigned short datalink)
+ __be16 datalink)
{
struct ipx_interface *i;
@@ -190,7 +190,7 @@ out:
}
static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev,
- unsigned short datalink)
+ __be16 datalink)
{
struct ipx_interface *i;
@@ -202,7 +202,7 @@ static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev,
return i;
}
-struct ipx_interface *ipxitf_find_using_net(__u32 net)
+struct ipx_interface *ipxitf_find_using_net(__be32 net)
{
struct ipx_interface *i;
@@ -237,7 +237,7 @@ static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk)
/* caller must hold intrfc->if_sklist_lock */
static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc,
- unsigned short port)
+ __be16 port)
{
struct sock *s;
struct hlist_node *node;
@@ -252,7 +252,7 @@ found:
/* caller must hold a reference to intrfc */
static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc,
- unsigned short port)
+ __be16 port)
{
struct sock *s;
@@ -268,7 +268,7 @@ static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc,
#ifdef CONFIG_IPX_INTERN
static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
unsigned char *ipx_node,
- unsigned short port)
+ __be16 port)
{
struct sock *s;
struct hlist_node *node;
@@ -600,10 +600,10 @@ int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node)
/* see if we need to include the netnum in the route list */
if (IPX_SKB_CB(skb)->last_hop.index >= 0) {
- u32 *last_hop = (u32 *)(((u8 *) skb->data) +
+ __be32 *last_hop = (__be32 *)(((u8 *) skb->data) +
sizeof(struct ipxhdr) +
IPX_SKB_CB(skb)->last_hop.index *
- sizeof(u32));
+ sizeof(__be32));
*last_hop = IPX_SKB_CB(skb)->last_hop.netnum;
IPX_SKB_CB(skb)->last_hop.index = -1;
}
@@ -772,7 +772,7 @@ static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
} else {
printk(KERN_WARNING "IPX: Network number collision "
"%lx\n %s %s and %s %s\n",
- (unsigned long) htonl(cb->ipx_source_net),
+ (unsigned long) ntohl(cb->ipx_source_net),
ipx_device_name(i),
ipx_frame_name(i->if_dlink_type),
ipx_device_name(intrfc),
@@ -812,7 +812,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
int i, rc = -EINVAL;
struct ipx_interface *ifcs;
char *c;
- u32 *l;
+ __be32 *l;
/* Illegal packet - too many hops or too short */
/* We decide to throw it away: no broadcasting, no local processing.
@@ -833,7 +833,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
goto out;
c = ((u8 *) ipx) + sizeof(struct ipxhdr);
- l = (u32 *) c;
+ l = (__be32 *) c;
/* Don't broadcast packet if already seen this net */
for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
@@ -855,7 +855,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
/* That aren't in the list */
if (ifcs == intrfc)
continue;
- l = (__u32 *) c;
+ l = (__be32 *) c;
/* don't consider the last entry in the packet list,
* it is our netnum, and it is not there yet */
for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
@@ -885,8 +885,8 @@ static void ipxitf_insert(struct ipx_interface *intrfc)
ipx_primary_net = intrfc;
}
-static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __u32 netnum,
- unsigned short dlink_type,
+static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum,
+ __be16 dlink_type,
struct datalink_proto *dlink,
unsigned char internal,
int ipx_offset)
@@ -960,7 +960,7 @@ static __be16 ipx_map_frame_type(unsigned char type)
static int ipxitf_create(struct ipx_interface_definition *idef)
{
struct net_device *dev;
- unsigned short dlink_type = 0;
+ __be16 dlink_type = 0;
struct datalink_proto *datalink = NULL;
struct ipx_interface *intrfc;
int rc;
@@ -1073,7 +1073,7 @@ out:
static int ipxitf_delete(struct ipx_interface_definition *idef)
{
struct net_device *dev = NULL;
- unsigned short dlink_type = 0;
+ __be16 dlink_type = 0;
struct ipx_interface *intrfc;
int rc = 0;
@@ -1110,7 +1110,7 @@ out:
}
static struct ipx_interface *ipxitf_auto_create(struct net_device *dev,
- unsigned short dlink_type)
+ __be16 dlink_type)
{
struct ipx_interface *intrfc = NULL;
struct datalink_proto *datalink;
@@ -1122,7 +1122,7 @@ static struct ipx_interface *ipxitf_auto_create(struct net_device *dev,
if (dev->addr_len > IPX_NODE_LEN)
goto out;
- switch (htons(dlink_type)) {
+ switch (ntohs(dlink_type)) {
case ETH_P_IPX: datalink = pEII_datalink; break;
case ETH_P_802_2: datalink = p8022_datalink; break;
case ETH_P_SNAP: datalink = pSNAP_datalink; break;
@@ -1234,27 +1234,27 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
/* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */
/* This functions should *not* mess with packet contents */
-__u16 ipx_cksum(struct ipxhdr *packet, int length)
+__be16 ipx_cksum(struct ipxhdr *packet, int length)
{
/*
* NOTE: sum is a net byte order quantity, which optimizes the
* loop. This only works on big and little endian machines. (I
* don't know of a machine that isn't.)
*/
- /* start at ipx_dest - We skip the checksum field and start with
- * ipx_type before the loop, not considering ipx_tctrl in the calc */
- __u16 *p = (__u16 *)&packet->ipx_dest;
- __u32 i = (length >> 1) - 1; /* Number of complete words */
- __u32 sum = packet->ipx_type << sizeof(packet->ipx_tctrl);
-
- /* Loop through all complete words except the checksum field,
- * ipx_type (accounted above) and ipx_tctrl (not used in the cksum) */
- while (--i)
+ /* handle the first 3 words separately; checksum should be skipped
+ * and ipx_tctrl masked out */
+ __u16 *p = (__u16 *)packet;
+ __u32 sum = p[1] + (p[2] & (__force u16)htons(0x00ff));
+ __u32 i = (length >> 1) - 3; /* Number of remaining complete words */
+
+ /* Loop through them */
+ p += 3;
+ while (i--)
sum += *p++;
/* Add on the last part word if it exists */
if (packet->ipx_pktsize & htons(1))
- sum += ntohs(0xff00) & *p;
+ sum += (__force u16)htons(0xff00) & *p;
/* Do final fixup */
sum = (sum & 0xffff) + (sum >> 16);
@@ -1263,10 +1263,17 @@ __u16 ipx_cksum(struct ipxhdr *packet, int length)
if (sum >= 0x10000)
sum++;
- return ~sum;
+ /*
+ * Leave 0 alone; we don't want 0xffff here. Note that we can't get
+ * here with 0x10000, so this check is the same as ((__u16)sum)
+ */
+ if (sum)
+ sum = ~sum;
+
+ return (__force __be16)sum;
}
-const char *ipx_frame_name(unsigned short frame)
+const char *ipx_frame_name(__be16 frame)
{
char* rc = "None";
@@ -1401,7 +1408,7 @@ out:
/* caller must hold a reference to intrfc */
-static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc)
+static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc)
{
unsigned short socketNum = intrfc->if_sknum;
@@ -1410,7 +1417,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc)
if (socketNum < IPX_MIN_EPHEMERAL_SOCKET)
socketNum = IPX_MIN_EPHEMERAL_SOCKET;
- while (__ipxitf_find_socket(intrfc, ntohs(socketNum)))
+ while (__ipxitf_find_socket(intrfc, htons(socketNum)))
if (socketNum > IPX_MAX_EPHEMERAL_SOCKET)
socketNum = IPX_MIN_EPHEMERAL_SOCKET;
else
@@ -1419,7 +1426,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc)
spin_unlock_bh(&intrfc->if_sklist_lock);
intrfc->if_sknum = socketNum;
- return ntohs(socketNum);
+ return htons(socketNum);
}
static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -1473,7 +1480,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
ipxs->port)) {
SOCK_DEBUG(sk,
"IPX: bind failed because port %X in use.\n",
- ntohs((int)addr->sipx_port));
+ ntohs(addr->sipx_port));
goto out_put;
}
} else {
@@ -1488,7 +1495,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
SOCK_DEBUG(sk,
"IPX: bind failed because port %X in use.\n",
- ntohs((int)addr->sipx_port));
+ ntohs(addr->sipx_port));
goto out_put;
}
}
@@ -1665,7 +1672,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
intrfc = ipxitf_find_using_phys(dev, pt->type);
if (!intrfc) {
if (ipxcfg_auto_create_interfaces &&
- ntohl(IPX_SKB_CB(skb)->ipx_dest_net)) {
+ IPX_SKB_CB(skb)->ipx_dest_net) {
intrfc = ipxitf_auto_create(dev, pt->type);
if (intrfc)
ipxitf_hold(intrfc);
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 4c0c71206e5..b7463dfca63 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -260,22 +260,22 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
ipxs = ipx_sk(s);
#ifdef CONFIG_IPX_INTERN
seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ",
- (unsigned long)htonl(ipxs->intrfc->if_netnum),
+ (unsigned long)ntohl(ipxs->intrfc->if_netnum),
ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3],
- ipxs->node[4], ipxs->node[5], htons(ipxs->port));
+ ipxs->node[4], ipxs->node[5], ntohs(ipxs->port));
#else
- seq_printf(seq, "%08lX:%04X ", (unsigned long) htonl(ipxs->intrfc->if_netnum),
- htons(ipxs->port));
+ seq_printf(seq, "%08lX:%04X ", (unsigned long) ntohl(ipxs->intrfc->if_netnum),
+ ntohs(ipxs->port));
#endif /* CONFIG_IPX_INTERN */
if (s->sk_state != TCP_ESTABLISHED)
seq_printf(seq, "%-28s", "Not_Connected");
else {
seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ",
- (unsigned long)htonl(ipxs->dest_addr.net),
+ (unsigned long)ntohl(ipxs->dest_addr.net),
ipxs->dest_addr.node[0], ipxs->dest_addr.node[1],
ipxs->dest_addr.node[2], ipxs->dest_addr.node[3],
ipxs->dest_addr.node[4], ipxs->dest_addr.node[5],
- htons(ipxs->dest_addr.sock));
+ ntohs(ipxs->dest_addr.sock));
}
seq_printf(seq, "%08X %08X %02X %03d\n",
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index a30dbb1e08f..68560ee0d79 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -19,17 +19,17 @@ DEFINE_RWLOCK(ipx_routes_lock);
extern struct ipx_interface *ipx_internal_net;
-extern __u16 ipx_cksum(struct ipxhdr *packet, int length);
-extern struct ipx_interface *ipxitf_find_using_net(__u32 net);
+extern __be16 ipx_cksum(struct ipxhdr *packet, int length);
+extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb,
char *node);
-extern struct ipx_interface *ipxitf_find_using_net(__u32 net);
+extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
-struct ipx_route *ipxrtr_lookup(__u32 net)
+struct ipx_route *ipxrtr_lookup(__be32 net)
{
struct ipx_route *r;
@@ -48,7 +48,7 @@ unlock:
/*
* Caller must hold a reference to intrfc
*/
-int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc,
+int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
unsigned char *node)
{
struct ipx_route *rt;
@@ -118,7 +118,7 @@ out:
return rc;
}
-static int ipxrtr_delete(__u32 net)
+static int ipxrtr_delete(__be32 net)
{
struct ipx_route *r, *tmp;
int rc;
@@ -238,7 +238,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
/* Apply checksum. Not allowed on 802.3 links. */
if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023))
- ipx->ipx_checksum = 0xFFFF;
+ ipx->ipx_checksum = htons(0xFFFF);
else
ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index 3fefc822c1c..89fd2a2cbca 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -32,6 +32,7 @@
#include <linux/string.h>
#include <linux/socket.h>
+#include <linux/fs.h>
#include <linux/seq_file.h>
#include <net/irda/irda.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index d50a02030ad..262bda808d9 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -61,7 +61,7 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty);
static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch);
static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout);
static void ircomm_tty_hangup(struct tty_struct *tty);
-static void ircomm_tty_do_softint(void *private_);
+static void ircomm_tty_do_softint(struct work_struct *work);
static void ircomm_tty_shutdown(struct ircomm_tty_cb *self);
static void ircomm_tty_stop(struct tty_struct *tty);
@@ -389,7 +389,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
self->flow = FLOW_STOP;
self->line = line;
- INIT_WORK(&self->tqueue, ircomm_tty_do_softint, self);
+ INIT_WORK(&self->tqueue, ircomm_tty_do_softint);
self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
self->close_delay = 5*HZ/10;
@@ -594,15 +594,16 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty)
}
/*
- * Function ircomm_tty_do_softint (private_)
+ * Function ircomm_tty_do_softint (work)
*
* We use this routine to give the write wakeup to the user at at a
* safe time (as fast as possible after write have completed). This
* can be compared to the Tx interrupt.
*/
-static void ircomm_tty_do_softint(void *private_)
+static void ircomm_tty_do_softint(struct work_struct *work)
{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) private_;
+ struct ircomm_tty_cb *self =
+ container_of(work, struct ircomm_tty_cb, tqueue);
struct tty_struct *tty;
unsigned long flags;
struct sk_buff *skb, *ctrl_skb;
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 197e3e7ed7e..75e39ea599d 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -146,7 +146,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self)
* do something rational.
*/
void ircomm_tty_set_termios(struct tty_struct *tty,
- struct termios *old_termios)
+ struct ktermios *old_termios)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
unsigned int cflag = tty->termios->c_cflag;
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 415cf4eec23..8f1c6d65b24 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
+#include <linux/fs.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -172,7 +173,7 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
- self = kmalloc(sizeof(struct iriap_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(*self), GFP_ATOMIC);
if (!self) {
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
return NULL;
@@ -181,7 +182,6 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
/*
* Initialize instance
*/
- memset(self, 0, sizeof(struct iriap_cb));
self->magic = IAS_MAGIC;
self->mode = mode;
@@ -451,12 +451,12 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
n = 2;
/* Get length, MSB first */
- len = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2;
+ len = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2;
IRDA_DEBUG(4, "%s(), len=%d\n", __FUNCTION__, len);
/* Get object ID, MSB first */
- obj_id = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2;
+ obj_id = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2;
type = fp[n++];
IRDA_DEBUG(4, "%s(), Value type = %d\n", __FUNCTION__, type);
@@ -506,7 +506,7 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
value = irias_new_string_value(fp+n);
break;
case IAS_OCT_SEQ:
- value_len = be16_to_cpu(get_unaligned((__u16 *)(fp+n)));
+ value_len = be16_to_cpu(get_unaligned((__be16 *)(fp+n)));
n += 2;
/* Will truncate to IAS_MAX_OCTET_STRING bytes */
@@ -544,7 +544,7 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self,
{
struct sk_buff *tx_skb;
int n;
- __u32 tmp_be32;
+ __be32 tmp_be32;
__be16 tmp_be16;
__u8 *fp;
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
index 56292ab7d65..b1ee99a59c0 100644
--- a/net/irda/irias_object.c
+++ b/net/irda/irias_object.c
@@ -501,13 +501,12 @@ struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
len = IAS_MAX_OCTET_STRING;
value->len = len;
- value->t.oct_seq = kmalloc(len, GFP_ATOMIC);
+ value->t.oct_seq = kmemdup(octseq, len, GFP_ATOMIC);
if (value->t.oct_seq == NULL){
IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
kfree(value);
return NULL;
}
- memcpy(value->t.oct_seq, octseq , len);
return value;
}
@@ -522,7 +521,6 @@ struct ias_value *irias_new_missing_value(void)
}
value->type = IAS_MISSING;
- value->len = 0;
return value;
}
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index 9b962f24771..2bb04ac0932 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -995,7 +995,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
{
__u8 *frame;
__u8 param_len;
- __u16 tmp_le; /* Temporary value in little endian format */
+ __le16 tmp_le; /* Temporary value in little endian format */
int n=0;
if (skb == NULL) {
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 5073261b9d0..7e5d12ab3b9 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -641,15 +641,13 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
}
/* Allocate a new instance */
- new = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
+ new = kmemdup(orig, sizeof(*new), GFP_ATOMIC);
if (!new) {
IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __FUNCTION__);
spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
flags);
return NULL;
}
- /* Dup */
- memcpy(new, orig, sizeof(struct lsap_cb));
/* new->lap = orig->lap; => done in the memcpy() */
/* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */
new->conn_skb = NULL;
@@ -1678,7 +1676,8 @@ static int irlmp_slsap_inuse(__u8 slsap_sel)
* every IrLAP connection and check every LSAP associated with each
* the connection.
*/
- spin_lock_irqsave(&irlmp->links->hb_spinlock, flags);
+ spin_lock_irqsave_nested(&irlmp->links->hb_spinlock, flags,
+ SINGLE_DEPTH_NESTING);
lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
while (lap != NULL) {
IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;);
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 1ba8c710663..1d26cd33ea1 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -356,14 +356,13 @@ hashbin_t *hashbin_new(int type)
/*
* Allocate new hashbin
*/
- hashbin = kmalloc( sizeof(hashbin_t), GFP_ATOMIC);
+ hashbin = kzalloc(sizeof(*hashbin), GFP_ATOMIC);
if (!hashbin)
return NULL;
/*
* Initialize structure
*/
- memset(hashbin, 0, sizeof(hashbin_t));
hashbin->hb_type = type;
hashbin->magic = HB_MAGIC;
//hashbin->hb_current = NULL;
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 3c2e70b77df..03504f3e499 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -26,6 +26,7 @@
#include <linux/skbuff.h>
#include <linux/init.h>
+#include <linux/fs.h>
#include <linux/seq_file.h>
#include <asm/byteorder.h>
@@ -1099,7 +1100,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
return -ENOMEM;
/* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(tx_skb, TTP_MAX_HEADER);
+ skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
} else {
tx_skb = userdata;
/*
@@ -1147,7 +1148,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
frame[3] = 0x02; /* Value length */
put_unaligned(cpu_to_be16((__u16) max_sdu_size),
- (__u16 *)(frame+4));
+ (__be16 *)(frame+4));
} else {
/* Insert plain TTP header */
frame = skb_push(tx_skb, TTP_HEADER);
@@ -1348,7 +1349,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
return -ENOMEM;
/* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(tx_skb, TTP_MAX_HEADER);
+ skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
} else {
tx_skb = userdata;
/*
@@ -1394,7 +1395,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
frame[3] = 0x02; /* Value length */
put_unaligned(cpu_to_be16((__u16) max_sdu_size),
- (__u16 *)(frame+4));
+ (__be16 *)(frame+4));
} else {
/* Insert TTP header */
frame = skb_push(tx_skb, TTP_HEADER);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 20ff7cca1d0..5dd5094659a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -27,6 +27,7 @@
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <net/xfrm.h>
+#include <linux/audit.h>
#include <net/sock.h>
@@ -1420,6 +1421,9 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
else
err = xfrm_state_update(x);
+ xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+ AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
__xfrm_state_put(x);
@@ -1460,8 +1464,12 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
err = -EPERM;
goto out;
}
-
+
err = xfrm_state_delete(x);
+
+ xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+ AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
if (err < 0)
goto out;
@@ -1637,12 +1645,15 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
{
unsigned proto;
struct km_event c;
+ struct xfrm_audit audit_info;
proto = pfkey_satype2proto(hdr->sadb_msg_satype);
if (proto == 0)
return -EINVAL;
- xfrm_state_flush(proto);
+ audit_info.loginuid = audit_get_loginuid(current->audit_context);
+ audit_info.secid = 0;
+ xfrm_state_flush(proto, &audit_info);
c.data.proto = proto;
c.seq = hdr->sadb_msg_seq;
c.pid = hdr->sadb_msg_pid;
@@ -1767,11 +1778,11 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
/* addresses present only in tunnel mode */
if (t->mode == XFRM_MODE_TUNNEL) {
- switch (xp->family) {
+ struct sockaddr *sa;
+ sa = (struct sockaddr *)(rq+1);
+ switch(sa->sa_family) {
case AF_INET:
- sin = (void*)(rq+1);
- if (sin->sin_family != AF_INET)
- return -EINVAL;
+ sin = (struct sockaddr_in*)sa;
t->saddr.a4 = sin->sin_addr.s_addr;
sin++;
if (sin->sin_family != AF_INET)
@@ -1780,9 +1791,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
- sin6 = (void *)(rq+1);
- if (sin6->sin6_family != AF_INET6)
- return -EINVAL;
+ sin6 = (struct sockaddr_in6*)sa;
memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
sin6++;
if (sin6->sin6_family != AF_INET6)
@@ -1793,7 +1802,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
default:
return -EINVAL;
}
- }
+ t->encap_family = sa->sa_family;
+ } else
+ t->encap_family = xp->family;
+
/* No way to set this via kame pfkey */
t->aalgos = t->ealgos = t->calgos = ~0;
xp->xfrm_nr++;
@@ -1830,18 +1842,25 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
{
+ struct xfrm_tmpl *t;
int sockaddr_size = pfkey_sockaddr_size(xp->family);
- int socklen = (xp->family == AF_INET ?
- sizeof(struct sockaddr_in) :
- sizeof(struct sockaddr_in6));
+ int socklen = 0;
+ int i;
+
+ for (i=0; i<xp->xfrm_nr; i++) {
+ t = xp->xfrm_vec + i;
+ socklen += (t->encap_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
return sizeof(struct sadb_msg) +
(sizeof(struct sadb_lifetime) * 3) +
(sizeof(struct sadb_address) * 2) +
(sockaddr_size * 2) +
sizeof(struct sadb_x_policy) +
- (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
- (socklen * 2))) +
+ (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) +
+ (socklen * 2) +
pfkey_xfrm_policy2sec_ctx_size(xp);
}
@@ -1999,7 +2018,9 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
req_size = sizeof(struct sadb_x_ipsecrequest);
if (t->mode == XFRM_MODE_TUNNEL)
- req_size += 2*socklen;
+ req_size += ((t->encap_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6)) * 2);
else
size -= 2*socklen;
rq = (void*)skb_put(skb, req_size);
@@ -2015,7 +2036,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
rq->sadb_x_ipsecrequest_reqid = t->reqid;
if (t->mode == XFRM_MODE_TUNNEL) {
- switch (xp->family) {
+ switch (t->encap_family) {
case AF_INET:
sin = (void*)(rq+1);
sin->sin_family = AF_INET;
@@ -2195,6 +2216,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
hdr->sadb_msg_type != SADB_X_SPDUPDATE);
+ xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+ AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL);
+
if (err)
goto out;
@@ -2272,6 +2296,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
&sel, tmp.security, 1);
security_xfrm_policy_free(&tmp);
+
+ xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+ AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
+
if (xp == NULL)
return -ENOENT;
@@ -2406,8 +2434,11 @@ static int key_notify_policy_flush(struct km_event *c)
static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
{
struct km_event c;
+ struct xfrm_audit audit_info;
- xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
+ audit_info.loginuid = audit_get_loginuid(current->audit_context);
+ audit_info.secid = 0;
+ xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
c.data.type = XFRM_POLICY_TYPE_MAIN;
c.event = XFRM_MSG_FLUSHPOLICY;
c.pid = hdr->sadb_msg_pid;
@@ -2938,7 +2969,7 @@ out:
return NULL;
}
-static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
struct sk_buff *skb;
struct sadb_msg *hdr;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2652ead96c6..190bb3e0518 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -64,7 +64,7 @@ static inline u16 llc_ui_next_link_no(int sap)
*
* Given an ARP header type return the corresponding ethernet protocol.
*/
-static inline u16 llc_proto_type(u16 arphrd)
+static inline __be16 llc_proto_type(u16 arphrd)
{
return arphrd == ARPHRD_IEEE802_TR ?
htons(ETH_P_TR_802_2) : htons(ETH_P_802_2);
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 94d2368ade9..db82aff6e40 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -115,8 +115,8 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
skb->h.raw += llc_len;
skb_pull(skb, llc_len);
if (skb->protocol == htons(ETH_P_802_2)) {
- u16 pdulen = eth_hdr(skb)->h_proto,
- data_size = ntohs(pdulen) - llc_len;
+ __be16 pdulen = eth_hdr(skb)->h_proto;
+ u16 data_size = ntohs(pdulen) - llc_len;
if (unlikely(pskb_trim_rcsum(skb, data_size)))
return 0;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f619c652726..1b853c34d30 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,5 +1,5 @@
menu "Core Netfilter Configuration"
- depends on NET && NETFILTER
+ depends on NET && INET && NETFILTER
config NETFILTER_NETLINK
tristate "Netfilter netlink interface"
@@ -25,19 +25,57 @@ config NETFILTER_NETLINK_LOG
and is also scheduled to replace the old syslog-based ipt_LOG
and ip6t_LOG modules.
-config NF_CONNTRACK
- tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_CONNTRACK=n
- default n
- ---help---
+config NF_CONNTRACK_ENABLED
+ tristate "Netfilter connection tracking support"
+ help
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections.
+ This is required to do Masquerading or other kinds of Network
+ Address Translation (except for Fast NAT). It can also be used to
+ enhance packet filtering (see `Connection state match support'
+ below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+choice
+ prompt "Netfilter connection tracking support"
+ depends on NF_CONNTRACK_ENABLED
+
+config NF_CONNTRACK_SUPPORT
+ bool "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
Layer 3 independent connection tracking is experimental scheme
which generalize ip_conntrack to support other layer 3 protocols.
- To compile it as a module, choose M here. If unsure, say N.
+ This is required to do Masquerading or other kinds of Network
+ Address Translation (except for Fast NAT). It can also be used to
+ enhance packet filtering (see `Connection state match support'
+ below).
+
+config IP_NF_CONNTRACK_SUPPORT
+ bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
+ help
+ The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
+
+ This is required to do Masquerading or other kinds of Network
+ Address Translation (except for Fast NAT). It can also be used to
+ enhance packet filtering (see `Connection state match support'
+ below).
+
+endchoice
+
+config NF_CONNTRACK
+ tristate
+ default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
+ default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
+
+config IP_NF_CONNTRACK
+ tristate
+ default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
+ default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
config NF_CT_ACCT
bool "Connection tracking flow accounting"
@@ -82,8 +120,12 @@ config NF_CONNTRACK_EVENTS
If unsure, say `N'.
+config NF_CT_PROTO_GRE
+ tristate
+ depends on EXPERIMENTAL && NF_CONNTRACK
+
config NF_CT_PROTO_SCTP
- tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)'
+ tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL && NF_CONNTRACK
default n
help
@@ -93,8 +135,23 @@ config NF_CT_PROTO_SCTP
If you want to compile it as a module, say M here and read
Documentation/modules.txt. If unsure, say `N'.
+config NF_CONNTRACK_AMANDA
+ tristate "Amanda backup protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ help
+ If you are running the Amanda backup package <http://www.amanda.org/>
+ on this machine or machines that will be MASQUERADED through this
+ machine, then you may want to enable this feature. This allows the
+ connection tracking and natting code to allow the sub-channels that
+ Amanda requires for communication of the backup data, messages and
+ index.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NF_CONNTRACK_FTP
- tristate "FTP support on new connection tracking (EXPERIMENTAL)"
+ tristate "FTP protocol support (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_CONNTRACK
help
Tracking FTP connections is problematic: special helpers are
@@ -107,6 +164,101 @@ config NF_CONNTRACK_FTP
To compile it as a module, choose M here. If unsure, say N.
+config NF_CONNTRACK_H323
+ tristate "H.323 protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ help
+ H.323 is a VoIP signalling protocol from ITU-T. As one of the most
+ important VoIP protocols, it is widely used by voice hardware and
+ software including voice gateways, IP phones, Netmeeting, OpenPhone,
+ Gnomemeeting, etc.
+
+ With this module you can support H.323 on a connection tracking/NAT
+ firewall.
+
+ This module supports RAS, Fast Start, H.245 Tunnelling, Call
+ Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
+ whiteboard, file transfer, etc. For more information, please
+ visit http://nath323.sourceforge.net/.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_IRC
+ tristate "IRC protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ help
+ There is a commonly-used extension to IRC called
+ Direct Client-to-Client Protocol (DCC). This enables users to send
+ files to each other, and also chat to each other without the need
+ of a server. DCC Sending is used anywhere you send files over IRC,
+ and DCC Chat is most commonly used by Eggdrop bots. If you are
+ using NAT, this extension will enable you to send files and initiate
+ chats. Note that you do NOT need this extension to get files or
+ have others initiate chats, or everything else in IRC.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_NETBIOS_NS
+ tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ help
+ NetBIOS name service requests are sent as broadcast messages from an
+ unprivileged port and responded to with unicast messages to the
+ same port. This make them hard to firewall properly because connection
+ tracking doesn't deal with broadcasts. This helper tracks locally
+ originating NetBIOS name service requests and the corresponding
+ responses. It relies on correct IP address configuration, specifically
+ netmask and broadcast address. When properly configured, the output
+ of "ip address show" should look similar to this:
+
+ $ ip -4 address show eth0
+ 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
+ inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_PPTP
+ tristate "PPtP protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ select NF_CT_PROTO_GRE
+ help
+ This module adds support for PPTP (Point to Point Tunnelling
+ Protocol, RFC2637) connection tracking and NAT.
+
+ If you are running PPTP sessions over a stateful firewall or NAT
+ box, you may want to enable this feature.
+
+ Please note that not all PPTP modes of operation are supported yet.
+ Specifically these limitations exist:
+ - Blindy assumes that control connections are always established
+ in PNS->PAC direction. This is a violation of RFC2637.
+ - Only supports a single call within each session
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_SIP
+ tristate "SIP protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ help
+ SIP is an application-layer control protocol that can establish,
+ modify, and terminate multimedia sessions (conferences) such as
+ Internet telephony calls. With the ip_conntrack_sip and
+ the nf_nat_sip modules you can support the protocol on a connection
+ tracking/NATing firewall.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_TFTP
+ tristate "TFTP protocol support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ help
+ TFTP connection tracking helper, this is required depending
+ on how restrictive your ruleset is.
+ If you are using a tftp client behind -j SNAT or -j MASQUERADING
+ you will need this.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NF_CT_NETLINK
tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
@@ -184,6 +336,17 @@ config NETFILTER_XT_TARGET_NFQUEUE
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_TARGET_NFLOG
+ tristate '"NFLOG" target support'
+ depends on NETFILTER_XTABLES
+ help
+ This option enables the NFLOG target, which allows to LOG
+ messages through the netfilter logging API, which can use
+ either the old LOG target, the old ULOG target or nfnetlink_log
+ as backend.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_NOTRACK
tristate '"NOTRACK" target support'
depends on NETFILTER_XTABLES
@@ -464,5 +627,19 @@ config NETFILTER_XT_MATCH_TCPMSS
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_HASHLIMIT
+ tristate '"hashlimit" match support'
+ depends on NETFILTER_XTABLES
+ help
+ This option adds a `hashlimit' match.
+
+ As opposed to `limit', this match dynamically creates a hash table
+ of limit buckets, based on your selection of source/destination
+ addresses and/or ports.
+
+ It enables you to express policies like `10kpps for any given
+ destination address' or `500pps from any given source address'
+ with a single rule.
+
endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a74be492fd0..5dc5574f7e9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,7 +1,10 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
-nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
+
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_SYSCTL) += nf_sysctl.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
@@ -11,13 +14,23 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
# SCTP protocol connection tracking
+obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
# netlink interface for nf_conntrack
obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
# connection tracking helpers
+nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o
+
+obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
+obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
+obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
+obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
+obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
@@ -28,6 +41,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
@@ -56,3 +70,4 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index d80b935b3a9..291b8c6862f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -28,7 +28,7 @@
static DEFINE_SPINLOCK(afinfo_lock);
-struct nf_afinfo *nf_afinfo[NPROTO];
+struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
int nf_register_afinfo(struct nf_afinfo *afinfo)
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
* of skbuffs queued for userspace, and not deregister a hook unless
* this is zero, but that sucks. Now, we simply check when the
* packets come back: if the hook is gone, the packet is discarded. */
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
static DEFINE_SPINLOCK(nf_hook_lock);
@@ -222,28 +222,21 @@ copy_skb:
}
EXPORT_SYMBOL(skb_make_writable);
-u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
-{
- u_int32_t diff[] = { oldval, newval };
-
- return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
-}
-EXPORT_SYMBOL(nf_csum_update);
-
-u_int16_t nf_proto_csum_update(struct sk_buff *skb,
- u_int32_t oldval, u_int32_t newval,
- u_int16_t csum, int pseudohdr)
+void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
+ __be32 from, __be32 to, int pseudohdr)
{
+ __be32 diff[] = { ~from, to };
if (skb->ip_summed != CHECKSUM_PARTIAL) {
- csum = nf_csum_update(oldval, newval, csum);
+ *sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
+ ~csum_unfold(*sum)));
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = nf_csum_update(oldval, newval, skb->csum);
+ skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+ ~skb->csum);
} else if (pseudohdr)
- csum = ~nf_csum_update(oldval, newval, ~csum);
-
- return csum;
+ *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
+ csum_unfold(*sum)));
}
-EXPORT_SYMBOL(nf_proto_csum_update);
+EXPORT_SYMBOL(nf_proto_csum_replace4);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
new file mode 100644
index 00000000000..b8869eab765
--- /dev/null
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -0,0 +1,238 @@
+/* Amanda extension for IP connection tracking
+ *
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on HW's ip_conntrack_irc.c as well as other modules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/textsearch.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_amanda.h>
+
+/* Timeout refreshed on the master connection for every server packet;
+ * multiplied by HZ at the point of use, i.e. expressed in seconds. */
+static unsigned int master_timeout __read_mostly = 300;
+/* Name of the textsearch algorithm handed to textsearch_prepare() at init. */
+static char *ts_algo = "kmp";
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda connection tracking module");
+MODULE_LICENSE("GPL");
+/* The module can also be requested under the ip_conntrack_amanda name. */
+MODULE_ALIAS("ip_conntrack_amanda");
+
+/* master_timeout: mode 0600; ts_algo: mode 0400 (no write bit). */
+module_param(master_timeout, uint, 0600);
+MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
+module_param(ts_algo, charp, 0400);
+MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
+
+/*
+ * Optional NAT callback. amanda_help() reads this pointer with
+ * rcu_dereference() and, when it is non-NULL and the conntrack has NAT
+ * status bits set, calls it instead of registering the expectation itself.
+ * matchoff is the offset of the port digits relative to the UDP payload and
+ * matchlen is the number of digits parsed. Nothing in this file assigns the
+ * pointer; it is exported (GPL-only), presumably for a NAT helper module
+ * to fill in -- confirm against the NAT side.
+ */
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_amanda_hook);
+
+/*
+ * Indices into search[]. amanda_help() walks SEARCH_DATA..SEARCH_INDEX as
+ * one contiguous range, so those three entries must stay adjacent and in
+ * this order.
+ */
+enum amanda_strings {
+ SEARCH_CONNECT,
+ SEARCH_NEWLINE,
+ SEARCH_DATA,
+ SEARCH_MESG,
+ SEARCH_INDEX,
+};
+
+/*
+ * Patterns searched for in the Amanda payload, indexed by enum
+ * amanda_strings. .len is the pattern length without the terminating NUL
+ * and must be kept in sync with .string by hand. .ts is the prepared
+ * textsearch configuration: it is filled in by nf_conntrack_amanda_init()
+ * and released by nf_conntrack_amanda_fini().
+ */
+static struct {
+ char *string;
+ size_t len;
+ struct ts_config *ts;
+} search[] __read_mostly = {
+ [SEARCH_CONNECT] = {
+ .string = "CONNECT ",
+ .len = 8,
+ },
+ [SEARCH_NEWLINE] = {
+ .string = "\n",
+ .len = 1,
+ },
+ [SEARCH_DATA] = {
+ .string = "DATA ",
+ .len = 5,
+ },
+ [SEARCH_MESG] = {
+ .string = "MESG ",
+ .len = 5,
+ },
+ [SEARCH_INDEX] = {
+ .string = "INDEX ",
+ .len = 6,
+ },
+};
+
+/*
+ * Conntrack helper callback for Amanda control traffic.
+ *
+ * Scans the UDP payload of a reply-direction packet for a "CONNECT " line.
+ * For each of the "DATA ", "MESG " and "INDEX " keywords found on that line
+ * it parses the decimal port that follows and prepares a TCP expectation
+ * for it, which is either handed to nf_nat_amanda_hook (when NAT applies)
+ * or registered directly.
+ *
+ * @pskb:    packet being inspected
+ * @protoff: offset of the UDP header within the packet
+ * @ct:      connection the packet belongs to (the master of any expectation)
+ * @ctinfo:  conntrack info for the packet; only its direction is used here
+ *
+ * Returns NF_ACCEPT unless an expectation could not be allocated or
+ * registered (NF_DROP), or the NAT hook was called, in which case ret
+ * carries the hook's verdict.
+ */
+static int amanda_help(struct sk_buff **pskb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct ts_state ts;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int dataoff, start, stop, off, i;
+ /* room for the largest 16-bit port in decimal plus the NUL */
+ char pbuf[sizeof("65535")], *tmp;
+ u_int16_t len;
+ __be16 port;
+ int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ int ret = NF_ACCEPT;
+ typeof(nf_nat_amanda_hook) nf_nat_amanda;
+
+ /* Only look at packets from the Amanda server */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* increase the UDP timeout of the master connection as replies from
+ * Amanda clients to the server can be quite delayed */
+ nf_ct_refresh(ct, *pskb, master_timeout * HZ);
+
+ /* No data? */
+ dataoff = protoff + sizeof(struct udphdr);
+ if (dataoff >= (*pskb)->len) {
+ if (net_ratelimit())
+ printk("amanda_help: skblen = %u\n", (*pskb)->len);
+ return NF_ACCEPT;
+ }
+
+ /* Find "CONNECT "; the match offset is rebased onto the packet by
+ * adding dataoff, then advanced past the keyword itself */
+ memset(&ts, 0, sizeof(ts));
+ start = skb_find_text(*pskb, dataoff, (*pskb)->len,
+ search[SEARCH_CONNECT].ts, &ts);
+ if (start == UINT_MAX)
+ goto out;
+ start += dataoff + search[SEARCH_CONNECT].len;
+
+ /* Find the newline ending the CONNECT line; stop becomes its offset
+ * within the packet */
+ memset(&ts, 0, sizeof(ts));
+ stop = skb_find_text(*pskb, start, (*pskb)->len,
+ search[SEARCH_NEWLINE].ts, &ts);
+ if (stop == UINT_MAX)
+ goto out;
+ stop += start;
+
+ /* Look for each of DATA, MESG and INDEX between start and stop */
+ for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
+ memset(&ts, 0, sizeof(ts));
+ off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
+ if (off == UINT_MAX)
+ continue;
+ /* off now points at the first character after the keyword */
+ off += start + search[i].len;
+
+ /* Copy at most 5 characters, bounded by the end of the line */
+ len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
+ if (skb_copy_bits(*pskb, off, pbuf, len))
+ break;
+ pbuf[len] = '\0';
+
+ /* Parse the decimal port; len is reused for the number of
+ * characters the parse consumed */
+ port = htons(simple_strtoul(pbuf, &tmp, 10));
+ len = tmp - pbuf;
+ if (port == 0 || len > 5)
+ break;
+
+ exp = nf_conntrack_expect_alloc(ct);
+ if (exp == NULL) {
+ ret = NF_DROP;
+ goto out;
+ }
+ /* Expect TCP between the original-direction source and
+ * destination addresses, to the parsed port; the source port
+ * argument is NULL (presumably "any" -- confirm against
+ * nf_conntrack_expect_init()) */
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ nf_conntrack_expect_init(exp, family,
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_TCP, NULL, &port);
+
+ /* With NAT active the hook takes over (it is given the digits'
+ * offset relative to the UDP payload and their length);
+ * otherwise register the expectation here */
+ nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
+ if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
+ len, exp);
+ else if (nf_conntrack_expect_related(exp) != 0)
+ ret = NF_DROP;
+ /* Drop the reference taken by nf_conntrack_expect_alloc() */
+ nf_conntrack_expect_put(exp);
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Helper registrations: one entry per address family (AF_INET, AF_INET6),
+ * otherwise identical. Both match protocol UDP with port 10080 in
+ * tuple.src, with every mask bit set for l3num, port and protocol number.
+ * max_expected is 3, which lines up with the three keywords amanda_help()
+ * scans for. The unit of .timeout is not visible in this file (presumably
+ * seconds -- confirm against struct nf_conntrack_helper).
+ */
+static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
+ {
+ .name = "amanda",
+ .max_expected = 3,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ },
+ {
+ .name = "amanda",
+ .max_expected = 3,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = amanda_help,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.udp.port = __constant_htons(10080),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ },
+};
+
+/*
+ * Module unload: unregister both helpers (index 0 first, then 1), then
+ * release every prepared textsearch configuration in table order.
+ */
+static void __exit nf_conntrack_amanda_fini(void)
+{
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(amanda_helper); n++)
+		nf_conntrack_helper_unregister(&amanda_helper[n]);
+
+	n = 0;
+	while (n < ARRAY_SIZE(search)) {
+		textsearch_destroy(search[n].ts);
+		n++;
+	}
+}
+
+/*
+ * Module load: prepare one textsearch configuration per search[] entry,
+ * then register the IPv4 and IPv6 helpers.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone and
+ * the error is returned: the errno encoded by textsearch_prepare(), or the
+ * negative value returned by nf_conntrack_helper_register().
+ */
+static int __init nf_conntrack_amanda_init(void)
+{
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(search); i++) {
+		search[i].ts = textsearch_prepare(ts_algo, search[i].string,
+						  search[i].len,
+						  GFP_KERNEL, TS_AUTOLOAD);
+		/* Failure is reported as ERR_PTR(), never as NULL. */
+		if (IS_ERR(search[i].ts)) {
+			ret = PTR_ERR(search[i].ts);
+			goto err1;
+		}
+	}
+	ret = nf_conntrack_helper_register(&amanda_helper[0]);
+	if (ret < 0)
+		goto err1;
+	ret = nf_conntrack_helper_register(&amanda_helper[1]);
+	if (ret < 0)
+		goto err2;
+	return 0;
+
+err2:
+	nf_conntrack_helper_unregister(&amanda_helper[0]);
+err1:
+	/*
+	 * Only slots [0, i) hold valid configurations: slot i is either the
+	 * one that failed or, after a registration failure, one past the end
+	 * of the table. Pre-decrement so it is never touched.
+	 */
+	while (--i >= 0)
+		textsearch_destroy(search[i].ts);
+	return ret;
+}
+
+module_init(nf_conntrack_amanda_init);
+module_exit(nf_conntrack_amanda_fini);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 093b3ddc513..9b02ec4012f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -46,15 +46,12 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
-
-/* This rwlock protects the main hash table, protocol/helper/expected
- registrations, conntrack timers*/
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
+#include <linux/mm.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -67,92 +64,32 @@
#endif
DEFINE_RWLOCK(nf_conntrack_lock);
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL_GPL(nf_conntrack_count);
-void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
-LIST_HEAD(nf_conntrack_expect_list);
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
-static LIST_HEAD(helpers);
-unsigned int nf_conntrack_htable_size __read_mostly = 0;
-int nf_conntrack_max __read_mostly;
-struct list_head *nf_conntrack_hash __read_mostly;
-static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
-struct nf_conn nf_conntrack_untracked;
-unsigned int nf_ct_log_invalid __read_mostly;
-static LIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc __read_mostly;
-
-static unsigned int nf_conntrack_next_id;
-static unsigned int nf_conntrack_expect_next_id;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
+void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
+EXPORT_SYMBOL_GPL(nf_conntrack_destroyed);
-DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+unsigned int nf_conntrack_htable_size __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
-{
- DEBUGP("ecache: delivering events for %p\n", ecache->ct);
- if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
- && ecache->events)
- atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
- ecache->ct);
-
- ecache->events = 0;
- nf_ct_put(ecache->ct);
- ecache->ct = NULL;
-}
+int nf_conntrack_max __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_max);
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
-{
- struct nf_conntrack_ecache *ecache;
+struct list_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
- local_bh_disable();
- ecache = &__get_cpu_var(nf_conntrack_ecache);
- if (ecache->ct == ct)
- __nf_ct_deliver_cached_events(ecache);
- local_bh_enable();
-}
+struct nf_conn nf_conntrack_untracked __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
-{
- struct nf_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = &__get_cpu_var(nf_conntrack_ecache);
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __nf_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-static void nf_ct_event_cache_flush(void)
-{
- struct nf_conntrack_ecache *ecache;
- int cpu;
+unsigned int nf_ct_log_invalid __read_mostly;
+LIST_HEAD(unconfirmed);
+static int nf_conntrack_vmalloc __read_mostly;
- for_each_possible_cpu(cpu) {
- ecache = &per_cpu(nf_conntrack_ecache, cpu);
- if (ecache->ct)
- nf_ct_put(ecache->ct);
- }
-}
-#else
-static inline void nf_ct_event_cache_flush(void) {}
-#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+static unsigned int nf_conntrack_next_id;
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
@@ -171,7 +108,7 @@ static struct {
size_t size;
/* slab cache pointer */
- kmem_cache_t *cachep;
+ struct kmem_cache *cachep;
/* allocated slab cache + modules which uses this slab cache */
int use;
@@ -184,85 +121,6 @@ DEFINE_RWLOCK(nf_ct_cache_lock);
/* This avoids calling kmem_cache_create() with same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);
-extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
-struct nf_conntrack_protocol *
-__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
-{
- if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
- return &nf_conntrack_generic_protocol;
-
- return nf_ct_protos[l3proto][protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct nf_conntrack_protocol *
-nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
-{
- struct nf_conntrack_protocol *p;
-
- preempt_disable();
- p = __nf_ct_proto_find(l3proto, protocol);
- if (!try_module_get(p->me))
- p = &nf_conntrack_generic_protocol;
- preempt_enable();
-
- return p;
-}
-
-void nf_ct_proto_put(struct nf_conntrack_protocol *p)
-{
- module_put(p->me);
-}
-
-struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- preempt_disable();
- p = __nf_ct_l3proto_find(l3proto);
- if (!try_module_get(p->me))
- p = &nf_conntrack_generic_l3proto;
- preempt_enable();
-
- return p;
-}
-
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
-{
- module_put(p->me);
-}
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
- int ret;
- struct nf_conntrack_l3proto *p;
-
-retry: p = nf_ct_l3proto_find_get(l3proto);
- if (p == &nf_conntrack_generic_l3proto) {
- ret = request_module("nf_conntrack-%d", l3proto);
- if (!ret)
- goto retry;
-
- return -EPROTOTYPE;
- }
-
- return 0;
-}
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- preempt_disable();
- p = __nf_ct_l3proto_find(l3proto);
- preempt_enable();
-
- module_put(p->me);
-}
-
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
@@ -289,7 +147,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name,
{
int ret = 0;
char *cache_name;
- kmem_cache_t *cachep;
+ struct kmem_cache *cachep;
DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
features, name, size);
@@ -363,11 +221,12 @@ out_up_mutex:
mutex_unlock(&nf_ct_cache_mutex);
return ret;
}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_cache);
/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
- kmem_cache_t *cachep;
+ struct kmem_cache *cachep;
char *name;
/*
@@ -397,6 +256,7 @@ void nf_conntrack_unregister_cache(u_int32_t features)
mutex_unlock(&nf_ct_cache_mutex);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache);
int
nf_ct_get_tuple(const struct sk_buff *skb,
@@ -406,7 +266,7 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int8_t protonum,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
- const struct nf_conntrack_protocol *protocol)
+ const struct nf_conntrack_l4proto *l4proto)
{
NF_CT_TUPLE_U_BLANK(tuple);
@@ -417,14 +277,15 @@ nf_ct_get_tuple(const struct sk_buff *skb,
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return protocol->pkt_to_tuple(skb, dataoff, tuple);
+ return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
+EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_l3proto *l3proto,
- const struct nf_conntrack_protocol *protocol)
+ const struct nf_conntrack_l4proto *l4proto)
{
NF_CT_TUPLE_U_BLANK(inverse);
@@ -435,111 +296,14 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
- return protocol->invert_tuple(inverse, orig);
-}
-
-/* nf_conntrack_expect helper functions */
-void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
-{
- struct nf_conn_help *master_help = nfct_help(exp->master);
-
- NF_CT_ASSERT(master_help);
- ASSERT_WRITE_LOCK(&nf_conntrack_lock);
- NF_CT_ASSERT(!timer_pending(&exp->timeout));
-
- list_del(&exp->list);
- NF_CT_STAT_INC(expect_delete);
- master_help->expecting--;
- nf_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
- struct nf_conntrack_expect *exp = (void *)ul_expect;
-
- write_lock_bh(&nf_conntrack_lock);
- nf_ct_unlink_expect(exp);
- write_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_expect_put(exp);
-}
-
-struct nf_conntrack_expect *
-__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_expect *i;
-
- list_for_each_entry(i, &nf_conntrack_expect_list, list) {
- if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
- atomic_inc(&i->use);
- return i;
- }
- }
- return NULL;
-}
-
-/* Just find a expectation corresponding to a tuple. */
-struct nf_conntrack_expect *
-nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_expect *i;
-
- read_lock_bh(&nf_conntrack_lock);
- i = __nf_conntrack_expect_find(tuple);
- read_unlock_bh(&nf_conntrack_lock);
-
- return i;
-}
-
-/* If an expectation for this connection is found, it gets delete from
- * global list then returned. */
-static struct nf_conntrack_expect *
-find_expectation(const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_expect *i;
-
- list_for_each_entry(i, &nf_conntrack_expect_list, list) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
- && nf_ct_is_confirmed(i->master)) {
- if (i->flags & NF_CT_EXPECT_PERMANENT) {
- atomic_inc(&i->use);
- return i;
- } else if (del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- return i;
- }
- }
- }
- return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void nf_ct_remove_expectations(struct nf_conn *ct)
-{
- struct nf_conntrack_expect *i, *tmp;
- struct nf_conn_help *help = nfct_help(ct);
-
- /* Optimization: most connection never expect any others. */
- if (!help || help->expecting == 0)
- return;
-
- list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- nf_conntrack_expect_put(i);
- }
- }
+ return l4proto->invert_tuple(inverse, orig);
}
+EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
static void
clean_from_lists(struct nf_conn *ct)
{
DEBUGP("clean_from_lists(%p)\n", ct);
- ASSERT_WRITE_LOCK(&nf_conntrack_lock);
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
@@ -551,8 +315,9 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
+ struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
DEBUGP("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
@@ -561,6 +326,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_event(IPCT_DESTROY, ct);
set_bit(IPS_DYING_BIT, &ct->status);
+ if (help && help->helper && help->helper->destroy)
+ help->helper->destroy(ct);
+
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
@@ -568,9 +336,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (l3proto && l3proto->destroy)
l3proto->destroy(ct);
- proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
- if (proto && proto->destroy)
- proto->destroy(ct);
+ l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+ if (l4proto && l4proto->destroy)
+ l4proto->destroy(ct);
if (nf_conntrack_destroyed)
nf_conntrack_destroyed(ct);
@@ -618,7 +386,6 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
- ASSERT_READ_LOCK(&nf_conntrack_lock);
list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple)) {
@@ -630,6 +397,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
return NULL;
}
+EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
@@ -646,6 +414,7 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
return h;
}
+EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int hash,
@@ -669,6 +438,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct)
__nf_conntrack_hash_insert(ct, hash, repl_hash);
write_unlock_bh(&nf_conntrack_lock);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
/* Confirm a connection given skb; places it in hash table */
int
@@ -746,6 +516,7 @@ out:
write_unlock_bh(&nf_conntrack_lock);
return NF_DROP;
}
+EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
/* Returns true if a connection correspondings to the tuple (required
for NAT). */
@@ -761,6 +532,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
return h != NULL;
}
+EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
@@ -794,53 +566,13 @@ static int early_drop(struct list_head *chain)
return dropped;
}
-static struct nf_conntrack_helper *
-__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
- return h;
- }
- return NULL;
-}
-
-struct nf_conntrack_helper *
-nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_helper *helper;
-
- /* need nf_conntrack_lock to assure that helper exists until
- * try_module_get() is called */
- read_lock_bh(&nf_conntrack_lock);
-
- helper = __nf_ct_helper_find(tuple);
- if (helper) {
- /* need to increase module usage count to assure helper will
- * not go away while the caller is e.g. busy putting a
- * conntrack in the hash that uses the helper */
- if (!try_module_get(helper->me))
- helper = NULL;
- }
-
- read_unlock_bh(&nf_conntrack_lock);
-
- return helper;
-}
-
-void nf_ct_helper_put(struct nf_conntrack_helper *helper)
-{
- module_put(helper->me);
-}
-
static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
- const struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ u_int32_t features)
{
struct nf_conn *conntrack = NULL;
- u_int32_t features = 0;
struct nf_conntrack_helper *helper;
if (unlikely(!nf_conntrack_hash_rnd_initted)) {
@@ -866,12 +598,13 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
}
/* find features needed by this conntrack. */
- features = l3proto->get_features(orig);
+ features |= l3proto->get_features(orig);
/* FIXME: protect helper list per RCU */
read_lock_bh(&nf_conntrack_lock);
helper = __nf_ct_helper_find(repl);
- if (helper)
+ /* NAT might want to assign a helper later */
+ if (helper || features & NF_CT_F_NAT)
features |= NF_CT_F_HELP;
read_unlock_bh(&nf_conntrack_lock);
@@ -893,12 +626,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
memset(conntrack, 0, nf_ct_cache[features].size);
conntrack->features = features;
- if (helper) {
- struct nf_conn_help *help = nfct_help(conntrack);
- NF_CT_ASSERT(help);
- help->helper = helper;
- }
-
atomic_set(&conntrack->ct_general.use, 1);
conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
@@ -922,8 +649,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
struct nf_conntrack_l3proto *l3proto;
l3proto = __nf_ct_l3proto_find(orig->src.l3num);
- return __nf_conntrack_alloc(orig, repl, l3proto);
+ return __nf_conntrack_alloc(orig, repl, l3proto, 0);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *conntrack)
{
@@ -934,32 +662,40 @@ void nf_conntrack_free(struct nf_conn *conntrack)
kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
atomic_dec(&nf_conntrack_count);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_protocol *protocol,
+ struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff)
{
struct nf_conn *conntrack;
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_expect *exp;
+ u_int32_t features = 0;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
DEBUGP("Can't invert tuple.\n");
return NULL;
}
- conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
+ read_lock_bh(&nf_conntrack_lock);
+ exp = __nf_conntrack_expect_find(tuple);
+ if (exp && exp->helper)
+ features = NF_CT_F_HELP;
+ read_unlock_bh(&nf_conntrack_lock);
+
+ conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features);
if (conntrack == NULL || IS_ERR(conntrack)) {
DEBUGP("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)conntrack;
}
- if (!protocol->new(conntrack, skb, dataoff)) {
+ if (!l4proto->new(conntrack, skb, dataoff)) {
nf_conntrack_free(conntrack);
DEBUGP("init conntrack: can't track with proto module\n");
return NULL;
@@ -974,6 +710,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = exp->master;
+ if (exp->helper)
+ nfct_help(conntrack)->helper = exp->helper;
#ifdef CONFIG_NF_CONNTRACK_MARK
conntrack->mark = exp->master->mark;
#endif
@@ -982,8 +720,13 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
#endif
nf_conntrack_get(&conntrack->master->ct_general);
NF_CT_STAT_INC(expect_new);
- } else
+ } else {
+ struct nf_conn_help *help = nfct_help(conntrack);
+
+ if (help)
+ help->helper = __nf_ct_helper_find(&repl_tuple);
NF_CT_STAT_INC(new);
+ }
/* Overload tuple linked list to put us in unconfirmed list. */
list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
@@ -1006,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_protocol *proto,
+ struct nf_conntrack_l4proto *l4proto,
int *set_reply,
enum ip_conntrack_info *ctinfo)
{
@@ -1016,7 +759,7 @@ resolve_normal_ct(struct sk_buff *skb,
if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
dataoff, l3num, protonum, &tuple, l3proto,
- proto)) {
+ l4proto)) {
DEBUGP("resolve_normal_ct: Can't get tuple\n");
return NULL;
}
@@ -1024,7 +767,7 @@ resolve_normal_ct(struct sk_buff *skb,
/* look for tuple match */
h = nf_conntrack_find_get(&tuple, NULL);
if (!h) {
- h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
+ h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -1062,7 +805,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
unsigned int dataoff;
u_int8_t protonum;
int set_reply = 0;
@@ -1080,19 +823,19 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
return -ret;
}
- proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
+ l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);
/* It may be an special packet, error, unclean...
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
- if (proto->error != NULL &&
- (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+ if (l4proto->error != NULL &&
+ (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC(error);
NF_CT_STAT_INC(invalid);
return -ret;
}
- ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
+ ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
&set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
@@ -1108,7 +851,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
NF_CT_ASSERT((*pskb)->nfct);
- ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+ ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1124,255 +867,38 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
return ret;
}
+EXPORT_SYMBOL_GPL(nf_conntrack_in);
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig)
{
return nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num),
- __nf_ct_proto_find(orig->src.l3num,
+ __nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
}
+EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
-/* Would two expected things clash? */
-static inline int expect_clash(const struct nf_conntrack_expect *a,
- const struct nf_conntrack_expect *b)
-{
- /* Part covered by intersection of masks must be unequal,
- otherwise they clash */
- struct nf_conntrack_tuple intersect_mask;
- int count;
-
- intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
- intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
- intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
- intersect_mask.dst.protonum = a->mask.dst.protonum
- & b->mask.dst.protonum;
-
- for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
- intersect_mask.src.u3.all[count] =
- a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
- }
-
- for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
- intersect_mask.dst.u3.all[count] =
- a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
- }
-
- return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct nf_conntrack_expect *a,
- const struct nf_conntrack_expect *b)
-{
- return a->master == b->master
- && nf_ct_tuple_equal(&a->tuple, &b->tuple)
- && nf_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
-{
- struct nf_conntrack_expect *i;
-
- write_lock_bh(&nf_conntrack_lock);
- /* choose the the oldest expectation to evict */
- list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- write_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_expect_put(i);
- return;
- }
- }
- write_unlock_bh(&nf_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
-{
- struct nf_conntrack_expect *new;
-
- new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
- if (!new) {
- DEBUGP("expect_related: OOM allocating expect\n");
- return NULL;
- }
- new->master = me;
- atomic_set(&new->use, 1);
- return new;
-}
-
-void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+/* Alter reply tuple (maybe alter helper). This is for NAT, and is
+ implicitly racy: see __nf_conntrack_confirm */
+void nf_conntrack_alter_reply(struct nf_conn *ct,
+ const struct nf_conntrack_tuple *newreply)
{
- if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(nf_conntrack_expect_cachep, exp);
-}
-
-static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
-{
- struct nf_conn_help *master_help = nfct_help(exp->master);
-
- atomic_inc(&exp->use);
- master_help->expecting++;
- list_add(&exp->list, &nf_conntrack_expect_list);
-
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
- exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
- add_timer(&exp->timeout);
-
- exp->id = ++nf_conntrack_expect_next_id;
- atomic_inc(&exp->use);
- NF_CT_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct nf_conn *master)
-{
- struct nf_conntrack_expect *i;
-
- list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- nf_conntrack_expect_put(i);
- }
- break;
- }
- }
-}
-
-static inline int refresh_timer(struct nf_conntrack_expect *i)
-{
- struct nf_conn_help *master_help = nfct_help(i->master);
-
- if (!del_timer(&i->timeout))
- return 0;
-
- i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
-int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
-{
- struct nf_conntrack_expect *i;
- struct nf_conn *master = expect->master;
- struct nf_conn_help *master_help = nfct_help(master);
- int ret;
-
- NF_CT_ASSERT(master_help);
-
- DEBUGP("nf_conntrack_expect_related %p\n", related_to);
- DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
- DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
-
- write_lock_bh(&nf_conntrack_lock);
- list_for_each_entry(i, &nf_conntrack_expect_list, list) {
- if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
- }
- } else if (expect_clash(i, expect)) {
- ret = -EBUSY;
- goto out;
- }
- }
- /* Will be over limit? */
- if (master_help->helper->max_expected &&
- master_help->expecting >= master_help->helper->max_expected)
- evict_oldest_expect(master);
-
- nf_conntrack_expect_insert(expect);
- nf_conntrack_expect_event(IPEXP_NEW, expect);
- ret = 0;
-out:
- write_unlock_bh(&nf_conntrack_lock);
- return ret;
-}
-
-int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
-{
- int ret;
- BUG_ON(me->timeout == 0);
-
- ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
- sizeof(struct nf_conn)
- + sizeof(struct nf_conn_help)
- + __alignof__(struct nf_conn_help));
- if (ret < 0) {
- printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n");
- return ret;
- }
- write_lock_bh(&nf_conntrack_lock);
- list_add(&me->list, &helpers);
- write_unlock_bh(&nf_conntrack_lock);
-
- return 0;
-}
-
-struct nf_conntrack_helper *
-__nf_conntrack_helper_find_byname(const char *name)
-{
- struct nf_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (!strcmp(h->name, name))
- return h;
- }
-
- return NULL;
-}
-
-static inline void unhelp(struct nf_conntrack_tuple_hash *i,
- const struct nf_conntrack_helper *me)
-{
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && help->helper == me) {
- nf_conntrack_event(IPCT_HELPER, ct);
- help->helper = NULL;
- }
-}
-
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
-{
- unsigned int i;
- struct nf_conntrack_tuple_hash *h;
- struct nf_conntrack_expect *exp, *tmp;
-
- /* Need write lock here, to delete helper. */
write_lock_bh(&nf_conntrack_lock);
- list_del(&me->list);
-
- /* Get rid of expectations */
- list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if (help->helper == me && del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_conntrack_expect_put(exp);
- }
- }
+ /* Should be unconfirmed, so not in hash table yet */
+ NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
- /* Get rid of expecteds, set helpers to NULL. */
- list_for_each_entry(h, &unconfirmed, list)
- unhelp(h, me);
- for (i = 0; i < nf_conntrack_htable_size; i++) {
- list_for_each_entry(h, &nf_conntrack_hash[i], list)
- unhelp(h, me);
- }
- write_unlock_bh(&nf_conntrack_lock);
+ DEBUGP("Altering reply tuple of %p to ", ct);
+ NF_CT_DUMP_TUPLE(newreply);
- /* Someone could be still looking at the helper in a bh. */
- synchronize_net();
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+ if (!ct->master && help && help->expecting == 0)
+ help->helper = __nf_ct_helper_find(newreply);
+ write_unlock_bh(&nf_conntrack_lock);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
@@ -1399,9 +925,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
ct->timeout.expires = extra_jiffies;
event = IPCT_REFRESH;
} else {
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
+ unsigned long newtime = jiffies + extra_jiffies;
+
+ /* Only update the timeout if the new timeout is at least
+ HZ jiffies from the old timeout. Need del_timer for race
+ avoidance (may already be dying). */
+ if (newtime - ct->timeout.expires >= HZ
+ && del_timer(&ct->timeout)) {
+ ct->timeout.expires = newtime;
add_timer(&ct->timeout);
event = IPCT_REFRESH;
}
@@ -1412,9 +943,10 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
ct->counters[CTINFO2DIR(ctinfo)].packets++;
ct->counters[CTINFO2DIR(ctinfo)].bytes +=
skb->len - (unsigned int)(skb->nh.raw - skb->data);
- if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
- || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
- event |= IPCT_COUNTER_FILLING;
+
+ if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
+ || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
+ event |= IPCT_COUNTER_FILLING;
}
#endif
@@ -1424,6 +956,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (event)
nf_conntrack_event_cache(event, skb);
}
+EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
#if defined(CONFIG_NF_CT_NETLINK) || \
defined(CONFIG_NF_CT_NETLINK_MODULE)
@@ -1448,6 +981,7 @@ int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
nfattr_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nfattr);
static const size_t cta_min_proto[CTA_PROTO_MAX] = {
[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
@@ -1463,13 +997,12 @@ int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
return -EINVAL;
- t->src.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
- t->dst.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+ t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+ t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ct_port_nfattr_to_tuple);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
@@ -1490,6 +1023,7 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
nskb->nfctinfo = ctinfo;
nf_conntrack_get(nskb->nfct);
}
+EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
@@ -1520,9 +1054,10 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
if (iter(ct, data))
goto found;
}
+ write_unlock_bh(&nf_conntrack_lock);
return NULL;
found:
- atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+ atomic_inc(&ct->ct_general.use);
write_unlock_bh(&nf_conntrack_lock);
return ct;
}
@@ -1542,6 +1077,7 @@ nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
nf_ct_put(ct);
}
}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
static int kill_all(struct nf_conn *i, void *data)
{
@@ -1557,10 +1093,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
get_order(sizeof(struct list_head) * size));
}
-void nf_conntrack_flush()
+void nf_conntrack_flush(void)
{
nf_ct_iterate_cleanup(kill_all, NULL);
}
+EXPORT_SYMBOL_GPL(nf_conntrack_flush);
/* Mishearing the voices in his head, our hero wonders how he's
supposed to kill the mall. */
@@ -1598,6 +1135,8 @@ void nf_conntrack_cleanup(void)
free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
nf_conntrack_htable_size);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
+
/* free l3proto protocol tables */
for (i = 0; i < PF_MAX; i++)
if (nf_ct_protos[i]) {
@@ -1723,10 +1262,14 @@ int __init nf_conntrack_init(void)
goto err_free_conntrack_slab;
}
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+ if (ret < 0)
+ goto out_free_expect_slab;
+
/* Don't NEED lock here, but good form anyway. */
write_lock_bh(&nf_conntrack_lock);
- for (i = 0; i < PF_MAX; i++)
- nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
+ for (i = 0; i < AF_MAX; i++)
+ nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
write_unlock_bh(&nf_conntrack_lock);
/* For use by REJECT target */
@@ -1740,6 +1283,8 @@ int __init nf_conntrack_init(void)
return ret;
+out_free_expect_slab:
+ kmem_cache_destroy(nf_conntrack_expect_cachep);
err_free_conntrack_slab:
nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
new file mode 100644
index 00000000000..1a223e0c085
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -0,0 +1,93 @@
+/* Event cache for netfilter. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+
+ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
+
+DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
+
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+static inline void
+__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+{
+ if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
+ && ecache->events)
+ atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
+ ecache->ct);
+
+ ecache->events = 0;
+ nf_ct_put(ecache->ct);
+ ecache->ct = NULL;
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling for freeing the skb */
+void nf_ct_deliver_cached_events(const struct nf_conn *ct)
+{
+ struct nf_conntrack_ecache *ecache;
+
+ local_bh_disable();
+ ecache = &__get_cpu_var(nf_conntrack_ecache);
+ if (ecache->ct == ct)
+ __nf_ct_deliver_cached_events(ecache);
+ local_bh_enable();
+}
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+
+/* Deliver cached events for old pending events, if current conntrack != old */
+void __nf_ct_event_cache_init(struct nf_conn *ct)
+{
+ struct nf_conntrack_ecache *ecache;
+
+ /* take care of delivering potentially old events */
+ ecache = &__get_cpu_var(nf_conntrack_ecache);
+ BUG_ON(ecache->ct == ct);
+ if (ecache->ct)
+ __nf_ct_deliver_cached_events(ecache);
+ /* initialize for this conntrack/packet */
+ ecache->ct = ct;
+ nf_conntrack_get(&ct->ct_general);
+}
+EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
+
+/* flush the event cache - touches other CPU's data and must not be called
+ * while packets are still passing through the code */
+void nf_ct_event_cache_flush(void)
+{
+ struct nf_conntrack_ecache *ecache;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ecache = &per_cpu(nf_conntrack_ecache, cpu);
+ if (ecache->ct)
+ nf_ct_put(ecache->ct);
+ }
+}
+
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
new file mode 100644
index 00000000000..9cbf926cdd1
--- /dev/null
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -0,0 +1,445 @@
+/* Expectation handling for nf_conntrack. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+LIST_HEAD(nf_conntrack_expect_list);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
+
+struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
+static unsigned int nf_conntrack_expect_next_id;
+
+/* nf_conntrack_expect helper functions */
+void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
+{
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+
+ NF_CT_ASSERT(master_help);
+ NF_CT_ASSERT(!timer_pending(&exp->timeout));
+
+ list_del(&exp->list);
+ NF_CT_STAT_INC(expect_delete);
+ master_help->expecting--;
+ nf_conntrack_expect_put(exp);
+}
+EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
+
+static void expectation_timed_out(unsigned long ul_expect)
+{
+ struct nf_conntrack_expect *exp = (void *)ul_expect;
+
+ write_lock_bh(&nf_conntrack_lock);
+ nf_ct_unlink_expect(exp);
+ write_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_expect_put(exp);
+}
+
+struct nf_conntrack_expect *
+__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i;
+
+ list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+ if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
+ return i;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
+
+/* Just find an expectation corresponding to a tuple. */
+struct nf_conntrack_expect *
+nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *i;
+
+ read_lock_bh(&nf_conntrack_lock);
+ i = __nf_conntrack_expect_find(tuple);
+ if (i)
+ atomic_inc(&i->use);
+ read_unlock_bh(&nf_conntrack_lock);
+
+ return i;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
+
+/* If an expectation for this connection is found, it gets deleted from
+ * the global list, then returned. */
+struct nf_conntrack_expect *
+find_expectation(const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conntrack_expect *exp;
+
+ exp = __nf_conntrack_expect_find(tuple);
+ if (!exp)
+ return NULL;
+
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (!nf_ct_is_confirmed(exp->master))
+ return NULL;
+
+ if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ atomic_inc(&exp->use);
+ return exp;
+ } else if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ return exp;
+ }
+
+ return NULL;
+}
+
+/* delete all expectations for this conntrack */
+void nf_ct_remove_expectations(struct nf_conn *ct)
+{
+ struct nf_conntrack_expect *i, *tmp;
+ struct nf_conn_help *help = nfct_help(ct);
+
+ /* Optimization: most connections never expect any others. */
+ if (!help || help->expecting == 0)
+ return;
+
+ list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
+ if (i->master == ct && del_timer(&i->timeout)) {
+ nf_ct_unlink_expect(i);
+ nf_conntrack_expect_put(i);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
+
+/* Would two expected things clash? */
+static inline int expect_clash(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b)
+{
+ /* Part covered by intersection of masks must be unequal,
+ otherwise they clash */
+ struct nf_conntrack_tuple intersect_mask;
+ int count;
+
+ intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
+ intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
+ intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
+ intersect_mask.dst.protonum = a->mask.dst.protonum
+ & b->mask.dst.protonum;
+
+ for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
+ intersect_mask.src.u3.all[count] =
+ a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
+ }
+
+ for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
+ intersect_mask.dst.u3.all[count] =
+ a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
+ }
+
+ return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+}
+
+static inline int expect_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b)
+{
+ return a->master == b->master
+ && nf_ct_tuple_equal(&a->tuple, &b->tuple)
+ && nf_ct_tuple_equal(&a->mask, &b->mask);
+}
+
+/* Generally a bad idea to call this: could have matched already. */
+void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
+{
+ struct nf_conntrack_expect *i;
+
+ write_lock_bh(&nf_conntrack_lock);
+ /* choose the oldest expectation to evict */
+ list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+ if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+ nf_ct_unlink_expect(i);
+ write_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_expect_put(i);
+ return;
+ }
+ }
+ write_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
+
+/* We don't increase the master conntrack refcount for non-fulfilled
+ * conntracks. During the conntrack destruction, the expectations are
+ * always killed before the conntrack itself */
+struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
+{
+ struct nf_conntrack_expect *new;
+
+ new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
+ if (!new)
+ return NULL;
+
+ new->master = me;
+ atomic_set(&new->use, 1);
+ return new;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
+
+void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
+ union nf_conntrack_address *saddr,
+ union nf_conntrack_address *daddr,
+ u_int8_t proto, __be16 *src, __be16 *dst)
+{
+ int len;
+
+ if (family == AF_INET)
+ len = 4;
+ else
+ len = 16;
+
+ exp->flags = 0;
+ exp->expectfn = NULL;
+ exp->helper = NULL;
+ exp->tuple.src.l3num = family;
+ exp->tuple.dst.protonum = proto;
+ exp->mask.src.l3num = 0xFFFF;
+ exp->mask.dst.protonum = 0xFF;
+
+ if (saddr) {
+ memcpy(&exp->tuple.src.u3, saddr, len);
+ if (sizeof(exp->tuple.src.u3) > len)
+ /* address needs to be cleared for nf_ct_tuple_equal */
+ memset((void *)&exp->tuple.src.u3 + len, 0x00,
+ sizeof(exp->tuple.src.u3) - len);
+ memset(&exp->mask.src.u3, 0xFF, len);
+ if (sizeof(exp->mask.src.u3) > len)
+ memset((void *)&exp->mask.src.u3 + len, 0x00,
+ sizeof(exp->mask.src.u3) - len);
+ } else {
+ memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
+ memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
+ }
+
+ if (daddr) {
+ memcpy(&exp->tuple.dst.u3, daddr, len);
+ if (sizeof(exp->tuple.dst.u3) > len)
+ /* address needs to be cleared for nf_ct_tuple_equal */
+ memset((void *)&exp->tuple.dst.u3 + len, 0x00,
+ sizeof(exp->tuple.dst.u3) - len);
+ memset(&exp->mask.dst.u3, 0xFF, len);
+ if (sizeof(exp->mask.dst.u3) > len)
+ memset((void *)&exp->mask.dst.u3 + len, 0x00,
+ sizeof(exp->mask.dst.u3) - len);
+ } else {
+ memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
+ memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
+ }
+
+ if (src) {
+ exp->tuple.src.u.all = (__force u16)*src;
+ exp->mask.src.u.all = 0xFFFF;
+ } else {
+ exp->tuple.src.u.all = 0;
+ exp->mask.src.u.all = 0;
+ }
+
+ if (dst) {
+ exp->tuple.dst.u.all = (__force u16)*dst;
+ exp->mask.dst.u.all = 0xFFFF;
+ } else {
+ exp->tuple.dst.u.all = 0;
+ exp->mask.dst.u.all = 0;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
+
+void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+{
+ if (atomic_dec_and_test(&exp->use))
+ kmem_cache_free(nf_conntrack_expect_cachep, exp);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
+
+static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
+{
+ struct nf_conn_help *master_help = nfct_help(exp->master);
+
+ atomic_inc(&exp->use);
+ master_help->expecting++;
+ list_add(&exp->list, &nf_conntrack_expect_list);
+
+ init_timer(&exp->timeout);
+ exp->timeout.data = (unsigned long)exp;
+ exp->timeout.function = expectation_timed_out;
+ exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
+ add_timer(&exp->timeout);
+
+ exp->id = ++nf_conntrack_expect_next_id;
+ atomic_inc(&exp->use);
+ NF_CT_STAT_INC(expect_create);
+}
+
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct nf_conn *master)
+{
+ struct nf_conntrack_expect *i;
+
+ list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+ if (i->master == master) {
+ if (del_timer(&i->timeout)) {
+ nf_ct_unlink_expect(i);
+ nf_conntrack_expect_put(i);
+ }
+ break;
+ }
+ }
+}
+
+static inline int refresh_timer(struct nf_conntrack_expect *i)
+{
+ struct nf_conn_help *master_help = nfct_help(i->master);
+
+ if (!del_timer(&i->timeout))
+ return 0;
+
+ i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
+ add_timer(&i->timeout);
+ return 1;
+}
+
+int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
+{
+ struct nf_conntrack_expect *i;
+ struct nf_conn *master = expect->master;
+ struct nf_conn_help *master_help = nfct_help(master);
+ int ret;
+
+ NF_CT_ASSERT(master_help);
+
+ write_lock_bh(&nf_conntrack_lock);
+ list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+ if (expect_matches(i, expect)) {
+ /* Refresh timer: if it's dying, ignore.. */
+ if (refresh_timer(i)) {
+ ret = 0;
+ goto out;
+ }
+ } else if (expect_clash(i, expect)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+ /* Will be over limit? */
+ if (master_help->helper->max_expected &&
+ master_help->expecting >= master_help->helper->max_expected)
+ evict_oldest_expect(master);
+
+ nf_conntrack_expect_insert(expect);
+ nf_conntrack_expect_event(IPEXP_NEW, expect);
+ ret = 0;
+out:
+ write_unlock_bh(&nf_conntrack_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
+
+#ifdef CONFIG_PROC_FS
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct list_head *e = &nf_conntrack_expect_list;
+ loff_t i;
+
+ /* strange seq_file api calls stop even if we fail,
+ * thus we need to grab lock since stop unlocks */
+ read_lock_bh(&nf_conntrack_lock);
+
+ if (list_empty(e))
+ return NULL;
+
+ for (i = 0; i <= *pos; i++) {
+ e = e->next;
+ if (e == &nf_conntrack_expect_list)
+ return NULL;
+ }
+ return e;
+}
+
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct list_head *e = v;
+
+ ++*pos;
+ e = e->next;
+
+ if (e == &nf_conntrack_expect_list)
+ return NULL;
+
+ return e;
+}
+
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
+ read_unlock_bh(&nf_conntrack_lock);
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *expect = v;
+
+ if (expect->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&expect->timeout)
+ ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+ seq_printf(s, "l3proto = %u proto=%u ",
+ expect->tuple.src.l3num,
+ expect->tuple.dst.protonum);
+ print_tuple(s, &expect->tuple,
+ __nf_ct_l3proto_find(expect->tuple.src.l3num),
+ __nf_ct_l4proto_find(expect->tuple.src.l3num,
+ expect->tuple.dst.protonum));
+ return seq_putc(s, '\n');
+}
+
+static struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &exp_seq_ops);
+}
+
+struct file_operations exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+#endif /* CONFIG_PROC_FS */
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 0c17a5bd112..92a94716876 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -26,12 +26,15 @@
#include <net/tcp.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <linux/netfilter/nf_conntrack_ftp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
MODULE_DESCRIPTION("ftp connection tracking helper");
+MODULE_ALIAS("ip_conntrack_ftp");
/* This is slow, but it's simple. --RR */
static char *ftp_buffer;
@@ -48,7 +51,7 @@ module_param(loose, bool, 0600);
unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
+ enum nf_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp,
@@ -71,7 +74,7 @@ static struct ftp_search {
size_t plen;
char skip;
char term;
- enum ip_ct_ftp_type ftptype;
+ enum nf_ct_ftp_type ftptype;
int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
} search[IP_CT_DIR_MAX][2] = {
[IP_CT_DIR_ORIGINAL] = {
@@ -80,7 +83,7 @@ static struct ftp_search {
.plen = sizeof("PORT") - 1,
.skip = ' ',
.term = '\r',
- .ftptype = IP_CT_FTP_PORT,
+ .ftptype = NF_CT_FTP_PORT,
.getnum = try_rfc959,
},
{
@@ -88,7 +91,7 @@ static struct ftp_search {
.plen = sizeof("EPRT") - 1,
.skip = ' ',
.term = '\r',
- .ftptype = IP_CT_FTP_EPRT,
+ .ftptype = NF_CT_FTP_EPRT,
.getnum = try_eprt,
},
},
@@ -98,7 +101,7 @@ static struct ftp_search {
.plen = sizeof("227 ") - 1,
.skip = '(',
.term = ')',
- .ftptype = IP_CT_FTP_PASV,
+ .ftptype = NF_CT_FTP_PASV,
.getnum = try_rfc959,
},
{
@@ -106,7 +109,7 @@ static struct ftp_search {
.plen = sizeof("229 ") - 1,
.skip = '(',
.term = ')',
- .ftptype = IP_CT_FTP_EPSV,
+ .ftptype = NF_CT_FTP_EPSV,
.getnum = try_epsv_response,
},
},
@@ -171,7 +174,7 @@ static int try_rfc959(const char *data, size_t dlen,
/* Grab port: number up to delimiter */
static int get_port(const char *data, int start, size_t dlen, char delim,
- u_int16_t *port)
+ __be16 *port)
{
u_int16_t tmp_port = 0;
int i;
@@ -317,7 +320,7 @@ static int find_pattern(const char *data, size_t dlen,
}
/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
+static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
{
unsigned int i;
@@ -328,7 +331,7 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
}
/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
struct sk_buff *skb)
{
unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -364,12 +367,12 @@ static int help(struct sk_buff **pskb,
u32 seq;
int dir = CTINFO2DIR(ctinfo);
unsigned int matchlen, matchoff;
- struct ip_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
+ struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
struct nf_conntrack_expect *exp;
struct nf_conntrack_man cmd = {};
-
unsigned int i;
int found = 0, ends_in_nl;
+ typeof(nf_nat_ftp_hook) nf_nat_ftp;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED
@@ -500,12 +503,12 @@ static int help(struct sk_buff **pskb,
.u = { .tcp = { 0 }},
},
.dst = { .protonum = 0xFF,
- .u = { .tcp = { 0xFFFF }},
+ .u = { .tcp = { __constant_htons(0xFFFF) }},
},
};
if (cmd.l3num == PF_INET) {
- exp->mask.src.u3.ip = 0xFFFFFFFF;
- exp->mask.dst.u3.ip = 0xFFFFFFFF;
+ exp->mask.src.u3.ip = htonl(0xFFFFFFFF);
+ exp->mask.dst.u3.ip = htonl(0xFFFFFFFF);
} else {
memset(exp->mask.src.u3.ip6, 0xFF,
sizeof(exp->mask.src.u3.ip6));
@@ -514,13 +517,15 @@ static int help(struct sk_buff **pskb,
}
exp->expectfn = NULL;
+ exp->helper = NULL;
exp->flags = 0;
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
- if (nf_nat_ftp_hook)
- ret = nf_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype,
- matchoff, matchlen, exp, &seq);
+ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
+ if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+ matchoff, matchlen, exp, &seq);
else {
/* Can't expect this? Best to drop packet now. */
if (nf_conntrack_expect_related(exp) != 0)
@@ -584,7 +589,8 @@ static int __init nf_conntrack_ftp_init(void)
for (j = 0; j < 2; j++) {
ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i][j].mask.src.u.tcp.port = 0xFFFF;
+ ftp[i][j].mask.src.l3num = 0xFFFF;
+ ftp[i][j].mask.src.u.tcp.port = htons(0xFFFF);
ftp[i][j].mask.dst.protonum = 0xFF;
ftp[i][j].max_expected = 1;
ftp[i][j].timeout = 5 * 60; /* 5 Minutes */
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 26dfecadb33..f6fad713d48 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -15,7 +15,7 @@
#else
#include <stdio.h>
#endif
-#include <linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h>
+#include <linux/netfilter/nf_conntrack_h323_asn1.h>
/* Trace Flag */
#ifndef H323_TRACE
@@ -144,7 +144,7 @@ static decoder_t Decoders[] = {
/****************************************************************************
* H.323 Types
****************************************************************************/
-#include "ip_conntrack_helper_h323_types.c"
+#include "nf_conntrack_h323_types.c"
/****************************************************************************
* Functions
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
new file mode 100644
index 00000000000..6d8568959f8
--- /dev/null
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -0,0 +1,1856 @@
+/*
+ * H.323 connection tracking helper
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 connection tracking module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * For more information, please see http://nath323.sourceforge.net/
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Parameters */
+static unsigned int default_rrq_ttl __read_mostly = 300;
+module_param(default_rrq_ttl, uint, 0600);
+MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
+
+static int gkrouted_only __read_mostly = 1;
+module_param(gkrouted_only, int, 0600);
+MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
+
+static int callforward_filter __read_mostly = 1;
+module_param(callforward_filter, bool, 0600);
+MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
+ "if both endpoints are on different sides "
+ "(determined by routing information)");
+
+/* Hooks for NAT */
+int (*set_h245_addr_hook) (struct sk_buff **pskb,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 port)
+ __read_mostly;
+int (*set_h225_addr_hook) (struct sk_buff **pskb,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 port)
+ __read_mostly;
+int (*set_sig_addr_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data,
+ TransportAddress *taddr, int count) __read_mostly;
+int (*set_ras_addr_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data,
+ TransportAddress *taddr, int count) __read_mostly;
+int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ __be16 port, __be16 rtp_port,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp) __read_mostly;
+int (*nat_t120_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+int (*nat_h245_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp) __read_mostly;
+int (*nat_q931_hook) (struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, TransportAddress *taddr, int idx,
+ __be16 port, struct nf_conntrack_expect *exp)
+ __read_mostly;
+
+static DEFINE_SPINLOCK(nf_h323_lock);
+static char *h323_buffer;
+
+static struct nf_conntrack_helper nf_conntrack_helper_h245;
+static struct nf_conntrack_helper nf_conntrack_helper_q931[];
+static struct nf_conntrack_helper nf_conntrack_helper_ras[];
+
+/****************************************************************************/
+static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned char **data, int *datalen, int *dataoff)
+{
+ struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ int dir = CTINFO2DIR(ctinfo);
+ struct tcphdr _tcph, *th;
+ int tcpdatalen;
+ int tcpdataoff;
+ unsigned char *tpkt;
+ int tpktlen;
+ int tpktoff;
+
+ /* Get TCP header */
+ th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return 0;
+
+ /* Get TCP data offset */
+ tcpdataoff = protoff + th->doff * 4;
+
+ /* Get TCP data length */
+ tcpdatalen = (*pskb)->len - tcpdataoff;
+ if (tcpdatalen <= 0) /* No TCP data */
+ goto clear_out;
+
+ if (*data == NULL) { /* first TPKT */
+ /* Get first TPKT pointer */
+ tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
+ h323_buffer);
+ BUG_ON(tpkt == NULL);
+
+ /* Validate TPKT identifier */
+ if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
+ /* Netmeeting sends TPKT header and data separately */
+ if (info->tpkt_len[dir] > 0) {
+ DEBUGP("nf_ct_h323: previous packet "
+ "indicated separate TPKT data of %hu "
+ "bytes\n", info->tpkt_len[dir]);
+ if (info->tpkt_len[dir] <= tcpdatalen) {
+ /* Yes, there was a TPKT header
+ * received */
+ *data = tpkt;
+ *datalen = info->tpkt_len[dir];
+ *dataoff = 0;
+ goto out;
+ }
+
+ /* Fragmented TPKT */
+ if (net_ratelimit())
+ printk("nf_ct_h323: "
+ "fragmented TPKT\n");
+ goto clear_out;
+ }
+
+ /* It is not even a TPKT */
+ return 0;
+ }
+ tpktoff = 0;
+ } else { /* Next TPKT */
+ tpktoff = *dataoff + *datalen;
+ tcpdatalen -= tpktoff;
+ if (tcpdatalen <= 4) /* No more TPKT */
+ goto clear_out;
+ tpkt = *data + *datalen;
+
+ /* Validate TPKT identifier */
+ if (tpkt[0] != 0x03 || tpkt[1] != 0)
+ goto clear_out;
+ }
+
+ /* Validate TPKT length */
+ tpktlen = tpkt[2] * 256 + tpkt[3];
+ if (tpktlen < 4)
+ goto clear_out;
+ if (tpktlen > tcpdatalen) {
+ if (tcpdatalen == 4) { /* Separate TPKT header */
+ /* Netmeeting sends TPKT header and data separately */
+ DEBUGP("nf_ct_h323: separate TPKT header indicates "
+ "there will be TPKT data of %hu bytes\n",
+ tpktlen - 4);
+ info->tpkt_len[dir] = tpktlen - 4;
+ return 0;
+ }
+
+ if (net_ratelimit())
+ printk("nf_ct_h323: incomplete TPKT (fragmented?)\n");
+ goto clear_out;
+ }
+
+ /* This is the encapsulated data */
+ *data = tpkt + 4;
+ *datalen = tpktlen - 4;
+ *dataoff = tpktoff + 4;
+
+ out:
+ /* Clear TPKT length */
+ info->tpkt_len[dir] = 0;
+ return 1;
+
+ clear_out:
+ info->tpkt_len[dir] = 0;
+ return 0;
+}
+
+/****************************************************************************/
+static int get_h245_addr(struct nf_conn *ct, unsigned char *data,
+ H245_TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 *port)
+{
+ unsigned char *p;
+ int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ int len;
+
+ if (taddr->choice != eH245_TransportAddress_unicastAddress)
+ return 0;
+
+ switch (taddr->unicastAddress.choice) {
+ case eUnicastAddress_iPAddress:
+ if (family != AF_INET)
+ return 0;
+ p = data + taddr->unicastAddress.iPAddress.network;
+ len = 4;
+ break;
+ case eUnicastAddress_iP6Address:
+ if (family != AF_INET6)
+ return 0;
+ p = data + taddr->unicastAddress.iP6Address.network;
+ len = 16;
+ break;
+ default:
+ return 0;
+ }
+
+ memcpy(addr, p, len);
+ memset((void *)addr + len, 0, sizeof(*addr) - len);
+ memcpy(port, p + len, sizeof(__be16));
+
+ return 1;
+}
+
+/****************************************************************************
+ * Create expectations for the RTP/RTCP port pair derived from an H.245
+ * transport address.
+ *
+ * Returns 0 without creating anything when the address cannot be read,
+ * differs from the source address of this direction's tuple, or has
+ * port 0. Returns -1 when an expectation cannot be allocated or
+ * registered. Otherwise returns the NAT hook's result, or 0 on the
+ * conntrack-only path. nf_conntrack_expect_put() is called on both
+ * expectations before returning.
+ ****************************************************************************/
+static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ __be16 rtp_port, rtcp_port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *rtp_exp;
+ struct nf_conntrack_expect *rtcp_exp;
+ typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
+
+ /* Read RTP or RTCP address */
+ if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* RTP port is even: clear the low bit of the signalled port to get the
+  * RTP port; the RTCP port is the RTP port plus one */
+ port &= htons(~1);
+ rtp_port = port;
+ rtcp_port = htons(ntohs(port) + 1);
+
+ /* Create expect for RTP */
+ if ((rtp_exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtp_port);
+
+ /* Create expect for RTCP */
+ if ((rtcp_exp = nf_conntrack_expect_alloc(ct)) == NULL) {
+ nf_conntrack_expect_put(rtp_exp);
+ return -1;
+ }
+ nf_conntrack_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_UDP, NULL, &rtcp_port);
+
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed: the two directions disagree on this endpoint's address,
+  * a NAT hook is registered and the connection has NAT status bits */
+ ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ taddr, port, rtp_port, rtp_exp, rtcp_exp);
+ } else { /* Conntrack only */
+ if (nf_conntrack_expect_related(rtp_exp) == 0) {
+ if (nf_conntrack_expect_related(rtcp_exp) == 0) {
+ DEBUGP("nf_ct_h323: expect RTP ");
+ NF_CT_DUMP_TUPLE(&rtp_exp->tuple);
+ DEBUGP("nf_ct_h323: expect RTCP ");
+ NF_CT_DUMP_TUPLE(&rtcp_exp->tuple);
+ } else {
+ nf_conntrack_unexpect_related(rtp_exp);
+ ret = -1;
+ }
+ } else
+ ret = -1;
+ }
+
+ nf_conntrack_expect_put(rtp_exp);
+ nf_conntrack_expect_put(rtcp_exp);
+
+ return ret;
+}
+
+/****************************************************************************
+ * Create a TCP expectation for the T.120 address carried in an H.245
+ * transport address. The expectation is flagged NF_CT_EXPECT_PERMANENT.
+ *
+ * Returns 0 without creating anything when the address cannot be read,
+ * differs from the source address of this direction's tuple, or has
+ * port 0. Returns -1 when the expectation cannot be allocated or
+ * registered. Otherwise returns the NAT hook's result, or 0 on the
+ * conntrack-only path.
+ ****************************************************************************/
+static int expect_t120(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *exp;
+ typeof(nat_t120_hook) nat_t120;
+
+ /* Read T.120 address */
+ if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* Create expect for T.120 connections */
+ if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */
+
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ (nat_t120 = rcu_dereference(nat_t120_hook)) &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
+ port, exp);
+ } else { /* Conntrack only */
+ if (nf_conntrack_expect_related(exp) == 0) {
+ DEBUGP("nf_ct_h323: expect T.120 ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_conntrack_expect_put(exp);
+
+ return ret;
+}
+
+/****************************************************************************
+ * Handle the mediaChannel and mediaControlChannel addresses of an
+ * H2250LogicalChannelParameters, each only when its option bit is set.
+ * Both are passed to expect_rtp_rtcp().
+ *
+ * Returns -1 as soon as one call fails, 0 otherwise.
+ ****************************************************************************/
+static int process_h245_channel(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ H2250LogicalChannelParameters *channel)
+{
+ int ret;
+
+ if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
+ /* RTP */
+ ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ &channel->mediaChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (channel->
+ options & eH2250LogicalChannelParameters_mediaControlChannel) {
+ /* RTCP */
+ ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ &channel->mediaControlChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded OpenLogicalChannel.
+ *
+ * In order: the forward h2250LogicalChannelParameters, the reverse
+ * h2250LogicalChannelParameters (only if the reverse parameters and their
+ * multiplexParameters are present), both via process_h245_channel(); then
+ * a T.120 separate-stack local area address via expect_t120().
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ OpenLogicalChannel *olc)
+{
+ int ret;
+
+ DEBUGP("nf_ct_h323: OpenLogicalChannel\n");
+
+ if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
+ eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
+ {
+ ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ &olc->
+ forwardLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olc->options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters) &&
+ (olc->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
+ && (olc->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret =
+ process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ &olc->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olc->options & eOpenLogicalChannel_separateStack) &&
+ olc->forwardLogicalChannelParameters.dataType.choice ==
+ eDataType_data &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ choice == eDataApplicationCapability_application_t120 &&
+ olc->forwardLogicalChannelParameters.dataType.data.application.
+ t120.choice == eDataProtocolCapability_separateLANStack &&
+ olc->separateStack.networkAddress.choice ==
+ eNetworkAccessParameters_networkAddress_localAreaAddress) {
+ ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+ &olc->separateStack.networkAddress.
+ localAreaAddress);
+ if (ret < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded OpenLogicalChannelAck.
+ *
+ * The reverse h2250LogicalChannelParameters, when present, go through
+ * process_h245_channel(). The mediaChannel and mediaControlChannel of the
+ * forward h2250LogicalChannelAckParameters, when present, go through
+ * expect_rtp_rtcp().
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ OpenLogicalChannelAck *olca)
+{
+ H2250LogicalChannelAckParameters *ack;
+ int ret;
+
+ DEBUGP("nf_ct_h323: OpenLogicalChannelAck\n");
+
+ if ((olca->options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
+ (olca->reverseLogicalChannelParameters.options &
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
+ && (olca->reverseLogicalChannelParameters.multiplexParameters.
+ choice ==
+ eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
+ {
+ ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ &olca->
+ reverseLogicalChannelParameters.
+ multiplexParameters.
+ h2250LogicalChannelParameters);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((olca->options &
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
+ (olca->forwardMultiplexAckParameters.choice ==
+ eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
+ {
+ ack = &olca->forwardMultiplexAckParameters.
+ h2250LogicalChannelAckParameters;
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaChannel) {
+ /* RTP */
+ ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ &ack->mediaChannel);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (ack->options &
+ eH2250LogicalChannelAckParameters_mediaControlChannel) {
+ /* RTCP */
+ ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ &ack->mediaControlChannel);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Dispatch a decoded H.245 message.
+ *
+ * OpenLogicalChannel requests go to process_olc() and
+ * OpenLogicalChannelAck responses go to process_olca(); their result is
+ * returned directly. Every other message is only reported through DEBUGP
+ * and yields 0.
+ ****************************************************************************/
+static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ MultimediaSystemControlMessage *mscm)
+{
+ switch (mscm->choice) {
+ case eMultimediaSystemControlMessage_request:
+ if (mscm->request.choice ==
+ eRequestMessage_openLogicalChannel) {
+ return process_olc(pskb, ct, ctinfo, data, dataoff,
+ &mscm->request.openLogicalChannel);
+ }
+ DEBUGP("nf_ct_h323: H.245 Request %d\n",
+ mscm->request.choice);
+ break;
+ case eMultimediaSystemControlMessage_response:
+ if (mscm->response.choice ==
+ eResponseMessage_openLogicalChannelAck) {
+ return process_olca(pskb, ct, ctinfo, data, dataoff,
+ &mscm->response.
+ openLogicalChannelAck);
+ }
+ DEBUGP("nf_ct_h323: H.245 Response %d\n",
+ mscm->response.choice);
+ break;
+ default:
+ DEBUGP("nf_ct_h323: H.245 signal %d\n", mscm->choice);
+ break;
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Conntrack helper callback for H.245 connections.
+ *
+ * Packets are accepted unexamined unless ctinfo is IP_CT_ESTABLISHED in
+ * either direction. Otherwise every TPKT returned by get_tpkt_data() is
+ * decoded into the function-static mscm and handed to process_h245(),
+ * all while holding nf_h323_lock.
+ *
+ * Returns NF_DROP when process_h245() fails. A decoding error is logged
+ * (rate-limited), ends the loop, and the packet is still accepted.
+ *
+ * NOTE(review): presumably nf_h323_lock is what serialises use of the
+ * static mscm buffer -- confirm.
+ ****************************************************************************/
+static int h245_help(struct sk_buff **pskb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ static MultimediaSystemControlMessage mscm;
+ unsigned char *data = NULL;
+ int datalen;
+ int dataoff;
+ int ret;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
+ return NF_ACCEPT;
+ }
+ DEBUGP("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+
+ spin_lock_bh(&nf_h323_lock);
+
+ /* Process each TPKT */
+ while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ &data, &datalen, &dataoff)) {
+ DEBUGP("nf_ct_h245: TPKT len=%d ", datalen);
+ NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+ /* Decode H.245 signal */
+ ret = DecodeMultimediaSystemControlMessage(data, datalen,
+ &mscm);
+ if (ret < 0) {
+ if (net_ratelimit())
+ printk("nf_ct_h245: decoding error: %s\n",
+ ret == H323_ERROR_BOUND ?
+ "out of bound" : "out of range");
+ /* We don't drop when decoding error */
+ break;
+ }
+
+ /* Process H.245 signal */
+ if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+ goto drop;
+ }
+
+ spin_unlock_bh(&nf_h323_lock);
+ return NF_ACCEPT;
+
+ drop:
+ spin_unlock_bh(&nf_h323_lock);
+ if (net_ratelimit())
+ printk("nf_ct_h245: packet dropped\n");
+ return NF_DROP;
+}
+
+/****************************************************************************
+ * Helper descriptor for H.245 connections, with h245_help() as callback.
+ * expect_h245() attaches it to the expectations it creates.
+ *
+ * NOTE(review): tuple.dst.protonum is IPPROTO_UDP here, while
+ * expect_h245() builds its expectation with IPPROTO_TCP -- confirm
+ * whether the tuple/mask of this descriptor are actually consulted.
+ ****************************************************************************/
+static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
+ .name = "H.245",
+ .me = THIS_MODULE,
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
+ .timeout = 240,
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .help = h245_help
+};
+
+/****************************************************************************
+ * Read the IPv4/IPv6 address and port that an H.225 TransportAddress
+ * points at inside the packet payload.
+ *
+ * data:  start of the payload; the decoded ip / ip6 offset is added to it
+ * taddr: decoded address; only the ipAddress / ip6Address choices are
+ *        handled
+ * addr:  receives the 4 or 16 address bytes, zero-padded to sizeof(*addr)
+ * port:  receives the 2 bytes that follow the address, copied unchanged
+ *
+ * Returns 1 on success. Returns 0 if taddr is any other choice, or if its
+ * address family differs from the l3num of ct's original-direction tuple.
+ ****************************************************************************/
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+ TransportAddress *taddr,
+ union nf_conntrack_address *addr, __be16 *port)
+{
+ unsigned char *p;
+ int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ int len;
+
+ switch (taddr->choice) {
+ case eTransportAddress_ipAddress:
+ if (family != AF_INET)
+ return 0;
+ p = data + taddr->ipAddress.ip;
+ len = 4;
+ break;
+ case eTransportAddress_ip6Address:
+ if (family != AF_INET6)
+ return 0;
+ p = data + taddr->ip6Address.ip6;
+ len = 16;
+ break;
+ default:
+ return 0;
+ }
+
+ memcpy(addr, p, len);
+ memset((void *)addr + len, 0, sizeof(*addr) - len);
+ memcpy(port, p + len, sizeof(__be16));
+
+ return 1;
+}
+
+/****************************************************************************
+ * Create a TCP expectation for the H.245 address carried in an H.225
+ * message and attach nf_conntrack_helper_h245 to it.
+ *
+ * Returns 0 without creating anything when the address cannot be read,
+ * differs from the source address of this direction's tuple, or has
+ * port 0. Returns -1 when the expectation cannot be allocated or
+ * registered. Otherwise returns the NAT hook's result, or 0 on the
+ * conntrack-only path.
+ ****************************************************************************/
+static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *exp;
+ typeof(nat_h245_hook) nat_h245;
+
+ /* Read h245Address */
+ if (!get_h225_addr(ct, *data, taddr, &addr, &port) ||
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) ||
+ port == 0)
+ return 0;
+
+ /* Create expect for h245 connection */
+ if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = &nf_conntrack_helper_h245;
+
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ (nat_h245 = rcu_dereference(nat_h245_hook)) &&
+ ct->status & IPS_NAT_MASK) {
+ /* NAT needed */
+ ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
+ port, exp);
+ } else { /* Conntrack only */
+ if (nf_conntrack_expect_related(exp) == 0) {
+ DEBUGP("nf_ct_q931: expect H.245 ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_conntrack_expect_put(exp);
+
+ return ret;
+}
+
+/* If the calling party is on the same side of the forward-to party,
+ * we don't need to track the second call.
+ *
+ * Looks up an output route towards src and one towards dst. Returns 1
+ * when both lookups succeed and the two routes have the same gateway and
+ * the same output device. Returns 0 otherwise, including for a family
+ * that has no case below (the AF_INET6 case is only built when
+ * CONFIG_IPV6 or CONFIG_IPV6_MODULE is defined). Every route obtained is
+ * released with dst_release() before returning. */
+static int callforward_do_filter(union nf_conntrack_address *src,
+ union nf_conntrack_address *dst,
+ int family)
+{
+ struct flowi fl1, fl2;
+ int ret = 0;
+
+ memset(&fl1, 0, sizeof(fl1));
+ memset(&fl2, 0, sizeof(fl2));
+
+ switch (family) {
+ case AF_INET: {
+ struct rtable *rt1, *rt2;
+
+ fl1.fl4_dst = src->ip;
+ fl2.fl4_dst = dst->ip;
+ if (ip_route_output_key(&rt1, &fl1) == 0) {
+ if (ip_route_output_key(&rt2, &fl2) == 0) {
+ if (rt1->rt_gateway == rt2->rt_gateway &&
+ rt1->u.dst.dev == rt2->u.dst.dev)
+ ret = 1;
+ dst_release(&rt2->u.dst);
+ }
+ dst_release(&rt1->u.dst);
+ }
+ break;
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6: {
+ struct rt6_info *rt1, *rt2;
+
+ memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst));
+ memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst));
+ rt1 = (struct rt6_info *)ip6_route_output(NULL, &fl1);
+ if (rt1) {
+ rt2 = (struct rt6_info *)ip6_route_output(NULL, &fl2);
+ if (rt2) {
+ if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
+ sizeof(rt1->rt6i_gateway)) &&
+ rt1->u.dst.dev == rt2->u.dst.dev)
+ ret = 1;
+ dst_release(&rt2->u.dst);
+ }
+ dst_release(&rt1->u.dst);
+ }
+ break;
+ }
+#endif
+ }
+ return ret;
+
+}
+
+/****************************************************************************
+ * Create a TCP expectation for the second call leg announced by a
+ * call-forwarding Facility message, towards the alternative address, and
+ * attach the Q.931 helper to it.
+ *
+ * Returns 0 without creating anything when the address cannot be read or
+ * has port 0, or when callforward_filter is set and
+ * callforward_do_filter() reports that both parties are on the same
+ * side. Returns -1 when the expectation cannot be allocated or
+ * registered. Otherwise returns the NAT hook's result, or 0 on the
+ * conntrack-only path.
+ ****************************************************************************/
+static int expect_callforwarding(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *exp;
+ typeof(nat_callforwarding_hook) nat_callforwarding;
+
+ /* Read alternativeAddress */
+ if (!get_h225_addr(ct, *data, taddr, &addr, &port) || port == 0)
+ return 0;
+
+ /* If the calling party is on the same side of the forward-to party,
+ * we don't need to track the second call */
+ if (callforward_filter &&
+ callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3,
+ ct->tuplehash[!dir].tuple.src.l3num)) {
+ DEBUGP("nf_ct_q931: Call Forwarding not tracked\n");
+ return 0;
+ }
+
+ /* Create expect for the second call leg */
+ if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = nf_conntrack_helper_q931;
+
+ if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
+ (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
+ ct->status & IPS_NAT_MASK) {
+ /* Need NAT */
+ ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+ taddr, port, exp);
+ } else { /* Conntrack only */
+ if (nf_conntrack_expect_related(exp) == 0) {
+ DEBUGP("nf_ct_q931: expect Call Forwarding ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ } else
+ ret = -1;
+ }
+
+ nf_conntrack_expect_put(exp);
+
+ return ret;
+}
+
+/****************************************************************************
+ * Handle a decoded Setup UUIE.
+ *
+ * In order:
+ *  - expect the H.245 connection if an h245Address is present;
+ *  - on a NATed connection with a set_h225_addr hook registered, rewrite
+ *    destCallSignalAddress / sourceCallSignalAddress when they differ from
+ *    the reply tuple's source / destination address;
+ *  - run process_olc() on every fastStart item.
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Setup_UUIE *setup)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+ int i;
+ __be16 port;
+ union nf_conntrack_address addr;
+ typeof(set_h225_addr_hook) set_h225_addr;
+
+ DEBUGP("nf_ct_q931: Setup\n");
+
+ if (setup->options & eSetup_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &setup->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ set_h225_addr = rcu_dereference(set_h225_addr_hook);
+ /* The status test must be a bitwise AND against IPS_NAT_MASK, as in the
+  * sourceCallSignalAddress check below: a logical AND with the non-zero
+  * mask would be true for any connection with a status bit set, NATed
+  * or not. */
+ if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
+ (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->destCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
+ DEBUGP("nf_ct_q931: set destCallSignalAddress "
+ NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+ NIP6(*(struct in6_addr *)&addr), ntohs(port),
+ NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
+ ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
+ ret = set_h225_addr(pskb, data, dataoff,
+ &setup->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ ct->tuplehash[!dir].tuple.src.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
+ (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
+ &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
+ DEBUGP("nf_ct_q931: set sourceCallSignalAddress "
+ NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n",
+ NIP6(*(struct in6_addr *)&addr), ntohs(port),
+ NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
+ ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
+ ret = set_h225_addr(pskb, data, dataoff,
+ &setup->sourceCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.dst.u.tcp.port);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (setup->options & eSetup_UUIE_fastStart) {
+ for (i = 0; i < setup->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &setup->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded CallProceeding UUIE: expect the H.245 connection if an
+ * h245Address is present, then run process_olc() on every fastStart item.
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_callproceeding(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ CallProceeding_UUIE *callproc)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: CallProceeding\n");
+
+ if (callproc->options & eCallProceeding_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &callproc->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (callproc->options & eCallProceeding_UUIE_fastStart) {
+ for (i = 0; i < callproc->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &callproc->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded Connect UUIE: expect the H.245 connection if an
+ * h245Address is present, then run process_olc() on every fastStart item.
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Connect_UUIE *connect)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: Connect\n");
+
+ if (connect->options & eConnect_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &connect->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (connect->options & eConnect_UUIE_fastStart) {
+ for (i = 0; i < connect->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &connect->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded Alerting UUIE: expect the H.245 connection if an
+ * h245Address is present, then run process_olc() on every fastStart item.
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Alerting_UUIE *alert)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: Alerting\n");
+
+ if (alert->options & eAlerting_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &alert->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (alert->options & eAlerting_UUIE_fastStart) {
+ for (i = 0; i < alert->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &alert->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded Information UUIE: run process_olc() on every fastStart
+ * item when the fastStart option is set.
+ *
+ * Returns -1 as soon as one item fails, 0 otherwise.
+ ****************************************************************************/
+static int process_information(struct sk_buff **pskb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Information_UUIE *info)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: Information\n");
+
+ if (info->options & eInformation_UUIE_fastStart) {
+ for (i = 0; i < info->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &info->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded Facility UUIE.
+ *
+ * When the reason is callForwarded, only the alternativeAddress (if
+ * present) is handled, via expect_callforwarding(), and its result is
+ * returned; h245Address and fastStart are not examined in that case.
+ * For any other reason: expect the H.245 connection if an h245Address is
+ * present, then run process_olc() on every fastStart item.
+ *
+ * Returns -1 on failure, 0 otherwise (or expect_callforwarding()'s
+ * result on the call-forwarding path).
+ ****************************************************************************/
+static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Facility_UUIE *facility)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: Facility\n");
+
+ if (facility->reason.choice == eFacilityReason_callForwarded) {
+ if (facility->options & eFacility_UUIE_alternativeAddress)
+ return expect_callforwarding(pskb, ct, ctinfo, data,
+ dataoff,
+ &facility->
+ alternativeAddress);
+ return 0;
+ }
+
+ if (facility->options & eFacility_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &facility->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (facility->options & eFacility_UUIE_fastStart) {
+ for (i = 0; i < facility->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &facility->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a decoded Progress UUIE: expect the H.245 connection if an
+ * h245Address is present, then run process_olc() on every fastStart item.
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff,
+ Progress_UUIE *progress)
+{
+ int ret;
+ int i;
+
+ DEBUGP("nf_ct_q931: Progress\n");
+
+ if (progress->options & eProgress_UUIE_h245Address) {
+ ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ &progress->h245Address);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (progress->options & eProgress_UUIE_fastStart) {
+ for (i = 0; i < progress->fastStart.count; i++) {
+ ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ &progress->fastStart.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Dispatch a decoded Q.931 message.
+ *
+ * The h323_message_body is routed to the matching process_*() handler;
+ * unhandled body types are only reported through DEBUGP. If that step did
+ * not fail, every tunnelled H.245 message in h245Control (when the option
+ * is set) is passed to process_h245().
+ *
+ * Returns -1 as soon as one step fails, 0 otherwise.
+ ****************************************************************************/
+static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, int dataoff, Q931 *q931)
+{
+ H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
+ int i;
+ int ret = 0;
+
+ switch (pdu->h323_message_body.choice) {
+ case eH323_UU_PDU_h323_message_body_setup:
+ ret = process_setup(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.setup);
+ break;
+ case eH323_UU_PDU_h323_message_body_callProceeding:
+ ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.
+ callProceeding);
+ break;
+ case eH323_UU_PDU_h323_message_body_connect:
+ ret = process_connect(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.connect);
+ break;
+ case eH323_UU_PDU_h323_message_body_alerting:
+ ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.alerting);
+ break;
+ case eH323_UU_PDU_h323_message_body_information:
+ ret = process_information(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.
+ information);
+ break;
+ case eH323_UU_PDU_h323_message_body_facility:
+ ret = process_facility(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.facility);
+ break;
+ case eH323_UU_PDU_h323_message_body_progress:
+ ret = process_progress(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h323_message_body.progress);
+ break;
+ default:
+ DEBUGP("nf_ct_q931: Q.931 signal %d\n",
+ pdu->h323_message_body.choice);
+ break;
+ }
+
+ if (ret < 0)
+ return -1;
+
+ if (pdu->options & eH323_UU_PDU_h245Control) {
+ for (i = 0; i < pdu->h245Control.count; i++) {
+ ret = process_h245(pskb, ct, ctinfo, data, dataoff,
+ &pdu->h245Control.item[i]);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Conntrack helper callback for Q.931 connections.
+ *
+ * Packets are accepted unexamined unless ctinfo is IP_CT_ESTABLISHED in
+ * either direction. Otherwise every TPKT returned by get_tpkt_data() is
+ * decoded into the function-static q931 and handed to process_q931(),
+ * all while holding nf_h323_lock.
+ *
+ * Returns NF_DROP when process_q931() fails. A decoding error is logged
+ * (rate-limited), ends the loop, and the packet is still accepted.
+ *
+ * NOTE(review): presumably nf_h323_lock is what serialises use of the
+ * static q931 buffer -- confirm.
+ ****************************************************************************/
+static int q931_help(struct sk_buff **pskb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ static Q931 q931;
+ unsigned char *data = NULL;
+ int datalen;
+ int dataoff;
+ int ret;
+
+ /* Until there's been traffic both ways, don't look in packets. */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
+ return NF_ACCEPT;
+ }
+ DEBUGP("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+
+ spin_lock_bh(&nf_h323_lock);
+
+ /* Process each TPKT */
+ while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ &data, &datalen, &dataoff)) {
+ DEBUGP("nf_ct_q931: TPKT len=%d ", datalen);
+ NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+ /* Decode Q.931 signal */
+ ret = DecodeQ931(data, datalen, &q931);
+ if (ret < 0) {
+ if (net_ratelimit())
+ printk("nf_ct_q931: decoding error: %s\n",
+ ret == H323_ERROR_BOUND ?
+ "out of bound" : "out of range");
+ /* We don't drop when decoding error */
+ break;
+ }
+
+ /* Process Q.931 signal */
+ if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
+ goto drop;
+ }
+
+ spin_unlock_bh(&nf_h323_lock);
+ return NF_ACCEPT;
+
+ drop:
+ spin_unlock_bh(&nf_h323_lock);
+ if (net_ratelimit())
+ printk("nf_ct_q931: packet dropped\n");
+ return NF_DROP;
+}
+
+/****************************************************************************
+ * Q.931 helper descriptors, with q931_help() as callback. The two entries
+ * are identical except for tuple.src.l3num (AF_INET, then AF_INET6). Each
+ * sets protocol IPPROTO_TCP and source port Q931_PORT in the tuple, with
+ * all-ones masks on l3num, source port and protonum.
+ ****************************************************************************/
+static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
+ {
+ .name = "Q.931",
+ .me = THIS_MODULE,
+ /* T.120 and H.245 */
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
+ .timeout = 240,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.tcp.port = __constant_htons(Q931_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.tcp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .help = q931_help
+ },
+ {
+ .name = "Q.931",
+ .me = THIS_MODULE,
+ /* T.120 and H.245 */
+ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
+ .timeout = 240,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.tcp.port = __constant_htons(Q931_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.tcp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .help = q931_help
+ },
+};
+
+/****************************************************************************
+ * Return a pointer to the UDP payload of *pskb and store its length in
+ * *datalen.
+ *
+ * Returns NULL when the UDP header at protoff cannot be read or when no
+ * bytes follow it (*datalen is left untouched in both cases). Otherwise
+ * returns whatever skb_header_pointer() yields for the payload, with
+ * h323_buffer supplied as its buffer argument.
+ *
+ * NOTE(review): *datalen is not bounded against the size of h323_buffer
+ * in this function -- confirm the buffer can hold any datagram.
+ ****************************************************************************/
+static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff,
+ int *datalen)
+{
+ struct udphdr _uh, *uh;
+ int dataoff;
+
+ uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh);
+ if (uh == NULL)
+ return NULL;
+ dataoff = protoff + sizeof(_uh);
+ if (dataoff >= (*pskb)->len)
+ return NULL;
+ *datalen = (*pskb)->len - dataoff;
+ return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
+}
+
+/****************************************************************************
+ * Look up a TCP expectation by destination address and port, using an
+ * all-zero source address and source port in the lookup tuple. The
+ * expectation is returned only if its master is ct; otherwise NULL.
+ *
+ * process_rcf() calls this with nf_conntrack_lock read-locked.
+ *
+ * NOTE(review): only the tuple fields assigned below are initialised;
+ * anything else in the on-stack tuple (e.g. src.l3num) is left as is --
+ * confirm __nf_conntrack_expect_find() does not depend on it.
+ ****************************************************************************/
+static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
+ union nf_conntrack_address *addr,
+ __be16 port)
+{
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple.src.u3, 0, sizeof(tuple.src.u3));
+ tuple.src.u.tcp.port = 0;
+ memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3));
+ tuple.dst.u.tcp.port = port;
+ tuple.dst.protonum = IPPROTO_TCP;
+
+ exp = __nf_conntrack_expect_find(&tuple);
+ if (exp && exp->master == ct)
+ return exp;
+ return NULL;
+}
+
+/****************************************************************************
+ * Re-arm the timer of an expectation so that it expires timeout seconds
+ * from now (jiffies + timeout * HZ).
+ *
+ * Returns 0, changing nothing, when exp is NULL or del_timer() returns 0
+ * for its timer; returns 1 after the timer has been re-added.
+ ****************************************************************************/
+static int set_expect_timeout(struct nf_conntrack_expect *exp,
+ unsigned timeout)
+{
+ if (!exp || !del_timer(&exp->timeout))
+ return 0;
+
+ exp->timeout.expires = jiffies + timeout * HZ;
+ add_timer(&exp->timeout);
+
+ return 1;
+}
+
+/****************************************************************************
+ * Create a permanent TCP expectation for incoming Q.931 calls, based on
+ * the first of count transport addresses that equals the source address
+ * of this direction's tuple and has a non-zero port. The Q.931 helper is
+ * attached to the expectation.
+ *
+ * When gkrouted_only is set the expectation is restricted to the reply
+ * tuple's source address; otherwise NULL is passed as the source.
+ *
+ * Returns 0 when no address matches. Returns -1 when the expectation
+ * cannot be allocated or registered. Otherwise returns the NAT hook's
+ * result, or 0 on the conntrack-only path, where the port is also saved
+ * in info->sig_port[dir].
+ ****************************************************************************/
+static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ int i;
+ __be16 port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *exp;
+ typeof(nat_q931_hook) nat_q931;
+
+ /* Look for the first related address */
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
+ memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+ sizeof(addr)) == 0 && port != 0)
+ break;
+ }
+
+ if (i >= count) /* Not found */
+ return 0;
+
+ /* Create expect for Q.931 */
+ if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+ gkrouted_only ? /* only accept calls from GK? */
+ &ct->tuplehash[!dir].tuple.src.u3 :
+ NULL,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ IPPROTO_TCP, NULL, &port);
+ exp->helper = nf_conntrack_helper_q931;
+ exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
+
+ nat_q931 = rcu_dereference(nat_q931_hook);
+ if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */
+ ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
+ } else { /* Conntrack only */
+ if (nf_conntrack_expect_related(exp) == 0) {
+ DEBUGP("nf_ct_ras: expect Q.931 ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+
+ /* Save port for looking up expect in processing RCF */
+ info->sig_port[dir] = port;
+ } else
+ ret = -1;
+ }
+
+ nf_conntrack_expect_put(exp);
+
+ return ret;
+}
+
+/****************************************************************************
+ * Handle a GatekeeperRequest: when a set_ras_addr hook is registered and
+ * the connection has NAT status bits, pass the single rasAddress to the
+ * hook and return its result. Returns 0 otherwise.
+ ****************************************************************************/
+static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, GatekeeperRequest *grq)
+{
+ typeof(set_ras_addr_hook) set_ras_addr;
+
+ DEBUGP("nf_ct_ras: GRQ\n");
+
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */
+ return set_ras_addr(pskb, ct, ctinfo, data,
+ &grq->rasAddress, 1);
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a GatekeeperConfirm: create a UDP expectation, with the RAS
+ * helper attached, towards the rasAddress it carries.
+ *
+ * Returns 0 without creating anything when the address cannot be read,
+ * when address and port equal the source of this direction's tuple, or
+ * when ct itself has IPS_EXPECTED_BIT set. Returns -1 when the
+ * expectation cannot be allocated or registered, 0 on success.
+ ****************************************************************************/
+static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, GatekeeperConfirm *gcf)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int ret = 0;
+ __be16 port;
+ union nf_conntrack_address addr;
+ struct nf_conntrack_expect *exp;
+
+ DEBUGP("nf_ct_ras: GCF\n");
+
+ if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port))
+ return 0;
+
+ /* Registration port is the same as discovery port */
+ if (!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
+ port == ct->tuplehash[dir].tuple.src.u.udp.port)
+ return 0;
+
+ /* Avoid RAS expectation loops. A GCF is never expected. */
+ if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+ return 0;
+
+ /* Need new expect */
+ if ((exp = nf_conntrack_expect_alloc(ct)) == NULL)
+ return -1;
+ nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+ &ct->tuplehash[!dir].tuple.src.u3, &addr,
+ IPPROTO_UDP, NULL, &port);
+ exp->helper = nf_conntrack_helper_ras;
+
+ if (nf_conntrack_expect_related(exp) == 0) {
+ DEBUGP("nf_ct_ras: expect RAS ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ } else
+ ret = -1;
+
+ nf_conntrack_expect_put(exp);
+
+ return ret;
+}
+
+/****************************************************************************
+ * Handle a RegistrationRequest.
+ *
+ * Sets up the Q.931 expectation from the callSignalAddress list via
+ * expect_q931(), lets the set_ras_addr hook process the rasAddress list
+ * when the connection has NAT status bits, and records the registration
+ * lifetime in info->timeout: the request's timeToLive if present,
+ * default_rrq_ttl otherwise.
+ *
+ * Returns -1 when expect_q931() or the hook fails, 0 otherwise.
+ ****************************************************************************/
+static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, RegistrationRequest *rrq)
+{
+ struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ int ret;
+ typeof(set_ras_addr_hook) set_ras_addr;
+
+ DEBUGP("nf_ct_ras: RRQ\n");
+
+ ret = expect_q931(pskb, ct, ctinfo, data,
+ rrq->callSignalAddress.item,
+ rrq->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+
+ set_ras_addr = rcu_dereference(set_ras_addr_hook);
+ if (set_ras_addr && ct->status & IPS_NAT_MASK) {
+ ret = set_ras_addr(pskb, ct, ctinfo, data,
+ rrq->rasAddress.item,
+ rrq->rasAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (rrq->options & eRegistrationRequest_timeToLive) {
+ DEBUGP("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
+ info->timeout = rrq->timeToLive;
+ } else
+ info->timeout = default_rrq_ttl;
+
+ return 0;
+}
+
+/****************************************************************************
+ * Handle a RegistrationConfirm.
+ *
+ * Lets the set_sig_addr hook process the callSignalAddress list when the
+ * connection has NAT status bits, and overrides info->timeout with the
+ * confirm's timeToLive if present. When the resulting timeout is
+ * positive, the connection is refreshed for that many seconds and the
+ * expectation found by find_expect() for the port saved in
+ * info->sig_port[!dir] gets the same timeout; the lookup and timer
+ * update run with nf_conntrack_lock read-locked.
+ *
+ * Returns -1 when the hook fails, 0 otherwise.
+ ****************************************************************************/
+static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned char **data, RegistrationConfirm *rcf)
+{
+ struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ int dir = CTINFO2DIR(ctinfo);
+ int ret;
+ struct nf_conntrack_expect *exp;
+ typeof(set_sig_addr_hook) set_sig_addr;
+
+ DEBUGP("nf_ct_ras: RCF\n");
+
+ set_sig_addr = rcu_dereference(set_sig_addr_hook);
+ if (set_sig_addr && ct->status & IPS_NAT_MASK) {
+ ret = set_sig_addr(pskb, ct, ctinfo, data,
+ rcf->callSignalAddress.item,
+ rcf->callSignalAddress.count);
+ if (ret < 0)
+ return -1;
+ }
+
+ if (rcf->options & eRegistrationConfirm_timeToLive) {
+ DEBUGP("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
+ info->timeout = rcf->timeToLive;
+ }
+
+ if (info->timeout > 0) {
+ DEBUGP
+ ("nf_ct_ras: set RAS connection timeout to %u seconds\n",
+ info->timeout);
+ nf_ct_refresh(ct, *pskb, info->timeout * HZ);
+
+ /* Set expect timeout */
+ read_lock_bh(&nf_conntrack_lock);
+ exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
+ info->sig_port[!dir]);
+ if (exp) {
+ DEBUGP("nf_ct_ras: set Q.931 expect "
+ "timeout to %u seconds for",
+ info->timeout);
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ set_expect_timeout(exp, info->timeout);
+ }
+ read_unlock_bh(&nf_conntrack_lock);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS UnregistrationRequest (URQ).
+ *
+ * Passes the call signal addresses to the set_sig_addr NAT hook (when one is
+ * registered and the connection has a NAT status bit set), removes all
+ * expectations of the connection, clears both recorded signal ports and
+ * refreshes the connection with a 30 second timeout.
+ *
+ * Returns 0 on success, -1 if the NAT hook failed.
+ */
+static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, UnregistrationRequest *urq)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	typeof(set_sig_addr_hook) set_sig_addr;
+	int dir = CTINFO2DIR(ctinfo);
+	int ret;
+
+	DEBUGP("nf_ct_ras: URQ\n");
+
+	set_sig_addr = rcu_dereference(set_sig_addr_hook);
+	if (set_sig_addr && (ct->status & IPS_NAT_MASK)) {
+		ret = set_sig_addr(pskb, ct, ctinfo, data,
+				   urq->callSignalAddress.item,
+				   urq->callSignalAddress.count);
+		if (ret < 0)
+			return -1;
+	}
+
+	/* Clear old expect and forget the signal ports of both directions */
+	nf_ct_remove_expectations(ct);
+	info->sig_port[dir] = 0;
+	info->sig_port[!dir] = 0;
+
+	/* Give it 30 seconds for UCF or URJ */
+	nf_ct_refresh(ct, *pskb, 30 * HZ);
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS AdmissionRequest (ARQ).
+ *
+ * Only acts when a set_h225_addr NAT hook is registered and the connection
+ * has a NAT status bit set.  Two cases are rewritten through the hook:
+ *  - "answering" ARQ: destCallSignalAddress equals the source address of
+ *    this direction and its port equals the recorded signal port;
+ *  - "calling" ARQ: srcCallSignalAddress equals the source address of this
+ *    direction.
+ *
+ * Returns the hook's result when a rewrite is attempted, 0 otherwise.
+ */
+static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, AdmissionRequest *arq)
+{
+	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+	typeof(set_h225_addr_hook) set_h225_addr;
+	union nf_conntrack_address addr;
+	int dir = CTINFO2DIR(ctinfo);
+	__be16 port;
+
+	DEBUGP("nf_ct_ras: ARQ\n");
+
+	set_h225_addr = rcu_dereference(set_h225_addr_hook);
+
+	if (arq->options & eAdmissionRequest_destCallSignalAddress) {
+		if (get_h225_addr(ct, *data, &arq->destCallSignalAddress,
+				  &addr, &port) &&
+		    memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+			   sizeof(addr)) == 0 &&
+		    port == info->sig_port[dir] &&
+		    set_h225_addr && (ct->status & IPS_NAT_MASK)) {
+			/* Answering ARQ */
+			return set_h225_addr(pskb, data, 0,
+					     &arq->destCallSignalAddress,
+					     &ct->tuplehash[!dir].tuple.dst.u3,
+					     info->sig_port[!dir]);
+		}
+	}
+
+	if (arq->options & eAdmissionRequest_srcCallSignalAddress) {
+		if (get_h225_addr(ct, *data, &arq->srcCallSignalAddress,
+				  &addr, &port) &&
+		    memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3,
+			   sizeof(addr)) == 0 &&
+		    set_h225_addr && (ct->status & IPS_NAT_MASK)) {
+			/* Calling ARQ */
+			return set_h225_addr(pskb, data, 0,
+					     &arq->srcCallSignalAddress,
+					     &ct->tuplehash[!dir].tuple.dst.u3,
+					     port);
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS AdmissionConfirm (ACF).
+ *
+ * If destCallSignalAddress cannot be extracted, the message is ignored.
+ * If it equals the destination address of this direction ("answering"
+ * ACF), the address is only passed to the set_sig_addr NAT hook, when
+ * applicable.  Otherwise a permanent TCP expectation towards that address
+ * and port is registered, with the Q.931 helper attached.
+ *
+ * Returns 0 on success (or the hook's result for an answering ACF), -1 if
+ * the expectation could not be allocated or registered.
+ */
+static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, AdmissionConfirm *acf)
+{
+	typeof(set_sig_addr_hook) set_sig_addr;
+	struct nf_conntrack_expect *exp;
+	union nf_conntrack_address addr;
+	int dir = CTINFO2DIR(ctinfo);
+	__be16 port;
+	int ret = 0;
+
+	DEBUGP("nf_ct_ras: ACF\n");
+
+	if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress,
+			   &addr, &port))
+		return 0;
+
+	if (memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3,
+		   sizeof(addr)) == 0) {
+		/* Answering ACF */
+		set_sig_addr = rcu_dereference(set_sig_addr_hook);
+		if (!set_sig_addr || !(ct->status & IPS_NAT_MASK))
+			return 0;
+		return set_sig_addr(pskb, ct, ctinfo, data,
+				    &acf->destCallSignalAddress, 1);
+	}
+
+	/* Need new expect */
+	exp = nf_conntrack_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
+				 IPPROTO_TCP, NULL, &port);
+	exp->flags = NF_CT_EXPECT_PERMANENT;
+	exp->helper = nf_conntrack_helper_q931;
+
+	if (nf_conntrack_expect_related(exp) != 0) {
+		ret = -1;
+	} else {
+		DEBUGP("nf_ct_ras: expect Q.931 ");
+		NF_CT_DUMP_TUPLE(&exp->tuple);
+	}
+
+	/* Drop the reference taken by the allocation above */
+	nf_conntrack_expect_put(exp);
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS LocationRequest (LRQ).
+ *
+ * Passes the single replyAddress to the set_ras_addr NAT hook when one is
+ * registered and the connection has a NAT status bit set.
+ *
+ * Returns the hook's result, or 0 when the hook is not invoked.
+ */
+static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, LocationRequest *lrq)
+{
+	typeof(set_ras_addr_hook) set_ras_addr;
+
+	DEBUGP("nf_ct_ras: LRQ\n");
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (!set_ras_addr || !(ct->status & IPS_NAT_MASK))
+		return 0;
+
+	return set_ras_addr(pskb, ct, ctinfo, data, &lrq->replyAddress, 1);
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS LocationConfirm (LCF).
+ *
+ * Extracts callSignalAddress and registers a permanent TCP expectation
+ * towards that address and port, with the Q.931 helper attached.  The
+ * rasAddress of the message is ignored.
+ *
+ * Returns 0 on success or when the address cannot be extracted, -1 if the
+ * expectation could not be allocated or registered.
+ */
+static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, LocationConfirm *lcf)
+{
+	struct nf_conntrack_expect *exp;
+	union nf_conntrack_address addr;
+	int dir = CTINFO2DIR(ctinfo);
+	__be16 port;
+	int ret = 0;
+
+	DEBUGP("nf_ct_ras: LCF\n");
+
+	if (!get_h225_addr(ct, *data, &lcf->callSignalAddress,
+			   &addr, &port))
+		return 0;
+
+	/* Need new expect for call signal */
+	exp = nf_conntrack_expect_alloc(ct);
+	if (exp == NULL)
+		return -1;
+	nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num,
+				 &ct->tuplehash[!dir].tuple.src.u3, &addr,
+				 IPPROTO_TCP, NULL, &port);
+	exp->flags = NF_CT_EXPECT_PERMANENT;
+	exp->helper = nf_conntrack_helper_q931;
+
+	if (nf_conntrack_expect_related(exp) != 0) {
+		ret = -1;
+	} else {
+		DEBUGP("nf_ct_ras: expect Q.931 ");
+		NF_CT_DUMP_TUPLE(&exp->tuple);
+	}
+
+	nf_conntrack_expect_put(exp);
+
+	/* Ignore rasAddress */
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Handle a RAS InfoRequestResponse (IRR).
+ *
+ * When the connection has a NAT status bit set, passes rasAddress to the
+ * set_ras_addr hook and the call signal addresses to the set_sig_addr hook,
+ * for whichever of the two hooks is registered.
+ *
+ * Returns 0 on success, -1 if either hook failed.
+ */
+static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, InfoRequestResponse *irr)
+{
+	typeof(set_ras_addr_hook) set_ras_addr;
+	typeof(set_sig_addr_hook) set_sig_addr;
+	int ret;
+
+	DEBUGP("nf_ct_ras: IRR\n");
+
+	set_ras_addr = rcu_dereference(set_ras_addr_hook);
+	if (set_ras_addr && (ct->status & IPS_NAT_MASK)) {
+		ret = set_ras_addr(pskb, ct, ctinfo, data,
+				   &irr->rasAddress, 1);
+		if (ret < 0)
+			return -1;
+	}
+
+	set_sig_addr = rcu_dereference(set_sig_addr_hook);
+	if (set_sig_addr && (ct->status & IPS_NAT_MASK)) {
+		ret = set_sig_addr(pskb, ct, ctinfo, data,
+				   irr->callSignalAddress.item,
+				   irr->callSignalAddress.count);
+		if (ret < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************/
+/*
+ * Dispatch a decoded RAS message to the handler for its message type.
+ *
+ * Message types without a handler are only logged via DEBUGP.
+ *
+ * Returns the handler's result, or 0 for unhandled message types.
+ */
+static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       unsigned char **data, RasMessage *ras)
+{
+	int ret = 0;
+
+	switch (ras->choice) {
+	case eRasMessage_gatekeeperRequest:
+		ret = process_grq(pskb, ct, ctinfo, data,
+				  &ras->gatekeeperRequest);
+		break;
+	case eRasMessage_gatekeeperConfirm:
+		ret = process_gcf(pskb, ct, ctinfo, data,
+				  &ras->gatekeeperConfirm);
+		break;
+	case eRasMessage_registrationRequest:
+		ret = process_rrq(pskb, ct, ctinfo, data,
+				  &ras->registrationRequest);
+		break;
+	case eRasMessage_registrationConfirm:
+		ret = process_rcf(pskb, ct, ctinfo, data,
+				  &ras->registrationConfirm);
+		break;
+	case eRasMessage_unregistrationRequest:
+		ret = process_urq(pskb, ct, ctinfo, data,
+				  &ras->unregistrationRequest);
+		break;
+	case eRasMessage_admissionRequest:
+		ret = process_arq(pskb, ct, ctinfo, data,
+				  &ras->admissionRequest);
+		break;
+	case eRasMessage_admissionConfirm:
+		ret = process_acf(pskb, ct, ctinfo, data,
+				  &ras->admissionConfirm);
+		break;
+	case eRasMessage_locationRequest:
+		ret = process_lrq(pskb, ct, ctinfo, data,
+				  &ras->locationRequest);
+		break;
+	case eRasMessage_locationConfirm:
+		ret = process_lcf(pskb, ct, ctinfo, data,
+				  &ras->locationConfirm);
+		break;
+	case eRasMessage_infoRequestResponse:
+		ret = process_irr(pskb, ct, ctinfo, data,
+				  &ras->infoRequestResponse);
+		break;
+	default:
+		DEBUGP("nf_ct_ras: RAS message %d\n", ras->choice);
+		break;
+	}
+
+	return ret;
+}
+
+/****************************************************************************/
+/*
+ * Conntrack helper callback for RAS packets.
+ *
+ * Under nf_h323_lock (which also serialises use of the static decode
+ * buffer below), fetches the UDP payload, decodes it as a RAS message and
+ * hands it to process_ras().
+ *
+ * Returns NF_DROP only when process_ras() fails; packets without payload
+ * or with a decoding error are accepted.
+ */
+static int ras_help(struct sk_buff **pskb, unsigned int protoff,
+		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	static RasMessage ras;
+	unsigned char *data;
+	int verdict = NF_ACCEPT;
+	int datalen = 0;
+	int ret;
+
+	DEBUGP("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+
+	spin_lock_bh(&nf_h323_lock);
+
+	/* Get UDP data */
+	data = get_udp_data(pskb, protoff, &datalen);
+	if (data != NULL) {
+		DEBUGP("nf_ct_ras: RAS message len=%d ", datalen);
+		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
+
+		/* Decode RAS message */
+		ret = DecodeRasMessage(data, datalen, &ras);
+		if (ret < 0) {
+			if (net_ratelimit())
+				printk("nf_ct_ras: decoding error: %s\n",
+				       ret == H323_ERROR_BOUND ?
+				       "out of bound" : "out of range");
+		} else if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) {
+			/* Process RAS message failed */
+			verdict = NF_DROP;
+		}
+	}
+
+	spin_unlock_bh(&nf_h323_lock);
+
+	if (verdict == NF_DROP && net_ratelimit())
+		printk("nf_ct_ras: packet dropped\n");
+
+	return verdict;
+}
+
+/****************************************************************************
+ * RAS helper definitions, one entry per address family: index 0 is
+ * AF_INET, index 1 is AF_INET6.  Both entries match UDP tuples whose
+ * source port is RAS_PORT (the l3num, port and protocol masks are
+ * all-ones) and install ras_help() as the help callback.
+ ****************************************************************************/
+static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
+ {
+ .name = "RAS",
+ .me = THIS_MODULE,
+ .max_expected = 32,
+ .timeout = 240,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(RAS_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .help = ras_help,
+ },
+ {
+ .name = "RAS",
+ .me = THIS_MODULE,
+ .max_expected = 32,
+ .timeout = 240,
+ .tuple.src.l3num = AF_INET6,
+ .tuple.src.u.udp.port = __constant_htons(RAS_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF,
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .help = ras_help,
+ },
+};
+
+/****************************************************************************/
+/*
+ * Module exit: unregister the RAS and Q.931 helpers (highest index first,
+ * RAS before Q.931) and free the shared decode buffer.
+ */
+static void __exit nf_conntrack_h323_fini(void)
+{
+	int i;
+
+	for (i = 1; i >= 0; i--)
+		nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[i]);
+	for (i = 1; i >= 0; i--)
+		nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[i]);
+
+	kfree(h323_buffer);
+	DEBUGP("nf_ct_h323: fini\n");
+}
+
+/****************************************************************************/
+/*
+ * Module init: allocate the shared 64 KiB decode buffer and register the
+ * Q.931 and RAS helpers (two entries each).
+ *
+ * On a registration failure the helpers registered so far are unregistered
+ * in reverse order and the buffer is freed again.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * error returned by nf_conntrack_helper_register().
+ */
+static int __init nf_conntrack_h323_init(void)
+{
+	int ret;
+
+	h323_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!h323_buffer)
+		return -ENOMEM;
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
+	if (ret < 0)
+		goto err1;
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
+	if (ret < 0)
+		goto err2;
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
+	if (ret < 0)
+		goto err3;
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
+	if (ret < 0)
+		goto err4;
+	DEBUGP("nf_ct_h323: init success\n");
+	return 0;
+
+err4:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
+err3:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
+err2:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
+err1:
+	/* The module will not load, so nothing else will free the buffer */
+	kfree(h323_buffer);
+	return ret;
+}
+
+/****************************************************************************/
+module_init(nf_conntrack_h323_init);
+module_exit(nf_conntrack_h323_fini);
+
+EXPORT_SYMBOL_GPL(get_h225_addr);
+EXPORT_SYMBOL_GPL(set_h245_addr_hook);
+EXPORT_SYMBOL_GPL(set_h225_addr_hook);
+EXPORT_SYMBOL_GPL(set_sig_addr_hook);
+EXPORT_SYMBOL_GPL(set_ras_addr_hook);
+EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
+EXPORT_SYMBOL_GPL(nat_t120_hook);
+EXPORT_SYMBOL_GPL(nat_h245_hook);
+EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
+EXPORT_SYMBOL_GPL(nat_q931_hook);
+
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_h323");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c
index 4b359618bed..4c6f8b3b120 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c
+++ b/net/netfilter/nf_conntrack_h323_types.c
@@ -36,7 +36,8 @@ static field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */
};
static field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */
- {FNAME("ip") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
+ {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE,
+ offsetof(TransportAddress_ip6Address, ip6), NULL},
{FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL},
};
@@ -65,8 +66,8 @@ static field_t _TransportAddress[] = { /* CHOICE */
_TransportAddress_ipSourceRoute},
{FNAME("ipxAddress") SEQ, 0, 3, 3, SKIP, 0,
_TransportAddress_ipxAddress},
- {FNAME("ip6Address") SEQ, 0, 2, 2, SKIP | EXT, 0,
- _TransportAddress_ip6Address},
+ {FNAME("ip6Address") SEQ, 0, 2, 2, DECODE | EXT,
+ offsetof(TransportAddress, ip6Address), _TransportAddress_ip6Address},
{FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL},
{FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL},
{FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
new file mode 100644
index 00000000000..0743be4434b
--- /dev/null
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -0,0 +1,155 @@
+/* Helper handling for netfilter. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+static __read_mostly LIST_HEAD(helpers);
+
+/*
+ * Return the first registered helper whose tuple/mask pair matches @tuple,
+ * or NULL if none does.  Walks the global helper list without taking any
+ * lock itself.
+ */
+struct nf_conntrack_helper *
+__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_helper *cur;
+
+	list_for_each_entry(cur, &helpers, list)
+		if (nf_ct_tuple_mask_cmp(tuple, &cur->tuple, &cur->mask))
+			return cur;
+
+	return NULL;
+}
+
+/*
+ * Look up the helper matching @tuple and take a reference on its module.
+ *
+ * Returns the helper, or NULL if no helper matches or the module reference
+ * could not be taken.  Release with nf_ct_helper_put().
+ */
+struct nf_conntrack_helper *
+nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_helper *found;
+
+	/* need nf_conntrack_lock to assure that helper exists until
+	 * try_module_get() is called */
+	read_lock_bh(&nf_conntrack_lock);
+
+	found = __nf_ct_helper_find(tuple);
+
+	/* need to increase module usage count to assure helper will
+	 * not go away while the caller is e.g. busy putting a
+	 * conntrack in the hash that uses the helper */
+	if (found != NULL && !try_module_get(found->me))
+		found = NULL;
+
+	read_unlock_bh(&nf_conntrack_lock);
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_find_get);
+
+void nf_ct_helper_put(struct nf_conntrack_helper *helper)
+{
+ module_put(helper->me); /* release the reference on the helper's owning module */
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_put);
+
+/*
+ * Return the first registered helper whose name equals @name, or NULL if
+ * there is none.  Walks the global helper list without taking any lock
+ * itself.
+ */
+struct nf_conntrack_helper *
+__nf_conntrack_helper_find_byname(const char *name)
+{
+	struct nf_conntrack_helper *cur;
+
+	list_for_each_entry(cur, &helpers, list)
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
+
+/*
+ * Detach helper @me from the conntrack that owns tuple hash entry @i.
+ *
+ * If the conntrack uses @me, an IPCT_HELPER event is raised and its helper
+ * pointer is cleared.  Always returns 0.
+ */
+static inline int unhelp(struct nf_conntrack_tuple_hash *i,
+			 const struct nf_conntrack_helper *me)
+{
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
+	struct nf_conn_help *help = nfct_help(ct);
+
+	if (!help || help->helper != me)
+		return 0;
+
+	nf_conntrack_event(IPCT_HELPER, ct);
+	help->helper = NULL;
+
+	return 0;
+}
+
+/*
+ * Register conntrack helper @me.
+ *
+ * Registers the "nf_conntrack:help" cache, sized for a struct nf_conn
+ * followed by a suitably aligned struct nf_conn_help, and then adds @me to
+ * the global helper list under nf_conntrack_lock.
+ *
+ * A helper with a zero timeout is a caller bug and triggers BUG_ON().
+ *
+ * Returns 0 on success or the error from nf_conntrack_register_cache().
+ */
+int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
+{
+	int cache_size;
+	int ret;
+
+	BUG_ON(me->timeout == 0);
+
+	cache_size = ALIGN(sizeof(struct nf_conn),
+			   __alignof__(struct nf_conn_help))
+		     + sizeof(struct nf_conn_help);
+
+	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
+					  cache_size);
+	if (ret >= 0) {
+		write_lock_bh(&nf_conntrack_lock);
+		list_add(&me->list, &helpers);
+		write_unlock_bh(&nf_conntrack_lock);
+		return 0;
+	}
+
+	printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
+
+/*
+ * Unregister conntrack helper @me.
+ *
+ * Under nf_conntrack_lock: removes @me from the helper list, drops every
+ * pending expectation that belongs to @me (through its master's helper or
+ * its own helper pointer) and clears the helper pointer of every
+ * unconfirmed and hashed conntrack that uses @me.  Finally waits with
+ * synchronize_net() for users that may still be looking at the helper.
+ */
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+	struct nf_conntrack_expect *exp, *next;
+	struct nf_conntrack_tuple_hash *h;
+	unsigned int bucket;
+
+	/* Need write lock here, to delete helper. */
+	write_lock_bh(&nf_conntrack_lock);
+	list_del(&me->list);
+
+	/* Get rid of expectations */
+	list_for_each_entry_safe(exp, next, &nf_conntrack_expect_list, list) {
+		struct nf_conn_help *help = nfct_help(exp->master);
+
+		if (help->helper != me && exp->helper != me)
+			continue;
+		/* Timer no longer pending: someone else handles this one */
+		if (!del_timer(&exp->timeout))
+			continue;
+
+		nf_ct_unlink_expect(exp);
+		nf_conntrack_expect_put(exp);
+	}
+
+	/* Get rid of expecteds, set helpers to NULL. */
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (bucket = 0; bucket < nf_conntrack_htable_size; bucket++)
+		list_for_each_entry(h, &nf_conntrack_hash[bucket], list)
+			unhelp(h, me);
+
+	write_unlock_bh(&nf_conntrack_lock);
+
+	/* Someone could be still looking at the helper in a bh. */
+	synchronize_net();
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
new file mode 100644
index 00000000000..ed01db63439
--- /dev/null
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -0,0 +1,281 @@
+/* IRC extension for IP connection tracking, Version 1.21
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * based on RR's ip_conntrack_ftp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_irc.h>
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static int ports_c;
+static unsigned int max_dcc_channels = 8;
+static unsigned int dcc_timeout __read_mostly = 300;
+/* This is slow, but it's simple. --RR */
+static char *irc_buffer;
+static DEFINE_SPINLOCK(irc_buffer_lock);
+
+unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_irc");
+
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+module_param(max_dcc_channels, uint, 0400);
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per "
+ "IRC session");
+module_param(dcc_timeout, uint, 0400);
+MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
+
+static const char *dccprotos[] = {
+ "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT "
+};
+
+#define MINMATCHLEN 5
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* tries to get the ip_addr and port out of a dcc command
+ * return value: -1 on failure, 0 on success
+ * data pointer to first byte of DCC command data
+ * data_end pointer to last byte of dcc command data
+ * ip returns parsed ip of dcc command
+ * port returns parsed port of dcc command
+ * ad_beg_p returns pointer to first byte of addr data
+ * ad_end_p returns pointer to last byte of addr data
+ */
+/*
+ * Parse "<ip> <port>" out of a DCC command.
+ *
+ * @data points at the DCC command data; @data_end is the end of the packet
+ * data as passed by help(), i.e. one past the last valid byte.  On success
+ * *ip and *port hold the parsed values (host byte order) and *ad_beg_p /
+ * *ad_end_p delimit the address text.
+ *
+ * The packet data is not NUL terminated and simple_strtoul() parses until
+ * the first invalid character, so a newline is required inside the packet
+ * before any number is parsed; it acts as the terminator that keeps all
+ * later reads within bounds.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
+		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
+{
+	char *nl;
+
+	/* at least 12: "AAAAAAAA P\1\n" */
+	while (*data++ != ' ')
+		if (data > data_end - 12)
+			return -1;
+
+	/* Make sure a newline lies within the packet boundaries */
+	for (nl = data; nl < data_end; nl++)
+		if (*nl == '\n')
+			break;
+	if (nl >= data_end)
+		return -1;
+
+	*ad_beg_p = data;
+	*ip = simple_strtoul(data, &data, 10);
+
+	/* skip blanks between ip and port */
+	while (*data == ' ') {
+		if (data >= data_end)
+			return -1;
+		data++;
+	}
+
+	*port = simple_strtoul(data, &data, 10);
+	*ad_end_p = data;
+
+	return 0;
+}
+
+/*
+ * Conntrack helper callback for IRC control connections.
+ *
+ * Scans packets in the original direction of an established connection for
+ * "\1DCC <proto> ..." commands.  For the first command that parses and
+ * whose address matches one end of the connection, an expectation for the
+ * announced TCP port is created; it is either handed to the NAT hook (when
+ * registered and the connection has a NAT status bit set) or registered
+ * directly.
+ *
+ * Returns NF_ACCEPT, NF_DROP if the expectation cannot be allocated or
+ * registered, or the NAT hook's verdict.
+ */
+static int help(struct sk_buff **pskb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff;
+	struct tcphdr _tcph, *th;
+	char *data, *data_limit, *ib_ptr;
+	int dir = CTINFO2DIR(ctinfo);
+	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_tuple *tuple;
+	u_int32_t dcc_ip;
+	u_int16_t dcc_port;
+	__be16 port;
+	int i, ret = NF_ACCEPT;
+	char *addr_beg_p, *addr_end_p;
+	typeof(nf_nat_irc_hook) nf_nat_irc;
+
+	/* If packet is coming from IRC server */
+	if (dir == IP_CT_DIR_REPLY)
+		return NF_ACCEPT;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+		return NF_ACCEPT;
+
+	/* Not a full tcp header? */
+	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+
+	/* No data? */
+	dataoff = protoff + th->doff*4;
+	if (dataoff >= (*pskb)->len)
+		return NF_ACCEPT;
+
+	/* irc_buffer is shared by all connections; the lock serialises it */
+	spin_lock_bh(&irc_buffer_lock);
+	ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff,
+				    irc_buffer);
+	BUG_ON(ib_ptr == NULL);
+
+	data = ib_ptr;
+	data_limit = ib_ptr + (*pskb)->len - dataoff;
+
+	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
+	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
+	while (data < data_limit - (19 + MINMATCHLEN)) {
+		if (memcmp(data, "\1DCC ", 5)) {
+			data++;
+			continue;
+		}
+		data += 5;
+		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+
+		/* Addresses come from the conntrack tuple: this function has
+		 * no IP header pointer, so the debug build must not use one */
+		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
+		       NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip),
+		       ntohs(th->source),
+		       NIPQUAD(ct->tuplehash[dir].tuple.dst.u3.ip),
+		       ntohs(th->dest));
+
+		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
+			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
+				/* no match */
+				continue;
+			}
+			data += strlen(dccprotos[i]);
+			DEBUGP("DCC %s detected\n", dccprotos[i]);
+
+			/* we have at least
+			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+			 * data left (== 14/13 bytes) */
+			if (parse_dcc((char *)data, data_limit, &dcc_ip,
+				      &dcc_port, &addr_beg_p, &addr_end_p)) {
+				DEBUGP("unable to parse dcc command\n");
+				continue;
+			}
+			DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
+			       HIPQUAD(dcc_ip), dcc_port);
+
+			/* dcc_ip can be the internal OR external (NAT'ed) IP */
+			tuple = &ct->tuplehash[dir].tuple;
+			if (tuple->src.u3.ip != htonl(dcc_ip) &&
+			    tuple->dst.u3.ip != htonl(dcc_ip)) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+					       "Forged DCC command from "
+					       "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
+					       NIPQUAD(tuple->src.u3.ip),
+					       HIPQUAD(dcc_ip), dcc_port);
+				continue;
+			}
+
+			exp = nf_conntrack_expect_alloc(ct);
+			if (exp == NULL) {
+				ret = NF_DROP;
+				goto out;
+			}
+			/* Expect a TCP connection from anywhere to the reply
+			 * tuple's destination address on the DCC port */
+			tuple = &ct->tuplehash[!dir].tuple;
+			port = htons(dcc_port);
+			nf_conntrack_expect_init(exp, tuple->src.l3num,
+						 NULL, &tuple->dst.u3,
+						 IPPROTO_TCP, NULL, &port);
+
+			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
+			if (nf_nat_irc && ct->status & IPS_NAT_MASK)
+				ret = nf_nat_irc(pskb, ctinfo,
+						 addr_beg_p - ib_ptr,
+						 addr_end_p - addr_beg_p,
+						 exp);
+			else if (nf_conntrack_expect_related(exp) != 0)
+				ret = NF_DROP;
+			nf_conntrack_expect_put(exp);
+			goto out;
+		}
+	}
+ out:
+	spin_unlock_bh(&irc_buffer_lock);
+	return ret;
+}
+
+static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
+static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly;
+
+static void nf_conntrack_irc_fini(void);
+
+/*
+ * Module init: validate the module parameters, allocate the shared 64 KiB
+ * packet buffer and register one helper per configured IRC port (IRC_PORT
+ * when none is given).
+ *
+ * Returns 0 on success, -EINVAL for a zero max_dcc_channels or
+ * dcc_timeout, -ENOMEM if the buffer cannot be allocated, or the error
+ * returned by nf_conntrack_helper_register().
+ */
+static int __init nf_conntrack_irc_init(void)
+{
+	int i, ret;
+	char *tmpname;
+
+	if (max_dcc_channels < 1) {
+		printk("nf_ct_irc: max_dcc_channels must not be zero\n");
+		return -EINVAL;
+	}
+
+	/* A zero helper timeout would hit the BUG_ON() in
+	 * nf_conntrack_helper_register(); reject it up front */
+	if (dcc_timeout == 0) {
+		printk("nf_ct_irc: dcc_timeout must not be zero\n");
+		return -EINVAL;
+	}
+
+	irc_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!irc_buffer)
+		return -ENOMEM;
+
+	/* If no port given, default to standard irc port */
+	if (ports_c == 0)
+		ports[ports_c++] = IRC_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		irc[i].tuple.src.l3num = AF_INET;
+		irc[i].tuple.src.u.tcp.port = htons(ports[i]);
+		irc[i].tuple.dst.protonum = IPPROTO_TCP;
+		irc[i].mask.src.l3num = 0xFFFF;
+		irc[i].mask.src.u.tcp.port = htons(0xFFFF);
+		irc[i].mask.dst.protonum = 0xFF;
+		irc[i].max_expected = max_dcc_channels;
+		irc[i].timeout = dcc_timeout;
+		irc[i].me = THIS_MODULE;
+		irc[i].help = help;
+
+		tmpname = &irc_names[i][0];
+		if (ports[i] == IRC_PORT)
+			sprintf(tmpname, "irc");
+		else
+			sprintf(tmpname, "irc-%u", i);
+		irc[i].name = tmpname;
+
+		ret = nf_conntrack_helper_register(&irc[i]);
+		if (ret) {
+			printk("nf_ct_irc: failed to register helper "
+			       "for pf: %u port: %u\n",
+			       irc[i].tuple.src.l3num, ports[i]);
+			/* Only helpers [0, i) were registered; limit the
+			 * cleanup to those so fini does not unregister
+			 * entries that never made it onto the helper list */
+			ports_c = i;
+			nf_conntrack_irc_fini();
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* This function is intentionally _NOT_ defined as __exit, because
+ * it is needed by the init function */
+static void nf_conntrack_irc_fini(void)
+{
+	int idx = 0;
+
+	/* Unregister the first ports_c helpers, then release the buffer */
+	while (idx < ports_c) {
+		nf_conntrack_helper_unregister(&irc[idx]);
+		idx++;
+	}
+
+	kfree(irc_buffer);
+}
+
+module_init(nf_conntrack_irc_init);
+module_exit(nf_conntrack_irc_fini);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 21e0bc91cf2..a3d31c3ac8e 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -26,7 +26,7 @@
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
@@ -37,8 +37,6 @@
#define DEBUGP(format, args...)
#endif
-DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
-
static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -84,7 +82,7 @@ static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple)
return NF_CT_F_BASIC;
}
-struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = {
+struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = {
.l3proto = PF_UNSPEC,
.name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
@@ -94,3 +92,4 @@ struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = {
.prepare = generic_prepare,
.get_features = generic_get_features,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
new file mode 100644
index 00000000000..a5b234e444d
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -0,0 +1,126 @@
+/*
+ * NetBIOS name service broadcast connection tracking helper
+ *
+ * (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+/*
+ * This helper tracks locally originating NetBIOS name service
+ * requests by issuing permanent expectations (valid until
+ * timing out) matching all reply connections from the
+ * destination network. The only NetBIOS specific thing is
+ * actually the port number.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_addr.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/route.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define NMBD_PORT 137
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_netbios_ns");
+
+static unsigned int timeout __read_mostly = 3;
+module_param(timeout, uint, 0400);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+/*
+ * Conntrack helper callback for NetBIOS name service requests.
+ *
+ * For locally generated packets in the original direction that are routed
+ * as broadcast, looks up the netmask of the primary interface address
+ * whose broadcast address equals the packet's destination.  If one is
+ * found, a permanent expectation is registered that matches replies from
+ * any source inside that network (source port NMBD_PORT) to the original
+ * sender, and the connection is refreshed with the module's timeout.
+ *
+ * Always returns NF_ACCEPT.
+ */
+static int help(struct sk_buff **pskb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct nf_conntrack_expect *exp;
+	struct in_device *in_dev;
+	__be32 mask = 0;
+
+	/* we're only interested in locally generated packets */
+	if ((*pskb)->sk == NULL)
+		return NF_ACCEPT;
+	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+		return NF_ACCEPT;
+	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+		return NF_ACCEPT;
+
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	if (in_dev != NULL) {
+		for_primary_ifa(in_dev) {
+			if (ifa->ifa_broadcast == iph->daddr) {
+				mask = ifa->ifa_mask;
+				break;
+			}
+		} endfor_ifa(in_dev);
+	}
+	rcu_read_unlock();
+
+	/* No interface address broadcasts to this destination */
+	if (mask == 0)
+		return NF_ACCEPT;
+
+	exp = nf_conntrack_expect_alloc(ct);
+	if (exp == NULL)
+		return NF_ACCEPT;
+
+	/* Start from the reply tuple, but pin the source port */
+	exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
+
+	/* Source address only has to lie within the broadcast network */
+	exp->mask.src.u3.ip = mask;
+	exp->mask.src.u.udp.port = htons(0xFFFF);
+	exp->mask.dst.u3.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.udp.port = htons(0xFFFF);
+	exp->mask.dst.protonum = 0xFF;
+
+	exp->expectfn = NULL;
+	exp->flags = NF_CT_EXPECT_PERMANENT;
+
+	nf_conntrack_expect_related(exp);
+	nf_conntrack_expect_put(exp);
+
+	nf_ct_refresh(ct, *pskb, timeout * HZ);
+
+	return NF_ACCEPT;
+}
+
+static struct nf_conntrack_helper helper __read_mostly = { /* netbios-ns helper definition */
+ .name = "netbios-ns",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), /* NMBD_PORT is 137 */
+ .tuple.dst.protonum = IPPROTO_UDP,
+ .mask.src.l3num = 0xFFFF, /* all-ones masks: family, port and protocol must match exactly */
+ .mask.src.u.udp.port = __constant_htons(0xFFFF),
+ .mask.dst.protonum = 0xFF,
+ .max_expected = 1,
+ .me = THIS_MODULE,
+ .help = help, /* .timeout is filled in from the module parameter in nf_conntrack_netbios_ns_init() */
+};
+
+/*
+ * Module init: copy the "timeout" module parameter into the helper and
+ * register it.
+ *
+ * Returns 0 on success, -EINVAL for a zero timeout, or the error returned
+ * by nf_conntrack_helper_register().
+ */
+static int __init nf_conntrack_netbios_ns_init(void)
+{
+	/* nf_conntrack_helper_register() treats a zero timeout as a bug
+	 * (BUG_ON), so refuse it here instead of crashing on module load */
+	if (timeout == 0) {
+		printk(KERN_ERR "nf_ct_netbios_ns: timeout must not be zero\n");
+		return -EINVAL;
+	}
+
+	helper.timeout = timeout;
+	return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_netbios_ns_fini(void)
+{
+ nf_conntrack_helper_unregister(&helper); /* module exit: undo the registration done at init */
+}
+
+module_init(nf_conntrack_netbios_ns_init);
+module_exit(nf_conntrack_netbios_ns_fini);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bd0156a28ec..bd1d2de75e4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -35,10 +35,15 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#ifdef CONFIG_NF_NAT_NEEDED
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_protocol.h>
+#endif
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -50,15 +55,15 @@ static char __initdata version[] = "0.93";
static inline int
ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_protocol *proto)
+ struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
- if (likely(proto->tuple_to_nfattr))
- ret = proto->tuple_to_nfattr(skb, tuple);
+ if (likely(l4proto->tuple_to_nfattr))
+ ret = l4proto->tuple_to_nfattr(skb, tuple);
NFA_NEST_END(skb, nest_parms);
@@ -93,7 +98,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
{
int ret;
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
@@ -102,9 +107,9 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
if (unlikely(ret < 0))
return ret;
- proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
- nf_ct_proto_put(proto);
+ l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+ ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
+ nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -112,7 +117,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
static inline int
ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
{
- u_int32_t status = htonl((u_int32_t) ct->status);
+ __be32 status = htonl((u_int32_t) ct->status);
NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
return 0;
@@ -124,7 +129,7 @@ static inline int
ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
long timeout_l = ct->timeout.expires - jiffies;
- u_int32_t timeout;
+ __be32 timeout;
if (timeout_l < 0)
timeout = 0;
@@ -141,26 +146,27 @@ nfattr_failure:
static inline int
ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
{
- struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+ struct nf_conntrack_l4proto *l4proto = nf_ct_l4proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
struct nfattr *nest_proto;
int ret;
- if (!proto->to_nfattr) {
- nf_ct_proto_put(proto);
+ if (!l4proto->to_nfattr) {
+ nf_ct_l4proto_put(l4proto);
return 0;
}
nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
- ret = proto->to_nfattr(skb, nest_proto, ct);
+ ret = l4proto->to_nfattr(skb, nest_proto, ct);
- nf_ct_proto_put(proto);
+ nf_ct_l4proto_put(l4proto);
NFA_NEST_END(skb, nest_proto);
return ret;
nfattr_failure:
+ nf_ct_l4proto_put(l4proto);
return -1;
}
@@ -194,7 +200,7 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nfattr *nest_count = NFA_NEST(skb, type);
- u_int32_t tmp;
+ __be32 tmp;
tmp = htonl(ct->counters[dir].packets);
NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
@@ -217,7 +223,7 @@ nfattr_failure:
static inline int
ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
{
- u_int32_t mark = htonl(ct->mark);
+ __be32 mark = htonl(ct->mark);
NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
return 0;
@@ -232,7 +238,7 @@ nfattr_failure:
static inline int
ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
{
- u_int32_t id = htonl(ct->id);
+ __be32 id = htonl(ct->id);
NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
return 0;
@@ -243,7 +249,7 @@ nfattr_failure:
static inline int
ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
{
- u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
+ __be32 use = htonl(atomic_read(&ct->ct_general.use));
NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
return 0;
@@ -329,8 +335,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
} else if (events & (IPCT_NEW | IPCT_RELATED)) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
- /* dump everything */
- events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
type = IPCTNL_MSG_CT_NEW;
@@ -365,28 +369,35 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nfattr_failure;
NFA_NEST_END(skb, nest_parms);
-
- /* NAT stuff is now a status flag */
- if ((events & IPCT_STATUS || events & IPCT_NATINFO)
- && ctnetlink_dump_status(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_REFRESH
- && ctnetlink_dump_timeout(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_PROTOINFO
- && ctnetlink_dump_protoinfo(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_HELPINFO
- && ctnetlink_dump_helpinfo(skb, ct) < 0)
- goto nfattr_failure;
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
- goto nfattr_failure;
+ if (events & IPCT_DESTROY) {
+ if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+ goto nfattr_failure;
+ } else {
+ if (ctnetlink_dump_status(skb, ct) < 0)
+ goto nfattr_failure;
- if (events & IPCT_MARK
- && ctnetlink_dump_mark(skb, ct) < 0)
- goto nfattr_failure;
+ if (ctnetlink_dump_timeout(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if (events & IPCT_PROTOINFO
+ && ctnetlink_dump_protoinfo(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if ((events & IPCT_HELPER || nfct_help(ct))
+ && ctnetlink_dump_helpinfo(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if ((events & IPCT_MARK || ct->mark)
+ && ctnetlink_dump_mark(skb, ct) < 0)
+ goto nfattr_failure;
+
+ if (events & IPCT_COUNTER_FILLING &&
+ (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
+ goto nfattr_failure;
+ }
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
@@ -423,7 +434,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
restart:
list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
h = (struct nf_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
/* Dump entries of a given L3 protocol number.
@@ -491,7 +502,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
struct nf_conntrack_tuple *tuple)
{
struct nfattr *tb[CTA_PROTO_MAX];
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
int ret = 0;
nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
@@ -503,12 +514,12 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
return -EINVAL;
tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
- proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+ l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
- if (likely(proto->nfattr_to_tuple))
- ret = proto->nfattr_to_tuple(tb, tuple);
+ if (likely(l4proto->nfattr_to_tuple))
+ ret = l4proto->nfattr_to_tuple(tb, tuple);
- nf_ct_proto_put(proto);
+ nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -549,28 +560,28 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple,
return 0;
}
-#ifdef CONFIG_IP_NF_NAT_NEEDED
+#ifdef CONFIG_NF_NAT_NEEDED
static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
[CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
[CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
};
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
+static int nfnetlink_parse_nat_proto(struct nfattr *attr,
const struct nf_conn *ct,
- struct ip_nat_range *range)
+ struct nf_nat_range *range)
{
struct nfattr *tb[CTA_PROTONAT_MAX];
- struct ip_nat_protocol *npt;
+ struct nf_nat_protocol *npt;
nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
return -EINVAL;
- npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+ npt = nf_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
if (!npt->nfattr_to_range) {
- ip_nat_proto_put(npt);
+ nf_nat_proto_put(npt);
return 0;
}
@@ -578,7 +589,7 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
if (npt->nfattr_to_range(tb, range) > 0)
range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- ip_nat_proto_put(npt);
+ nf_nat_proto_put(npt);
return 0;
}
@@ -589,8 +600,8 @@ static const size_t cta_min_nat[CTA_NAT_MAX] = {
};
static inline int
-ctnetlink_parse_nat(struct nfattr *nat,
- const struct nf_conn *ct, struct ip_nat_range *range)
+nfnetlink_parse_nat(struct nfattr *nat,
+ const struct nf_conn *ct, struct nf_nat_range *range)
{
struct nfattr *tb[CTA_NAT_MAX];
int err;
@@ -603,12 +614,12 @@ ctnetlink_parse_nat(struct nfattr *nat,
return -EINVAL;
if (tb[CTA_NAT_MINIP-1])
- range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+ range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
if (!tb[CTA_NAT_MAXIP-1])
range->max_ip = range->min_ip;
else
- range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+ range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
if (range->min_ip)
range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -616,7 +627,7 @@ ctnetlink_parse_nat(struct nfattr *nat,
if (!tb[CTA_NAT_PROTO-1])
return 0;
- err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
+ err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
if (err < 0)
return err;
@@ -681,7 +692,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
ct = nf_ct_tuplehash_to_ctrack(h);
if (cda[CTA_ID-1]) {
- u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+ u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
if (ct->id != id) {
nf_ct_put(ct);
return -ENOENT;
@@ -751,7 +762,6 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
nf_ct_put(ct);
return -ENOMEM;
}
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, 1, ct);
@@ -775,7 +785,7 @@ static inline int
ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[])
{
unsigned long d;
- unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
+ unsigned int status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
d = ct->status ^ status;
if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -792,35 +802,35 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[])
return -EINVAL;
if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
+#ifndef CONFIG_NF_NAT_NEEDED
return -EINVAL;
#else
- struct ip_nat_range range;
+ struct nf_nat_range range;
if (cda[CTA_NAT_DST-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
+ if (nfnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
&range) < 0)
return -EINVAL;
- if (ip_nat_initialized(ct,
+ if (nf_nat_initialized(ct,
HOOK2MANIP(NF_IP_PRE_ROUTING)))
return -EEXIST;
- ip_nat_setup_info(ct, &range, hooknum);
+ nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
if (cda[CTA_NAT_SRC-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
+ if (nfnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
&range) < 0)
return -EINVAL;
- if (ip_nat_initialized(ct,
+ if (nf_nat_initialized(ct,
HOOK2MANIP(NF_IP_POST_ROUTING)))
return -EEXIST;
- ip_nat_setup_info(ct, &range, hooknum);
+ nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
}
#endif
}
/* Be careful here, modifying NAT bits can screw up things,
* so don't let users modify them directly if they don't pass
- * ip_nat_range. */
+ * nf_nat_range. */
ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
return 0;
}
@@ -874,7 +884,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
static inline int
ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[])
{
- u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+ u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
if (!del_timer(&ct->timeout))
return -ETIME;
@@ -889,18 +899,18 @@ static inline int
ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[])
{
struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
int err = 0;
nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
- proto = nf_ct_proto_find_get(l3num, npt);
+ l4proto = nf_ct_l4proto_find_get(l3num, npt);
- if (proto->from_nfattr)
- err = proto->from_nfattr(tb, ct);
- nf_ct_proto_put(proto);
+ if (l4proto->from_nfattr)
+ err = l4proto->from_nfattr(tb, ct);
+ nf_ct_l4proto_put(l4proto);
return err;
}
@@ -936,7 +946,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[])
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+ ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
#endif
return 0;
@@ -949,6 +959,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
{
struct nf_conn *ct;
int err = -EINVAL;
+ struct nf_conn_help *help;
ct = nf_conntrack_alloc(otuple, rtuple);
if (ct == NULL || IS_ERR(ct))
@@ -956,14 +967,16 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (!cda[CTA_TIMEOUT-1])
goto err;
- ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+ ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
ct->status |= IPS_CONFIRMED;
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- goto err;
+ if (cda[CTA_STATUS-1]) {
+ err = ctnetlink_change_status(ct, cda);
+ if (err < 0)
+ goto err;
+ }
if (cda[CTA_PROTOINFO-1]) {
err = ctnetlink_change_protoinfo(ct, cda);
@@ -973,12 +986,19 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+ ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
#endif
+ help = nfct_help(ct);
+ if (help)
+ help->helper = nf_ct_helper_find_get(rtuple);
+
add_timer(&ct->timeout);
nf_conntrack_hash_insert(ct);
+ if (help && help->helper)
+ nf_ct_helper_put(help->helper);
+
return 0;
err:
@@ -1072,7 +1092,7 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb,
{
int ret;
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
@@ -1082,9 +1102,9 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb,
if (unlikely(ret < 0))
goto nfattr_failure;
- proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
- nf_ct_proto_put(proto);
+ l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+ ret = ctnetlink_dump_tuples_proto(skb, mask, l4proto);
+ nf_ct_l4proto_put(l4proto);
if (unlikely(ret < 0))
goto nfattr_failure;
@@ -1101,8 +1121,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
struct nf_conn *master = exp->master;
- u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
- u_int32_t id = htonl(exp->id);
+ __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+ __be32 id = htonl(exp->id);
if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
goto nfattr_failure;
@@ -1275,12 +1295,12 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = nf_conntrack_expect_find(&tuple);
+ exp = nf_conntrack_expect_find_get(&tuple);
if (!exp)
return -ENOENT;
if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+ __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
if (exp->id != ntohl(id)) {
nf_conntrack_expect_put(exp);
return -ENOENT;
@@ -1291,8 +1311,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
goto out;
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
-
+
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
@@ -1331,13 +1350,12 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
/* bump usage count to 2 */
- exp = nf_conntrack_expect_find(&tuple);
+ exp = nf_conntrack_expect_find_get(&tuple);
if (!exp)
return -ENOENT;
if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id =
- *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+ __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
if (exp->id != ntohl(id)) {
nf_conntrack_expect_put(exp);
return -ENOENT;
@@ -1433,6 +1451,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
exp->expectfn = NULL;
exp->flags = 0;
exp->master = ct;
+ exp->helper = NULL;
memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));
@@ -1529,6 +1548,7 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = {
.cb = ctnl_exp_cb,
};
+MODULE_ALIAS("ip_conntrack_netlink");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
new file mode 100644
index 00000000000..f0ff00e0d05
--- /dev/null
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -0,0 +1,607 @@
+/*
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * Limitations:
+ * - We blindly assume that control connections are always
+ * established in PNS->PAC direction. This is a violation
+ * of RFC 2637
+ * - We can only support one single call within each session
+ * TODO:
+ * - testing of incoming PPTP calls
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+MODULE_ALIAS("ip_conntrack_pptp");
+
+static DEFINE_SPINLOCK(nf_pptp_lock);
+
+int
+(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
+
+int
+(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
+
+void
+(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig,
+ struct nf_conntrack_expect *expect_reply)
+ __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre);
+
+void
+(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
+
+#if 0
+/* PptpControlMessageType names */
+const char *pptp_msg_name[] = {
+ "UNKNOWN_MESSAGE",
+ "START_SESSION_REQUEST",
+ "START_SESSION_REPLY",
+ "STOP_SESSION_REQUEST",
+ "STOP_SESSION_REPLY",
+ "ECHO_REQUEST",
+ "ECHO_REPLY",
+ "OUT_CALL_REQUEST",
+ "OUT_CALL_REPLY",
+ "IN_CALL_REQUEST",
+ "IN_CALL_REPLY",
+ "IN_CALL_CONNECT",
+ "CALL_CLEAR_REQUEST",
+ "CALL_DISCONNECT_NOTIFY",
+ "WAN_ERROR_NOTIFY",
+ "SET_LINK_INFO"
+};
+EXPORT_SYMBOL(pptp_msg_name);
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+#define PPTP_GRE_TIMEOUT (10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
+
+static void pptp_expectfn(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
+ DEBUGP("increasing timeouts\n");
+
+ /* increase timeout of GRE data channel conntrack entry */
+ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+ ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+ /* Can you see how rusty this code is, compared with the pre-2.6.11
+ * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+ rcu_read_lock();
+ nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
+ if (nf_nat_pptp_expectfn && ct->status & IPS_NAT_MASK)
+ nf_nat_pptp_expectfn(ct, exp);
+ else {
+ struct nf_conntrack_tuple inv_t;
+ struct nf_conntrack_expect *exp_other;
+
+ /* obviously this tuple inversion only works until you do NAT */
+ nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
+ DEBUGP("trying to unexpect other dir: ");
+ NF_CT_DUMP_TUPLE(&inv_t);
+
+ exp_other = nf_conntrack_expect_find_get(&inv_t);
+ if (exp_other) {
+ /* delete other expectation. */
+ DEBUGP("found\n");
+ nf_conntrack_unexpect_related(exp_other);
+ nf_conntrack_expect_put(exp_other);
+ } else {
+ DEBUGP("not found\n");
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn *sibling;
+
+ DEBUGP("trying to timeout ct or exp for tuple ");
+ NF_CT_DUMP_TUPLE(t);
+
+ h = nf_conntrack_find_get(t, NULL);
+ if (h) {
+ sibling = nf_ct_tuplehash_to_ctrack(h);
+ DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
+ sibling->proto.gre.timeout = 0;
+ sibling->proto.gre.stream_timeout = 0;
+ if (del_timer(&sibling->timeout))
+ sibling->timeout.function((unsigned long)sibling);
+ nf_ct_put(sibling);
+ return 1;
+ } else {
+ exp = nf_conntrack_expect_find_get(t);
+ if (exp) {
+ DEBUGP("unexpect_related of expect %p\n", exp);
+ nf_conntrack_unexpect_related(exp);
+ nf_conntrack_expect_put(exp);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* timeout GRE data connections */
+static void pptp_destroy_siblings(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_tuple t;
+
+ nf_ct_gre_keymap_destroy(ct);
+
+ /* try original (pns->pac) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
+ t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
+ if (!destroy_sibling_or_exp(&t))
+ DEBUGP("failed to timeout original pns->pac ct/exp\n");
+
+ /* try reply (pac->pns) tuple */
+ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+ t.dst.protonum = IPPROTO_GRE;
+ t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
+ t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
+ if (!destroy_sibling_or_exp(&t))
+ DEBUGP("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
+static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
+{
+ struct nf_conntrack_expect *exp_orig, *exp_reply;
+ enum ip_conntrack_dir dir;
+ int ret = 1;
+ typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre;
+
+ exp_orig = nf_conntrack_expect_alloc(ct);
+ if (exp_orig == NULL)
+ goto out;
+
+ exp_reply = nf_conntrack_expect_alloc(ct);
+ if (exp_reply == NULL)
+ goto out_put_orig;
+
+ /* original direction, PNS->PAC */
+ dir = IP_CT_DIR_ORIGINAL;
+ nf_conntrack_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num,
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &peer_callid, &callid);
+ exp_orig->expectfn = pptp_expectfn;
+
+ /* reply direction, PAC->PNS */
+ dir = IP_CT_DIR_REPLY;
+ nf_conntrack_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num,
+ &ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[dir].tuple.dst.u3,
+ IPPROTO_GRE, &callid, &peer_callid);
+ exp_reply->expectfn = pptp_expectfn;
+
+ nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
+ if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
+ nf_nat_pptp_exp_gre(exp_orig, exp_reply);
+ if (nf_conntrack_expect_related(exp_orig) != 0)
+ goto out_put_both;
+ if (nf_conntrack_expect_related(exp_reply) != 0)
+ goto out_unexpect_orig;
+
+ /* Add GRE keymap entries */
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0)
+ goto out_unexpect_both;
+ if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) {
+ nf_ct_gre_keymap_destroy(ct);
+ goto out_unexpect_both;
+ }
+ ret = 0;
+
+out_put_both:
+ nf_conntrack_expect_put(exp_reply);
+out_put_orig:
+ nf_conntrack_expect_put(exp_orig);
+out:
+ return ret;
+
+out_unexpect_both:
+ nf_conntrack_unexpect_related(exp_reply);
+out_unexpect_orig:
+ nf_conntrack_unexpect_related(exp_orig);
+ goto out_put_both;
+}
+
+static inline int
+pptp_inbound_pkt(struct sk_buff **pskb,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+ u_int16_t msg;
+ __be16 cid = 0, pcid = 0;
+ typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
+
+ msg = ntohs(ctlh->messageType);
+ DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
+
+ switch (msg) {
+ case PPTP_START_SESSION_REPLY:
+ /* server confirms new control session */
+ if (info->sstate < PPTP_SESSION_REQUESTED)
+ goto invalid;
+ if (pptpReq->srep.resultCode == PPTP_START_OK)
+ info->sstate = PPTP_SESSION_CONFIRMED;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_STOP_SESSION_REPLY:
+ /* server confirms end of control session */
+ if (info->sstate > PPTP_SESSION_STOPREQ)
+ goto invalid;
+ if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+ info->sstate = PPTP_SESSION_NONE;
+ else
+ info->sstate = PPTP_SESSION_ERROR;
+ break;
+
+ case PPTP_OUT_CALL_REPLY:
+ /* server accepted call, we now expect GRE frames */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_OUT_REQ &&
+ info->cstate != PPTP_CALL_OUT_CONF)
+ goto invalid;
+
+ cid = pptpReq->ocack.callID;
+ pcid = pptpReq->ocack.peersCallID;
+ if (info->pns_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+ info->cstate = PPTP_CALL_OUT_CONF;
+ info->pac_call_id = cid;
+ exp_gre(ct, cid, pcid);
+ } else
+ info->cstate = PPTP_CALL_NONE;
+ break;
+
+ case PPTP_IN_CALL_REQUEST:
+ /* server tells us about incoming call request */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+
+ cid = pptpReq->icreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ info->cstate = PPTP_CALL_IN_REQ;
+ info->pac_call_id = cid;
+ break;
+
+ case PPTP_IN_CALL_CONNECT:
+ /* server tells us about incoming call established */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ if (info->cstate != PPTP_CALL_IN_REP &&
+ info->cstate != PPTP_CALL_IN_CONF)
+ goto invalid;
+
+ pcid = pptpReq->iccon.peersCallID;
+ cid = info->pac_call_id;
+
+ if (info->pns_call_id != pcid)
+ goto invalid;
+
+ DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+ info->cstate = PPTP_CALL_IN_CONF;
+
+ /* we expect a GRE connection from PAC to PNS */
+ exp_gre(ct, cid, pcid);
+ break;
+
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ /* server confirms disconnect */
+ cid = pptpReq->disc.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ info->cstate = PPTP_CALL_NONE;
+
+ /* untrack this call id, unexpect GRE packets */
+ pptp_destroy_siblings(ct);
+ break;
+
+ case PPTP_WAN_ERROR_NOTIFY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
+ if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
+ return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return NF_ACCEPT;
+
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
+}
+
+static inline int
+pptp_outbound_pkt(struct sk_buff **pskb,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq,
+ unsigned int reqlen,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+ u_int16_t msg;
+ __be16 cid = 0, pcid = 0;
+ typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
+
+ msg = ntohs(ctlh->messageType);
+ DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
+
+ switch (msg) {
+ case PPTP_START_SESSION_REQUEST:
+ /* client requests a new control session */
+ if (info->sstate != PPTP_SESSION_NONE)
+ goto invalid;
+ info->sstate = PPTP_SESSION_REQUESTED;
+ break;
+
+ case PPTP_STOP_SESSION_REQUEST:
+ /* client requests end of control session */
+ info->sstate = PPTP_SESSION_STOPREQ;
+ break;
+
+ case PPTP_OUT_CALL_REQUEST:
+ /* client initiating connection to server */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ info->cstate = PPTP_CALL_OUT_REQ;
+ /* track PNS call id */
+ cid = pptpReq->ocreq.callID;
+ DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ info->pns_call_id = cid;
+ break;
+
+ case PPTP_IN_CALL_REPLY:
+ /* client answers incoming call */
+ if (info->cstate != PPTP_CALL_IN_REQ &&
+ info->cstate != PPTP_CALL_IN_REP)
+ goto invalid;
+
+ cid = pptpReq->icack.callID;
+ pcid = pptpReq->icack.peersCallID;
+ if (info->pac_call_id != pcid)
+ goto invalid;
+ DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+ ntohs(cid), ntohs(pcid));
+
+ if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+ /* part two of the three-way handshake */
+ info->cstate = PPTP_CALL_IN_REP;
+ info->pns_call_id = cid;
+ } else
+ info->cstate = PPTP_CALL_NONE;
+ break;
+
+ case PPTP_CALL_CLEAR_REQUEST:
+ /* client requests hangup of call */
+ if (info->sstate != PPTP_SESSION_CONFIRMED)
+ goto invalid;
+ /* FUTURE: iterate over all calls and check if
+ * call ID is valid. We don't do this without newnat,
+ * because we only know about last call */
+ info->cstate = PPTP_CALL_CLEAR_REQ;
+ break;
+
+ case PPTP_SET_LINK_INFO:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* I don't have to explain these ;) */
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
+ if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
+ return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return NF_ACCEPT;
+
+invalid:
+ DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+ "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
+ ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+ return NF_ACCEPT;
+}
+
+/*
+ * Size of the message body structure for each PPTP control message type,
+ * indexed by message type.  conntrack_pptp_help() uses this table to pass
+ * through, untracked, any control message whose remaining payload is shorter
+ * than the structure for its type.  Types without an explicit entry here
+ * are zero-initialized, i.e. have no minimum length.
+ */
+static const unsigned int pptp_msg_size[] = {
+ [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
+ [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
+ [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
+ [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
+ [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
+ [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
+ [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
+ [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
+ [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
+ [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
+ [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+ [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
+ [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
+};
+
+/*
+ * Conntrack helper callback for the PPTP control connection.
+ *
+ * Walks the TCP header at @protoff, the PPTP packet header and the PPTP
+ * control header, then hands the message body to pptp_outbound_pkt()
+ * (original direction) or pptp_inbound_pkt() (reply direction) while
+ * holding nf_pptp_lock.
+ *
+ * Returns NF_ACCEPT without tracking when the connection is not yet
+ * established, when a header cannot be read, when the packet is not a PPTP
+ * control packet, or when a known message type is too short; otherwise
+ * returns the verdict of the per-direction handler.
+ */
+static int
+conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+
+{
+ int dir = CTINFO2DIR(ctinfo);
+ struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info;
+ struct tcphdr _tcph, *tcph;
+ struct pptp_pkt_hdr _pptph, *pptph;
+ struct PptpControlHeader _ctlh, *ctlh;
+ union pptp_ctrl_union _pptpReq, *pptpReq;
+ unsigned int tcplen = (*pskb)->len - protoff;
+ unsigned int datalen, reqlen, nexthdr_off;
+ int oldsstate, oldcstate;
+ int ret;
+ u_int16_t msg;
+
+ /* don't do any tracking before tcp handshake complete */
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+ return NF_ACCEPT;
+
+ /* nexthdr_off walks forward over each header; datalen is what is left */
+ nexthdr_off = protoff;
+ tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+ BUG_ON(!tcph);
+ nexthdr_off += tcph->doff * 4;
+ datalen = tcplen - tcph->doff * 4;
+
+ pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+ if (!pptph) {
+ DEBUGP("no full PPTP header, can't track\n");
+ return NF_ACCEPT;
+ }
+ nexthdr_off += sizeof(_pptph);
+ datalen -= sizeof(_pptph);
+
+ /* if it's not a control message we can't do anything with it */
+ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+ ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+ DEBUGP("not a control packet\n");
+ return NF_ACCEPT;
+ }
+
+ ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ if (!ctlh)
+ return NF_ACCEPT;
+ nexthdr_off += sizeof(_ctlh);
+ datalen -= sizeof(_ctlh);
+
+ reqlen = datalen;
+ msg = ntohs(ctlh->messageType);
+ /* known message types must carry at least their full body structure */
+ if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+ return NF_ACCEPT;
+ /* never copy more than the on-stack union can hold */
+ if (reqlen > sizeof(*pptpReq))
+ reqlen = sizeof(*pptpReq);
+
+ pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ if (!pptpReq)
+ return NF_ACCEPT;
+
+ /* previous states are only used by the debug print below;
+ * NOTE(review): they are sampled before nf_pptp_lock is taken */
+ oldsstate = info->sstate;
+ oldcstate = info->cstate;
+
+ spin_lock_bh(&nf_pptp_lock);
+
+ /* FIXME: We just blindly assume that the control connection is always
+ * established from PNS->PAC. However, RFC makes no guarantee */
+ if (dir == IP_CT_DIR_ORIGINAL)
+ /* client -> server (PNS -> PAC) */
+ ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ else
+ /* server -> client (PAC -> PNS) */
+ ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ctinfo);
+ DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
+ oldsstate, info->sstate, oldcstate, info->cstate);
+ spin_unlock_bh(&nf_pptp_lock);
+
+ return ret;
+}
+
+/* control protocol helper */
+static struct nf_conntrack_helper pptp __read_mostly = {
+ .name = "pptp",
+ .me = THIS_MODULE,
+ .max_expected = 2,
+ .timeout = 5 * 60,
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT),
+ .tuple.dst.protonum = IPPROTO_TCP,
+ .mask.src.l3num = 0xffff,
+ .mask.src.u.tcp.port = __constant_htons(0xffff),
+ .mask.dst.protonum = 0xff,
+ .help = conntrack_pptp_help,
+ .destroy = pptp_destroy_siblings,
+};
+
+/* Module entry point: register the PPTP control-connection helper. */
+static int __init nf_conntrack_pptp_init(void)
+{
+	int ret;
+
+	ret = nf_conntrack_helper_register(&pptp);
+
+	return ret;
+}
+
+/*
+ * Module exit point: unregister the PPTP helper first, then flush the
+ * GRE keymap list.
+ */
+static void __exit nf_conntrack_pptp_fini(void)
+{
+ nf_conntrack_helper_unregister(&pptp);
+ nf_ct_gre_keymap_flush();
+}
+
+module_init(nf_conntrack_pptp_init);
+module_exit(nf_conntrack_pptp_fini);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
new file mode 100644
index 00000000000..1a61b72712c
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -0,0 +1,410 @@
+/* L3/L4 protocol support for nf_conntrack. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/stddef.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_l3protos);
+
+#ifdef CONFIG_SYSCTL
+static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
+
+/*
+ * Register @table below @path unless *@header already holds a
+ * registration, then count one more user in *@users (when given).
+ * Returns 0 on success, -ENOMEM if the table could not be registered.
+ */
+static int
+nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
+		      struct ctl_table *table, unsigned int *users)
+{
+	struct ctl_table_header *hdr = *header;
+
+	if (!hdr) {
+		hdr = nf_register_sysctl_table(path, table);
+		*header = hdr;
+		if (!hdr)
+			return -ENOMEM;
+	}
+
+	if (users)
+		*users += 1;
+
+	return 0;
+}
+
+/*
+ * Drop one user from *@users (when given) and, once no users remain,
+ * unregister @table and clear *@header.
+ */
+static void
+nf_ct_unregister_sysctl(struct ctl_table_header **header,
+			struct ctl_table *table, unsigned int *users)
+{
+	if (users) {
+		*users -= 1;
+		if (*users > 0)
+			return;
+	}
+
+	nf_unregister_sysctl_table(*header, table);
+	*header = NULL;
+}
+#endif
+
+/*
+ * Look up the L4 protocol tracker for (@l3proto, @l4proto).  Falls back
+ * to the generic tracker when @l3proto is out of range or has no
+ * per-protocol array yet.
+ */
+struct nf_conntrack_l4proto *
+__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
+{
+	struct nf_conntrack_l4proto **l4protos;
+
+	if (unlikely(l3proto >= AF_MAX))
+		return &nf_conntrack_l4proto_generic;
+
+	l4protos = nf_ct_protos[l3proto];
+	if (unlikely(!l4protos))
+		return &nf_conntrack_l4proto_generic;
+
+	return l4protos[l4proto];
+}
+EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
+
+/*
+ * Look up an L4 tracker and take a reference on its module.  Always
+ * returns a usable tracker: if the module reference cannot be taken,
+ * the generic tracker is returned instead.
+ */
+struct nf_conntrack_l4proto *
+nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto)
+{
+	struct nf_conntrack_l4proto *found;
+
+	preempt_disable();
+	found = __nf_ct_l4proto_find(l3proto, l4proto);
+	if (try_module_get(found->me)) {
+		preempt_enable();
+		return found;
+	}
+	preempt_enable();
+
+	return &nf_conntrack_l4proto_generic;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
+
+/* Release the module reference held on L4 tracker @p. */
+void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+{
+	struct module *owner = p->me;
+
+	module_put(owner);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
+
+/*
+ * Look up the L3 tracker for @l3proto and take a reference on its
+ * module; falls back to the generic L3 tracker if that fails.
+ */
+struct nf_conntrack_l3proto *
+nf_ct_l3proto_find_get(u_int16_t l3proto)
+{
+	struct nf_conntrack_l3proto *found;
+
+	preempt_disable();
+	found = __nf_ct_l3proto_find(l3proto);
+	if (try_module_get(found->me)) {
+		preempt_enable();
+		return found;
+	}
+	preempt_enable();
+
+	return &nf_conntrack_l3proto_generic;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
+
+/* Release the module reference held on L3 tracker @p. */
+void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
+{
+	struct module *owner = p->me;
+
+	module_put(owner);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_put);
+
+/*
+ * Take a module reference on the L3 tracker for @l3proto, requesting the
+ * "nf_conntrack-<l3proto>" module while only the generic tracker is found.
+ * Returns 0 once a non-generic tracker is referenced, or -EPROTOTYPE when
+ * the module request fails.
+ */
+int
+nf_ct_l3proto_try_module_get(unsigned short l3proto)
+{
+	struct nf_conntrack_l3proto *p;
+
+	for (;;) {
+		p = nf_ct_l3proto_find_get(l3proto);
+		if (p != &nf_conntrack_l3proto_generic)
+			return 0;
+
+		/* only the generic tracker so far: try loading the real one */
+		if (request_module("nf_conntrack-%d", l3proto) != 0)
+			return -EPROTOTYPE;
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
+
+/* Drop a module reference on whichever L3 tracker serves @l3proto. */
+void nf_ct_l3proto_module_put(unsigned short l3proto)
+{
+	struct nf_conntrack_l3proto *proto;
+
+	preempt_disable();
+	proto = __nf_ct_l3proto_find(l3proto);
+	preempt_enable();
+
+	module_put(proto->me);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
+
+/*
+ * Iterator callback: non-zero when conntrack @i belongs to the L3
+ * protocol passed in @data (a struct nf_conntrack_l3proto *).
+ */
+static int kill_l3proto(struct nf_conn *i, void *data)
+{
+	const struct nf_conntrack_l3proto *l3proto = data;
+	const struct nf_conntrack_tuple *orig =
+		&i->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	return orig->src.l3num == l3proto->l3proto;
+}
+
+/*
+ * Iterator callback: non-zero when conntrack @i matches both the L4 and
+ * the L3 protocol of the tracker passed in @data
+ * (a struct nf_conntrack_l4proto *).
+ */
+static int kill_l4proto(struct nf_conn *i, void *data)
+{
+	const struct nf_conntrack_l4proto *l4proto = data;
+	const struct nf_conntrack_tuple *orig =
+		&i->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	return orig->dst.protonum == l4proto->l4proto &&
+	       orig->src.l3num == l4proto->l3proto;
+}
+
+/*
+ * Register the sysctl table of @l3proto, if it has one.  Returns 0 on
+ * success (or when there is nothing to register), negative errno otherwise.
+ */
+static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
+{
+	int ret = 0;
+
+#ifdef CONFIG_SYSCTL
+	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	if (l3proto->ctl_table)
+		ret = nf_ct_register_sysctl(&l3proto->ctl_table_header,
+					    l3proto->ctl_table_path,
+					    l3proto->ctl_table, NULL);
+	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+#endif
+	return ret;
+}
+
+/* Unregister the sysctl table of @l3proto if one is currently registered. */
+static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
+{
+#ifdef CONFIG_SYSCTL
+	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	if (l3proto->ctl_table_header)
+		nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
+					l3proto->ctl_table, NULL);
+	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+#endif
+}
+
+/*
+ * Install @proto as the L3 tracker for its protocol family and register
+ * its sysctls.  Returns -EBUSY when the family is out of range or already
+ * has a non-generic tracker; on sysctl failure the tracker is unregistered
+ * again and the error is returned.
+ */
+int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
+{
+	int err;
+
+	if (proto->l3proto >= AF_MAX)
+		return -EBUSY;
+
+	write_lock_bh(&nf_conntrack_lock);
+	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+		write_unlock_bh(&nf_conntrack_lock);
+		return -EBUSY;
+	}
+	nf_ct_l3protos[proto->l3proto] = proto;
+	write_unlock_bh(&nf_conntrack_lock);
+
+	err = nf_ct_l3proto_register_sysctl(proto);
+	if (err < 0)
+		nf_conntrack_l3proto_unregister(proto);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
+
+/*
+ * Replace @proto by the generic L3 tracker, drop its sysctls and remove
+ * every conntrack entry of that family.  Returns -EBUSY when the family
+ * is out of range or @proto is not the installed tracker, 0 otherwise.
+ */
+int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+{
+	if (proto->l3proto >= AF_MAX)
+		return -EBUSY;
+
+	write_lock_bh(&nf_conntrack_lock);
+	if (nf_ct_l3protos[proto->l3proto] != proto) {
+		write_unlock_bh(&nf_conntrack_lock);
+		return -EBUSY;
+	}
+	nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic;
+	write_unlock_bh(&nf_conntrack_lock);
+
+	nf_ct_l3proto_unregister_sysctl(proto);
+
+	/* Somebody could be still looking at the proto in bh. */
+	synchronize_net();
+
+	/* Remove all conntrack entries for this protocol */
+	nf_ct_iterate_cleanup(kill_l3proto, proto);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
+
+/*
+ * Register the sysctl tables of @l4proto: the main table (shared between
+ * trackers through ctl_table_header/ctl_table_users) and, with
+ * CONFIG_NF_CONNTRACK_PROC_COMPAT, the compat table.
+ *
+ * Returns 0 on success or a negative errno.  If the compat table fails to
+ * register, the main table registration done here is rolled back.
+ */
+static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
+{
+	int err = 0;
+
+#ifdef CONFIG_SYSCTL
+	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	if (l4proto->ctl_table != NULL) {
+		err = nf_ct_register_sysctl(l4proto->ctl_table_header,
+					    nf_net_netfilter_sysctl_path,
+					    l4proto->ctl_table,
+					    l4proto->ctl_table_users);
+		if (err < 0)
+			goto out;
+	}
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	if (l4proto->ctl_compat_table != NULL) {
+		err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header,
+					    nf_net_ipv4_netfilter_sysctl_path,
+					    l4proto->ctl_compat_table, NULL);
+		if (err == 0)
+			goto out;
+		/*
+		 * Undo only what was registered above: a tracker without a
+		 * main table has no ctl_table_header to dereference.
+		 */
+		if (l4proto->ctl_table != NULL)
+			nf_ct_unregister_sysctl(l4proto->ctl_table_header,
+						l4proto->ctl_table,
+						l4proto->ctl_table_users);
+	}
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+out:
+	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+#endif /* CONFIG_SYSCTL */
+	return err;
+}
+
+/*
+ * Unregister whichever sysctl tables of @l4proto are currently
+ * registered (main table and, if configured, the compat table).
+ */
+static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header **hdr;
+
+	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	hdr = l4proto->ctl_table_header;
+	if (hdr && *hdr)
+		nf_ct_unregister_sysctl(hdr, l4proto->ctl_table,
+					l4proto->ctl_table_users);
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	if (l4proto->ctl_compat_table_header)
+		nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
+					l4proto->ctl_compat_table, NULL);
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+#endif /* CONFIG_SYSCTL */
+}
+
+/*
+ * Install @l4proto in the per-L3-family tracker table and register its
+ * sysctls.  For the generic tracker only the sysctls are registered.
+ *
+ * Returns 0 on success, -EBUSY when the L3 family is out of range or the
+ * slot already holds a non-generic tracker, -ENOMEM when the per-family
+ * array cannot be allocated, or the sysctl registration error (in which
+ * case the tracker is unregistered again).
+ */
+/* FIXME: Allow NULL functions and sub in pointers to generic for
+ them. --RR */
+int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
+{
+ int ret = 0;
+
+ if (l4proto->l3proto >= PF_MAX) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (l4proto == &nf_conntrack_l4proto_generic)
+ return nf_ct_l4proto_register_sysctl(l4proto);
+
+retry:
+ write_lock_bh(&nf_conntrack_lock);
+ if (nf_ct_protos[l4proto->l3proto]) {
+ if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
+ != &nf_conntrack_l4proto_generic) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ } else {
+ /* l3proto may be loaded later. */
+ struct nf_conntrack_l4proto **proto_array;
+ int i;
+
+ /* drop the lock: the GFP_KERNEL allocation below may sleep */
+ write_unlock_bh(&nf_conntrack_lock);
+
+ proto_array = (struct nf_conntrack_l4proto **)
+ kmalloc(MAX_NF_CT_PROTO *
+ sizeof(struct nf_conntrack_l4proto *),
+ GFP_KERNEL);
+ if (proto_array == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* every slot starts out pointing at the generic tracker */
+ for (i = 0; i < MAX_NF_CT_PROTO; i++)
+ proto_array[i] = &nf_conntrack_l4proto_generic;
+
+ /* re-check under the lock: someone may have installed an array
+ * while it was dropped */
+ write_lock_bh(&nf_conntrack_lock);
+ if (nf_ct_protos[l4proto->l3proto]) {
+ /* bad timing, but no problem */
+ write_unlock_bh(&nf_conntrack_lock);
+ kfree(proto_array);
+ } else {
+ nf_ct_protos[l4proto->l3proto] = proto_array;
+ write_unlock_bh(&nf_conntrack_lock);
+ }
+
+ /*
+ * Just once because array is never freed until unloading
+ * nf_conntrack.ko
+ */
+ goto retry;
+ }
+
+ nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto;
+ write_unlock_bh(&nf_conntrack_lock);
+
+ ret = nf_ct_l4proto_register_sysctl(l4proto);
+ if (ret < 0)
+ nf_conntrack_l4proto_unregister(l4proto);
+ return ret;
+
+out_unlock:
+ write_unlock_bh(&nf_conntrack_lock);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
+
+/*
+ * Replace @l4proto by the generic tracker in the per-L3-family table,
+ * drop its sysctls and remove all conntrack entries that use it.  For the
+ * generic tracker only the sysctls are unregistered.
+ *
+ * Returns 0 on success, or -EBUSY when the L3 family is out of range, no
+ * tracker array exists for that family, or @l4proto is not the installed
+ * tracker for its slot.
+ */
+int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
+{
+	int ret = 0;
+
+	if (l4proto->l3proto >= PF_MAX) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (l4proto == &nf_conntrack_l4proto_generic) {
+		nf_ct_l4proto_unregister_sysctl(l4proto);
+		goto out;
+	}
+
+	write_lock_bh(&nf_conntrack_lock);
+	/* the per-family array only exists once something registered */
+	if (nf_ct_protos[l4proto->l3proto] == NULL ||
+	    nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto) {
+		write_unlock_bh(&nf_conntrack_lock);
+		ret = -EBUSY;
+		goto out;
+	}
+	nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
+		= &nf_conntrack_l4proto_generic;
+	write_unlock_bh(&nf_conntrack_lock);
+
+	nf_ct_l4proto_unregister_sysctl(l4proto);
+
+	/* Somebody could be still looking at the proto in bh. */
+	synchronize_net();
+
+	/* Remove all conntrack entries for this protocol */
+	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 26408bb0955..69902531c23 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -15,9 +15,9 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
-unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
@@ -71,10 +71,42 @@ static int new(struct nf_conn *conntrack, const struct sk_buff *skb,
return 1;
}
-struct nf_conntrack_protocol nf_conntrack_generic_protocol =
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *generic_sysctl_header;
+static struct ctl_table generic_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT,
+ .procname = "nf_conntrack_generic_timeout",
+ .data = &nf_ct_generic_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table generic_compat_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
+ .procname = "ip_conntrack_generic_timeout",
+ .data = &nf_ct_generic_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l3proto = PF_UNSPEC,
- .proto = 0,
+ .l4proto = 0,
.name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
@@ -82,4 +114,11 @@ struct nf_conntrack_protocol nf_conntrack_generic_protocol =
.print_conntrack = generic_print_conntrack,
.packet = packet,
.new = new,
+#ifdef CONFIG_SYSCTL
+ .ctl_table_header = &generic_sysctl_header,
+ .ctl_table = generic_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = generic_compat_sysctl_table,
+#endif
+#endif
};
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
new file mode 100644
index 00000000000..ac193ce7024
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -0,0 +1,305 @@
+/*
+ * ip_conntrack_proto_gre.c - Version 3.0
+ *
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * well suited for NAT, as it has no protocol-specific part such as port
+ * numbers.
+ *
+ * It has an optional key field, which may help us distinguish two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/in.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define GRE_TIMEOUT (30 * HZ)
+#define GRE_STREAM_TIMEOUT (180 * HZ)
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+static DEFINE_RWLOCK(nf_ct_gre_lock);
+static LIST_HEAD(gre_keymap_list);
+
+/*
+ * Remove and free every entry on the global GRE keymap list.
+ *
+ * Entries are walked and freed as struct nf_ct_gre_keymap (not as raw
+ * list heads), so this does not rely on the list member being the first
+ * field of the structure.
+ */
+void nf_ct_gre_keymap_flush(void)
+{
+	struct nf_ct_gre_keymap *km, *next;
+
+	write_lock_bh(&nf_ct_gre_lock);
+	list_for_each_entry_safe(km, next, &gre_keymap_list, list) {
+		list_del(&km->list);
+		kfree(km);
+	}
+	write_unlock_bh(&nf_ct_gre_lock);
+}
+EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
+
+/*
+ * Return 1 when keymap entry @km matches tuple @t on L3 protocol, both
+ * addresses, L4 protocol number and destination key; 0 otherwise.  The
+ * source key is deliberately not compared.
+ */
+static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
+				const struct nf_conntrack_tuple *t)
+{
+	if (km->tuple.src.l3num != t->src.l3num)
+		return 0;
+	if (memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)))
+		return 0;
+	if (memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)))
+		return 0;
+	if (km->tuple.dst.protonum != t->dst.protonum)
+		return 0;
+
+	return km->tuple.dst.u.all == t->dst.u.all;
+}
+
+/*
+ * Find the source key recorded for tuple @t in the keymap list.
+ * Returns the key of the first matching entry, or 0 if none matches.
+ */
+static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
+{
+	struct nf_ct_gre_keymap *entry;
+	__be16 srckey = 0;
+
+	read_lock_bh(&nf_ct_gre_lock);
+	list_for_each_entry(entry, &gre_keymap_list, list) {
+		if (!gre_key_cmpfn(entry, t))
+			continue;
+		srckey = entry->tuple.src.u.gre.key;
+		break;
+	}
+	read_unlock_bh(&nf_ct_gre_lock);
+
+	DEBUGP("lookup src key 0x%x for ", srckey);
+	NF_CT_DUMP_TUPLE(t);
+
+	return srckey;
+}
+
+/*
+ * Add a single keymap entry for tuple @t and associate it with direction
+ * @dir of master conntrack @ct (which must use the "pptp" helper).
+ *
+ * Returns 0 when the entry was added or when an identical entry already
+ * exists for that direction (retransmission), -EEXIST when a different
+ * entry is already attached, and -ENOMEM on allocation failure.
+ */
+int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
+			 struct nf_conntrack_tuple *t)
+{
+	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_ct_gre_keymap **kmp, *km;
+
+	BUG_ON(strcmp(help->helper->name, "pptp"));
+	kmp = &help->help.ct_pptp_info.keymap[dir];
+	if (*kmp) {
+		/*
+		 * check whether it's a retransmission; the list is shared,
+		 * so it must not be walked without holding the lock
+		 */
+		read_lock_bh(&nf_ct_gre_lock);
+		list_for_each_entry(km, &gre_keymap_list, list) {
+			if (gre_key_cmpfn(km, t) && km == *kmp) {
+				read_unlock_bh(&nf_ct_gre_lock);
+				return 0;
+			}
+		}
+		read_unlock_bh(&nf_ct_gre_lock);
+		DEBUGP("trying to override keymap_%s for ct %p\n",
+		       dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
+		return -EEXIST;
+	}
+
+	km = kmalloc(sizeof(*km), GFP_ATOMIC);
+	if (!km)
+		return -ENOMEM;
+	memcpy(&km->tuple, t, sizeof(*t));
+	*kmp = km;
+
+	DEBUGP("adding new entry %p: ", km);
+	NF_CT_DUMP_TUPLE(&km->tuple);
+
+	write_lock_bh(&nf_ct_gre_lock);
+	list_add_tail(&km->list, &gre_keymap_list);
+	write_unlock_bh(&nf_ct_gre_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
+
+/*
+ * Unlink and free the keymap entries (one per direction) attached to
+ * master conntrack @ct, which must use the "pptp" helper.
+ */
+void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
+{
+	struct nf_conn_help *help = nfct_help(ct);
+	enum ip_conntrack_dir dir;
+
+	DEBUGP("entering for ct %p\n", ct);
+	BUG_ON(strcmp(help->helper->name, "pptp"));
+
+	write_lock_bh(&nf_ct_gre_lock);
+	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
+		struct nf_ct_gre_keymap **slot =
+			&help->help.ct_pptp_info.keymap[dir];
+
+		if (!*slot)
+			continue;
+
+		DEBUGP("removing %p from list\n",
+		       *slot);
+		list_del(&(*slot)->list);
+		kfree(*slot);
+		*slot = NULL;
+	}
+	write_unlock_bh(&nf_ct_gre_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/*
+ * Invert the GRE part of a tuple: the destination key of @tuple becomes
+ * the source key of @orig and vice versa.  Always returns 1.
+ */
+static int gre_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ tuple->dst.u.gre.key = orig->src.u.gre.key;
+ tuple->src.u.gre.key = orig->dst.u.gre.key;
+ return 1;
+}
+
+/*
+ * Fill the GRE-specific part of @tuple from the GRE header found at
+ * @dataoff in @skb.
+ *
+ * Packets whose base GRE header cannot be read, or whose version is not
+ * GRE_VERSION_PPTP, get both keys zeroed and are accepted (return 1).
+ * PPTP-version packets with a protocol other than GRE_PROTOCOL_PPTP are
+ * rejected (return 0).  Otherwise the destination key is the header's
+ * call_id and the source key comes from the keymap (0 if unknown).
+ */
+static int gre_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ struct gre_hdr_pptp _pgrehdr, *pgrehdr;
+ __be16 srckey;
+ struct gre_hdr _grehdr, *grehdr;
+
+ /* first only delinearize old RFC1701 GRE header */
+ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+ if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+ /* try to behave like "nf_conntrack_proto_generic" */
+ tuple->src.u.all = 0;
+ tuple->dst.u.all = 0;
+ return 1;
+ }
+
+ /* PPTP header is variable length, only need up to the call_id field */
+ pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+ /* NOTE(review): this path returns success without writing the keys
+ * in @tuple - confirm callers pre-initialize them */
+ if (!pgrehdr)
+ return 1;
+
+ if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+ DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
+ return 0;
+ }
+
+ /* the lookup matches on the destination key, so set it first */
+ tuple->dst.u.gre.key = pgrehdr->call_id;
+ srckey = gre_keymap_lookup(tuple);
+ tuple->src.u.gre.key = srckey;
+
+ return 1;
+}
+
+/* Print the source and destination GRE keys of @tuple to @s. */
+static int gre_print_tuple(struct seq_file *s,
+			   const struct nf_conntrack_tuple *tuple)
+{
+	u_int16_t srckey = ntohs(tuple->src.u.gre.key);
+	u_int16_t dstkey = ntohs(tuple->dst.u.gre.key);
+
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", srckey, dstkey);
+}
+
+/*
+ * Print the GRE private data of conntrack @ct to @s: both timeouts,
+ * divided by HZ (i.e. converted from jiffies to seconds).
+ */
+static int gre_print_conntrack(struct seq_file *s,
+ const struct nf_conn *ct)
+{
+ return seq_printf(s, "timeout=%u, stream_timeout=%u ",
+ (ct->proto.gre.timeout / HZ),
+ (ct->proto.gre.stream_timeout / HZ));
+}
+
+/*
+ * Per-packet handler: refresh the conntrack timeout and return the
+ * verdict (always NF_ACCEPT).  Before a reply has been seen the short
+ * timeout is used; afterwards the stream timeout is used and the
+ * connection is marked assured.
+ */
+static int gre_packet(struct nf_conn *ct,
+		      const struct sk_buff *skb,
+		      unsigned int dataoff,
+		      enum ip_conntrack_info ctinfo,
+		      int pf,
+		      unsigned int hooknum)
+{
+	if (!(ct->status & IPS_SEEN_REPLY)) {
+		/* one-way traffic only so far */
+		nf_ct_refresh_acct(ct, ctinfo, skb,
+				   ct->proto.gre.timeout);
+		return NF_ACCEPT;
+	}
+
+	/* If we've seen traffic both ways, this is a GRE connection.
+	 * Extend timeout. */
+	nf_ct_refresh_acct(ct, ctinfo, skb,
+			   ct->proto.gre.stream_timeout);
+	/* Also, more likely to be important, and not a probe. */
+	set_bit(IPS_ASSURED_BIT, &ct->status);
+	nf_conntrack_event_cache(IPCT_STATUS, skb);
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Called when a new connection for this protocol is found: seed both
+ * GRE timeouts with their defaults.  Always returns 1.
+ */
+static int gre_new(struct nf_conn *ct, const struct sk_buff *skb,
+		   unsigned int dataoff)
+{
+	DEBUGP(": ");
+	NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+	/* initialize to sane value.  Ideally a conntrack helper
+	 * (e.g. in case of pptp) is increasing them */
+	ct->proto.gre.timeout = GRE_TIMEOUT;
+	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
+
+	return 1;
+}
+
+/*
+ * Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory: drop the keymap entries held
+ * by its master conntrack, if it has one.
+ */
+static void gre_destroy(struct nf_conn *ct)
+{
+	struct nf_conn *master = ct->master;
+
+	DEBUGP(" entering\n");
+
+	if (master) {
+		nf_ct_gre_keymap_destroy(master);
+		return;
+	}
+
+	DEBUGP("no master !?!\n");
+}
+
+/* protocol helper struct */
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
+ .l3proto = AF_INET,
+ .l4proto = IPPROTO_GRE,
+ .name = "gre",
+ .pkt_to_tuple = gre_pkt_to_tuple,
+ .invert_tuple = gre_invert_tuple,
+ .print_tuple = gre_print_tuple,
+ .print_conntrack = gre_print_conntrack,
+ .packet = gre_packet,
+ .new = gre_new,
+ .destroy = gre_destroy,
+ .me = THIS_MODULE,
+#if defined(CONFIG_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_NF_CONNTRACK_NETLINK_MODULE)
+ .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
+ .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
+#endif
+};
+
+/* Module entry point: register the IPv4 GRE L4 tracker. */
+static int __init nf_ct_proto_gre_init(void)
+{
+	int ret;
+
+	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+
+	return ret;
+}
+
+/*
+ * Module exit point: unregister the GRE L4 tracker, then flush any
+ * remaining keymap entries.  Marked __exit like the other module exit
+ * handlers, as it is only referenced through module_exit().
+ */
+static void __exit nf_ct_proto_gre_fini(void)
+{
+	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+	nf_ct_gre_keymap_flush();
+}
+
+module_init(nf_ct_proto_gre_init);
+module_exit(nf_ct_proto_gre_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index af568777372..76e26366822 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -32,7 +32,8 @@
#include <linux/interrupt.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
@@ -216,7 +217,7 @@ static int sctp_print_conntrack(struct seq_file *s,
for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \
offset < skb->len && \
(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
- offset += (htons(sch->length) + 3) & ~3, count++)
+ offset += (ntohs(sch->length) + 3) & ~3, count++)
/* Some validity checks to make sure the chunks are fine */
static int do_basic_checks(struct nf_conn *conntrack,
@@ -508,36 +509,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
return 1;
}
-struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = {
- .l3proto = PF_INET,
- .proto = IPPROTO_SCTP,
- .name = "sctp",
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
- .print_conntrack = sctp_print_conntrack,
- .packet = sctp_packet,
- .new = sctp_new,
- .destroy = NULL,
- .me = THIS_MODULE
-};
-
-struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = {
- .l3proto = PF_INET6,
- .proto = IPPROTO_SCTP,
- .name = "sctp",
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
- .print_conntrack = sctp_print_conntrack,
- .packet = sctp_packet,
- .new = sctp_new,
- .destroy = NULL,
- .me = THIS_MODULE
-};
-
#ifdef CONFIG_SYSCTL
-static ctl_table nf_ct_sysctl_table[] = {
+static unsigned int sctp_sysctl_table_users;
+static struct ctl_table_header *sctp_sysctl_header;
+static struct ctl_table sctp_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
.procname = "nf_conntrack_sctp_timeout_closed",
@@ -594,63 +569,134 @@ static ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
- { .ctl_name = 0 }
+ {
+ .ctl_name = 0
+ }
};
-static ctl_table nf_ct_netfilter_table[] = {
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table sctp_compat_sysctl_table[] = {
{
- .ctl_name = NET_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = nf_ct_sysctl_table,
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
+ .procname = "ip_conntrack_sctp_timeout_closed",
+ .data = &nf_ct_sctp_timeout_closed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
+ .procname = "ip_conntrack_sctp_timeout_cookie_wait",
+ .data = &nf_ct_sctp_timeout_cookie_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
+ .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
+ .data = &nf_ct_sctp_timeout_cookie_echoed,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
+ .procname = "ip_conntrack_sctp_timeout_established",
+ .data = &nf_ct_sctp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
+ .data = &nf_ct_sctp_timeout_shutdown_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
+ .data = &nf_ct_sctp_timeout_shutdown_recd,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
},
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_net_table[] = {
{
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = nf_ct_netfilter_table,
+ .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
+ .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
+ .data = &nf_ct_sctp_timeout_shutdown_ack_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
},
- { .ctl_name = 0 }
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_SCTP,
+ .name = "sctp",
+ .pkt_to_tuple = sctp_pkt_to_tuple,
+ .invert_tuple = sctp_invert_tuple,
+ .print_tuple = sctp_print_tuple,
+ .print_conntrack = sctp_print_conntrack,
+ .packet = sctp_packet,
+ .new = sctp_new,
+ .me = THIS_MODULE,
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &sctp_sysctl_table_users,
+ .ctl_table_header = &sctp_sysctl_header,
+ .ctl_table = sctp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = sctp_compat_sysctl_table,
+#endif
+#endif
};
-static struct ctl_table_header *nf_ct_sysctl_header;
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_SCTP,
+ .name = "sctp",
+ .pkt_to_tuple = sctp_pkt_to_tuple,
+ .invert_tuple = sctp_invert_tuple,
+ .print_tuple = sctp_print_tuple,
+ .print_conntrack = sctp_print_conntrack,
+ .packet = sctp_packet,
+ .new = sctp_new,
+ .me = THIS_MODULE,
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &sctp_sysctl_table_users,
+ .ctl_table_header = &sctp_sysctl_header,
+ .ctl_table = sctp_sysctl_table,
#endif
+};
int __init nf_conntrack_proto_sctp_init(void)
{
int ret;
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4);
if (ret) {
- printk("nf_conntrack_proto_sctp4: protocol register failed\n");
+ printk("nf_conntrack_l4proto_sctp4: protocol register failed\n");
goto out;
}
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6);
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6);
if (ret) {
- printk("nf_conntrack_proto_sctp6: protocol register failed\n");
+ printk("nf_conntrack_l4proto_sctp6: protocol register failed\n");
goto cleanup_sctp4;
}
-#ifdef CONFIG_SYSCTL
- nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
- if (nf_ct_sysctl_header == NULL) {
- printk("nf_conntrack_proto_sctp: can't register to sysctl.\n");
- goto cleanup;
- }
-#endif
-
return ret;
-#ifdef CONFIG_SYSCTL
- cleanup:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
-#endif
cleanup_sctp4:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
out:
DEBUGP("SCTP conntrack module loading %s\n",
ret ? "failed": "succeeded");
@@ -659,11 +705,8 @@ int __init nf_conntrack_proto_sctp_init(void)
void __exit nf_conntrack_proto_sctp_fini(void)
{
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(nf_ct_sysctl_header);
-#endif
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
DEBUGP("SCTP conntrack module unloaded\n");
}
@@ -673,3 +716,4 @@ module_exit(nf_conntrack_proto_sctp_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kiran Kumar Immidi");
MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
+MODULE_ALIAS("ip_conntrack_proto_sctp");
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 238bbb5b72e..626b0011dd8 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -42,7 +42,8 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#if 0
#define DEBUGP printk
@@ -92,22 +93,22 @@ static const char *tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
-unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
-unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
-unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
-unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
-unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
+static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
+static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
+static unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
+static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
+static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
+static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
+static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
+static unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
-unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
+static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
-static unsigned int * tcp_timeouts[]
-= { NULL, /* TCP_CONNTRACK_NONE */
+static unsigned int * tcp_timeouts[] = {
+ NULL, /* TCP_CONNTRACK_NONE */
&nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
&nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
&nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
@@ -473,8 +474,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__u32 *)ptr ==
- __constant_ntohl((TCPOPT_NOP << 24)
+ && *(__be32 *)ptr ==
+ __constant_htonl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP))
@@ -505,9 +506,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
for (i = 0;
i < (opsize - TCPOLEN_SACK_BASE);
i += TCPOLEN_SACK_PERBLOCK) {
- memcpy(&tmp, (__u32 *)(ptr + i) + 1,
- sizeof(__u32));
- tmp = ntohl(tmp);
+ tmp = ntohl(*((__be32 *)(ptr+i)+1));
if (after(tmp, *sack))
*sack = tmp;
@@ -731,7 +730,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
return res;
}
-#ifdef CONFIG_IP_NF_NAT_NEEDED
+#ifdef CONFIG_NF_NAT_NEEDED
/* Update sender->td_end after NAT successfully mangled the packet */
/* Caller must linearize skb at tcp header. */
void nf_conntrack_tcp_update(struct sk_buff *skb,
@@ -763,7 +762,7 @@ void nf_conntrack_tcp_update(struct sk_buff *skb,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
}
-
+EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#endif
#define TH_FIN 0x01
@@ -1167,11 +1166,221 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
return 0;
}
#endif
-
-struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
+
+#ifdef CONFIG_SYSCTL
+static unsigned int tcp_sysctl_table_users;
+static struct ctl_table_header *tcp_sysctl_header;
+static struct ctl_table tcp_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
+ .procname = "nf_conntrack_tcp_timeout_syn_sent",
+ .data = &nf_ct_tcp_timeout_syn_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
+ .procname = "nf_conntrack_tcp_timeout_syn_recv",
+ .data = &nf_ct_tcp_timeout_syn_recv,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
+ .procname = "nf_conntrack_tcp_timeout_established",
+ .data = &nf_ct_tcp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
+ .procname = "nf_conntrack_tcp_timeout_fin_wait",
+ .data = &nf_ct_tcp_timeout_fin_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
+ .procname = "nf_conntrack_tcp_timeout_close_wait",
+ .data = &nf_ct_tcp_timeout_close_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
+ .procname = "nf_conntrack_tcp_timeout_last_ack",
+ .data = &nf_ct_tcp_timeout_last_ack,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
+ .procname = "nf_conntrack_tcp_timeout_time_wait",
+ .data = &nf_ct_tcp_timeout_time_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
+ .procname = "nf_conntrack_tcp_timeout_close",
+ .data = &nf_ct_tcp_timeout_close,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+ .procname = "nf_conntrack_tcp_timeout_max_retrans",
+ .data = &nf_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "nf_conntrack_tcp_loose",
+ .data = &nf_ct_tcp_loose,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "nf_conntrack_tcp_be_liberal",
+ .data = &nf_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "nf_conntrack_tcp_max_retrans",
+ .data = &nf_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table tcp_compat_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
+ .procname = "ip_conntrack_tcp_timeout_syn_sent",
+ .data = &nf_ct_tcp_timeout_syn_sent,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
+ .procname = "ip_conntrack_tcp_timeout_syn_recv",
+ .data = &nf_ct_tcp_timeout_syn_recv,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
+ .procname = "ip_conntrack_tcp_timeout_established",
+ .data = &nf_ct_tcp_timeout_established,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_fin_wait",
+ .data = &nf_ct_tcp_timeout_fin_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_close_wait",
+ .data = &nf_ct_tcp_timeout_close_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
+ .procname = "ip_conntrack_tcp_timeout_last_ack",
+ .data = &nf_ct_tcp_timeout_last_ack,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
+ .procname = "ip_conntrack_tcp_timeout_time_wait",
+ .data = &nf_ct_tcp_timeout_time_wait,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
+ .procname = "ip_conntrack_tcp_timeout_close",
+ .data = &nf_ct_tcp_timeout_close,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_timeout_max_retrans",
+ .data = &nf_ct_tcp_timeout_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+ .procname = "ip_conntrack_tcp_loose",
+ .data = &nf_ct_tcp_loose,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+ .procname = "ip_conntrack_tcp_be_liberal",
+ .data = &nf_ct_tcp_be_liberal,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+ .procname = "ip_conntrack_tcp_max_retrans",
+ .data = &nf_ct_tcp_max_retrans,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
.l3proto = PF_INET,
- .proto = IPPROTO_TCP,
+ .l4proto = IPPROTO_TCP,
.name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
@@ -1187,12 +1396,21 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
.tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &tcp_sysctl_table_users,
+ .ctl_table_header = &tcp_sysctl_header,
+ .ctl_table = tcp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = tcp_compat_sysctl_table,
+#endif
+#endif
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
.l3proto = PF_INET6,
- .proto = IPPROTO_TCP,
+ .l4proto = IPPROTO_TCP,
.name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
@@ -1208,7 +1426,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
.tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &tcp_sysctl_table_users,
+ .ctl_table_header = &tcp_sysctl_header,
+ .ctl_table = tcp_sysctl_table,
+#endif
};
-
-EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
-EXPORT_SYMBOL(nf_conntrack_protocol_tcp6);
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index d28981cf9af..e49cd25998c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -22,13 +22,15 @@
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/checksum.h>
+
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
-unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
-unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
+static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
+static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
@@ -146,10 +148,59 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
return NF_ACCEPT;
}
-struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
+#ifdef CONFIG_SYSCTL
+static unsigned int udp_sysctl_table_users;
+static struct ctl_table_header *udp_sysctl_header;
+static struct ctl_table udp_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT,
+ .procname = "nf_conntrack_udp_timeout",
+ .data = &nf_ct_udp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
+ .procname = "nf_conntrack_udp_timeout_stream",
+ .data = &nf_ct_udp_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table udp_compat_sysctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
+ .procname = "ip_conntrack_udp_timeout",
+ .data = &nf_ct_udp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
+ .procname = "ip_conntrack_udp_timeout_stream",
+ .data = &nf_ct_udp_timeout_stream,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
{
.l3proto = PF_INET,
- .proto = IPPROTO_UDP,
+ .l4proto = IPPROTO_UDP,
.name = "udp",
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
@@ -163,12 +214,21 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
.tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udp_sysctl_table_users,
+ .ctl_table_header = &udp_sysctl_header,
+ .ctl_table = udp_sysctl_table,
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ .ctl_compat_table = udp_compat_sysctl_table,
+#endif
+#endif
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
-struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
+struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
{
.l3proto = PF_INET6,
- .proto = IPPROTO_UDP,
+ .l4proto = IPPROTO_UDP,
.name = "udp",
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
@@ -182,7 +242,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
.tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
#endif
+#ifdef CONFIG_SYSCTL
+ .ctl_table_users = &udp_sysctl_table_users,
+ .ctl_table_header = &udp_sysctl_header,
+ .ctl_table = udp_sysctl_table,
+#endif
};
-
-EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
-EXPORT_SYMBOL(nf_conntrack_protocol_udp6);
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
new file mode 100644
index 00000000000..eb2a2411f97
--- /dev/null
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -0,0 +1,531 @@
+/* SIP extension for IP connection tracking.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_conntrack_ftp.c and other modules.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP connection tracking helper");
+MODULE_ALIAS("ip_conntrack_sip");
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of SIP servers");
+
+static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT;
+module_param(sip_timeout, uint, 0600);
+MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
+
+unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conn *ct,
+ const char **dptr) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
+
+unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_expect *exp,
+ const char *dptr) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sdp_hook);
+
+static int digits_len(struct nf_conn *, const char *, const char *, int *);
+static int epaddr_len(struct nf_conn *, const char *, const char *, int *);
+static int skp_digits_len(struct nf_conn *, const char *, const char *, int *);
+static int skp_epaddr_len(struct nf_conn *, const char *, const char *, int *);
+
+struct sip_header_nfo { /* one ct_sip_hdrs entry: how ct_sip_get_info() locates a field */
+ const char *lname; /* primary prefix ct_sip_get_info() compares against the payload */
+ const char *sname; /* alternative prefix; the two request-URI entries leave it unset */
+ const char *ln_str; /* marker searched for inside the line once a prefix matched */
+ size_t lnlen; /* length of lname, terminating NUL not counted */
+ size_t snlen; /* length of sname, terminating NUL not counted */
+ size_t ln_strlen; /* length of ln_str, terminating NUL not counted */
+ int case_sensitive; /* handed to ct_sip_search() for the ln_str lookup */
+ int (*match_len)(struct nf_conn *, const char *,
+ const char *, int *); /* called as (ct, value start, limit, &shift); a 0 result fails the lookup */
+};
+
+static const struct sip_header_nfo ct_sip_hdrs[] = { /* indexed by enum sip_header_pos in ct_sip_get_info() */
+ [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
+ .lname = "sip:",
+ .lnlen = sizeof("sip:") - 1,
+ .ln_str = ":",
+ .ln_strlen = sizeof(":") - 1,
+ .match_len = epaddr_len, /* .sname/.snlen are not set for this entry */
+ },
+ [POS_REQ_URI] = { /* SIP request URI */
+ .lname = "sip:",
+ .lnlen = sizeof("sip:") - 1,
+ .ln_str = "@",
+ .ln_strlen = sizeof("@") - 1,
+ .match_len = epaddr_len, /* .sname/.snlen are not set for this entry */
+ },
+ [POS_FROM] = { /* SIP From header */
+ .lname = "From:",
+ .lnlen = sizeof("From:") - 1,
+ .sname = "\r\nf:",
+ .snlen = sizeof("\r\nf:") - 1,
+ .ln_str = "sip:",
+ .ln_strlen = sizeof("sip:") - 1,
+ .match_len = skp_epaddr_len,
+ },
+ [POS_TO] = { /* SIP To header */
+ .lname = "To:",
+ .lnlen = sizeof("To:") - 1,
+ .sname = "\r\nt:",
+ .snlen = sizeof("\r\nt:") - 1,
+ .ln_str = "sip:",
+ .ln_strlen = sizeof("sip:") - 1,
+ .match_len = skp_epaddr_len
+ },
+ [POS_VIA] = { /* SIP Via header */
+ .lname = "Via:",
+ .lnlen = sizeof("Via:") - 1,
+ .sname = "\r\nv:",
+ .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
+ .ln_str = "UDP ",
+ .ln_strlen = sizeof("UDP ") - 1,
+ .match_len = epaddr_len,
+ },
+ [POS_CONTACT] = { /* SIP Contact header */
+ .lname = "Contact:",
+ .lnlen = sizeof("Contact:") - 1,
+ .sname = "\r\nm:",
+ .snlen = sizeof("\r\nm:") - 1,
+ .ln_str = "sip:",
+ .ln_strlen = sizeof("sip:") - 1,
+ .match_len = skp_epaddr_len
+ },
+ [POS_CONTENT] = { /* SIP Content length header */
+ .lname = "Content-Length:",
+ .lnlen = sizeof("Content-Length:") - 1,
+ .sname = "\r\nl:",
+ .snlen = sizeof("\r\nl:") - 1,
+ .ln_str = ":",
+ .ln_strlen = sizeof(":") - 1,
+ .match_len = skp_digits_len
+ },
+ [POS_MEDIA] = { /* SDP media info */
+ .case_sensitive = 1, /* makes ct_sip_search() use strncmp() rather than strnicmp() for ln_str */
+ .lname = "\nm=",
+ .lnlen = sizeof("\nm=") - 1,
+ .sname = "\rm=",
+ .snlen = sizeof("\rm=") - 1,
+ .ln_str = "audio ",
+ .ln_strlen = sizeof("audio ") - 1,
+ .match_len = digits_len
+ },
+ [POS_OWNER_IP4] = { /* SDP owner address*/
+ .case_sensitive = 1,
+ .lname = "\no=",
+ .lnlen = sizeof("\no=") - 1,
+ .sname = "\ro=",
+ .snlen = sizeof("\ro=") - 1,
+ .ln_str = "IN IP4 ",
+ .ln_strlen = sizeof("IN IP4 ") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_CONNECTION_IP4] = {/* SDP connection info */
+ .case_sensitive = 1,
+ .lname = "\nc=",
+ .lnlen = sizeof("\nc=") - 1,
+ .sname = "\rc=",
+ .snlen = sizeof("\rc=") - 1,
+ .ln_str = "IN IP4 ",
+ .ln_strlen = sizeof("IN IP4 ") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_OWNER_IP6] = { /* SDP owner address*/
+ .case_sensitive = 1,
+ .lname = "\no=",
+ .lnlen = sizeof("\no=") - 1,
+ .sname = "\ro=",
+ .snlen = sizeof("\ro=") - 1,
+ .ln_str = "IN IP6 ",
+ .ln_strlen = sizeof("IN IP6 ") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_CONNECTION_IP6] = {/* SDP connection info */
+ .case_sensitive = 1,
+ .lname = "\nc=",
+ .lnlen = sizeof("\nc=") - 1,
+ .sname = "\rc=",
+ .snlen = sizeof("\rc=") - 1,
+ .ln_str = "IN IP6 ",
+ .ln_strlen = sizeof("IN IP6 ") - 1,
+ .match_len = epaddr_len
+ },
+ [POS_SDP_HEADER] = { /* SDP version header */
+ .case_sensitive = 1,
+ .lname = "\nv=",
+ .lnlen = sizeof("\nv=") - 1,
+ .sname = "\rv=",
+ .snlen = sizeof("\rv=") - 1,
+ .ln_str = "=",
+ .ln_strlen = sizeof("=") - 1,
+ .match_len = digits_len
+ }
+};
+
+/* Get the length of the line at @line: any CR/LF characters in front are
+ * counted, followed by everything up to (but not including) the next CR or
+ * LF.  Bytes up to and including @limit are examined, nothing beyond. */
+int ct_sip_lnlen(const char *line, const char *limit)
+{
+	const char *pos;
+
+	/* step over line terminators sitting in front of the text */
+	for (pos = line; pos <= limit; pos++)
+		if (*pos != '\r' && *pos != '\n')
+			break;
+
+	/* then run to the terminator that ends this line */
+	for (; pos <= limit; pos++)
+		if (*pos == '\r' || *pos == '\n')
+			break;
+
+	return pos - line;
+}
+EXPORT_SYMBOL_GPL(ct_sip_lnlen);
+
+/* Linear string search for @needle within the first @haystack_len bytes of
+ * @haystack.  Candidates are compared with strncmp() when @case_sensitive is
+ * non-zero and with strnicmp() otherwise.  Returns a pointer to the first
+ * match, or NULL if there is none. */
+const char *ct_sip_search(const char *needle, const char *haystack,
+			  size_t needle_len, size_t haystack_len,
+			  int case_sensitive)
+{
+	const char *limit;
+
+	/* Both lengths are size_t, so the subtraction below would wrap
+	 * around if the needle were longer than the haystack. */
+	if (needle_len > haystack_len)
+		return NULL;
+
+	/* last start position at which the whole needle still fits */
+	limit = haystack + (haystack_len - needle_len);
+
+	while (haystack <= limit) {
+		if (case_sensitive) {
+			if (strncmp(haystack, needle, needle_len) == 0)
+				return haystack;
+		} else {
+			if (strnicmp(haystack, needle, needle_len) == 0)
+				return haystack;
+		}
+		haystack++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(ct_sip_search);
+
+/* Count the consecutive decimal digits starting at @dptr, examining bytes up
+ * to and including @limit.  @ct and @shift are not used here; they are part
+ * of the signature shared by the match_len callbacks in ct_sip_hdrs. */
+static int digits_len(struct nf_conn *ct, const char *dptr,
+		      const char *limit, int *shift)
+{
+	const char *cur;
+
+	for (cur = dptr; cur <= limit; cur++)
+		if (!isdigit(*cur))
+			break;
+
+	return cur - dptr;
+}
+
+/* Get the length of a run of digits, skipping blank spaces in front of it.
+ * Every blank skipped is added to *@shift. */
+static int skp_digits_len(struct nf_conn *ct, const char *dptr,
+			  const char *limit, int *shift)
+{
+	while (dptr <= limit && *dptr == ' ') {
+		dptr++;
+		(*shift)++;
+	}
+
+	return digits_len(ct, dptr, limit, shift);
+}
+
+/* Parse the textual address at @cp into @addr, as IPv4 or IPv6 according to
+ * the l3num of @ct's original-direction tuple; the parser is given
+ * (@limit - @cp) bytes to work with.  Returns 1 on success and, when @endp is
+ * non-NULL, stores the position where parsing stopped in *@endp.  Returns 0
+ * if the parser rejected the input or consumed nothing.  Any other address
+ * family is a BUG(). */
+static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
+		      union nf_conntrack_address *addr, const char *limit)
+{
+	int l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	const char *stop;
+	int parsed = 0;
+
+	if (l3num == AF_INET)
+		parsed = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &stop);
+	else if (l3num == AF_INET6)
+		parsed = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &stop);
+	else
+		BUG();
+
+	if (!parsed || stop == cp)
+		return 0;
+
+	if (endp != NULL)
+		*endp = stop;
+	return 1;
+}
+
+/* Skip over an IP address and, if one follows, a ":port" suffix.  Returns
+ * the number of bytes covered, or 0 when no address could be parsed. */
+static int epaddr_len(struct nf_conn *ct, const char *dptr,
+		      const char *limit, int *shift)
+{
+	union nf_conntrack_address addr;
+	const char *end;
+
+	if (!parse_addr(ct, dptr, &end, &addr, limit)) {
+		DEBUGP("ip: %s parse failed.!\n", dptr);
+		return 0;
+	}
+
+	/* Port number: one byte for the colon plus the digits behind it */
+	if (*end == ':')
+		end += 1 + digits_len(ct, end + 1, limit, shift);
+
+	return end - dptr;
+}
+
+/* Get the address length, skipping user info ("user@") in front of it.
+ * The number of bytes skipped is added to *@shift.  If the current line has
+ * no '@', nothing is skipped and the address is parsed from @dptr itself. */
+static int skp_epaddr_len(struct nf_conn *ct, const char *dptr,
+			  const char *limit, int *shift)
+{
+	const char *start = dptr;
+	int s = *shift;
+
+	/* Search for '@', but stop at the end of the line so that a URI
+	 * without user info does not make us run into following headers. */
+	while (dptr <= limit &&
+	       *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
+		(*shift)++;
+		dptr++;
+	}
+
+	/* dptr may have stepped past limit: check before dereferencing */
+	if (dptr <= limit && *dptr == '@') {
+		dptr++;
+		(*shift)++;
+	} else {
+		/* no user info: undo the scan completely, position included */
+		dptr = start;
+		*shift = s;
+	}
+
+	return epaddr_len(ct, dptr, limit, shift);
+}
+
+/* Locate the field described by ct_sip_hdrs[@pos] within the @dlen bytes at
+ * @dptr.  On success *@matchoff receives the offset of the value relative to
+ * @dptr and *@matchlen its length.
+ *
+ * Returns 1 if found, 0 if not found, -1 error parsing. */
+int ct_sip_get_info(struct nf_conn *ct,
+		    const char *dptr, size_t dlen,
+		    unsigned int *matchoff,
+		    unsigned int *matchlen,
+		    enum sip_header_pos pos)
+{
+	const struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
+	const char *limit, *aux, *k = dptr;
+	int shift = 0;
+
+	/* dlen is a size_t: a payload shorter than the prefix would make
+	 * the subtraction below wrap around. */
+	if (dlen < hnfo->lnlen) {
+		DEBUGP("%s header not found.\n", hnfo->lname);
+		return 0;
+	}
+
+	limit = dptr + (dlen - hnfo->lnlen);
+
+	while (dptr <= limit) {
+		/* Entries without a short form have sname == NULL and
+		 * snlen == 0; a zero-length strncmp() compares equal, which
+		 * would turn every position into a match. */
+		if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
+		    (hnfo->sname == NULL ||
+		     strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
+			dptr++;
+			continue;
+		}
+		/* prefix found: look for the marker within this line only */
+		aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
+				    ct_sip_lnlen(dptr, limit),
+				    hnfo->case_sensitive);
+		if (!aux) {
+			DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
+			       hnfo->lname);
+			return -1;
+		}
+		aux += hnfo->ln_strlen;
+
+		*matchlen = hnfo->match_len(ct, aux, limit, &shift);
+		if (!*matchlen)
+			return -1;
+
+		/* shift: bytes the callback skipped in front of the value */
+		*matchoff = (aux - k) + shift;
+
+		DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
+		       *matchlen);
+		return 1;
+	}
+	DEBUGP("%s header not found.\n", hnfo->lname);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ct_sip_get_info);
+
+/* Allocate an expectation on @ct and initialise it from the source address
+ * of the tuple opposite to the packet's direction, @addr, IPPROTO_UDP and
+ * @port.  If a NAT SDP hook is installed and @ct has NAT status bits set the
+ * hook decides the verdict; otherwise the expectation is registered directly
+ * and a registration failure yields NF_DROP.  The local reference on the
+ * expectation is dropped before returning. */
+static int set_expected_rtp(struct sk_buff **pskb,
+			    struct nf_conn *ct,
+			    enum ip_conntrack_info ctinfo,
+			    union nf_conntrack_address *addr,
+			    __be16 port,
+			    const char *dptr)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	int family = ct->tuplehash[!dir].tuple.src.l3num;
+	struct nf_conntrack_expect *exp;
+	typeof(nf_nat_sdp_hook) nf_nat_sdp;
+	int verdict;
+
+	exp = nf_conntrack_expect_alloc(ct);
+	if (!exp)
+		return NF_DROP;
+
+	nf_conntrack_expect_init(exp, family,
+				 &ct->tuplehash[!dir].tuple.src.u3, addr,
+				 IPPROTO_UDP, NULL, &port);
+
+	nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
+	if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
+		verdict = nf_nat_sdp(pskb, ctinfo, exp, dptr);
+	else if (nf_conntrack_expect_related(exp) != 0)
+		verdict = NF_DROP;
+	else
+		verdict = NF_ACCEPT;
+
+	nf_conntrack_expect_put(exp);
+	return verdict;
+}
+
+/* Conntrack helper callback for SIP over UDP.  Refreshes the master
+ * connection's timeout, lets the NAT SIP hook (if any) process the packet,
+ * and for INVITE requests and "SIP/2.0 200" responses extracts the SDP
+ * connection address and media port to set up an expectation via
+ * set_expected_rtp().  Returns an NF_* verdict; NF_DROP is only used when
+ * the NAT hook fails or the SDP data cannot be parsed. */
+static int sip_help(struct sk_buff **pskb,
+		    unsigned int protoff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo)
+{
+	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	union nf_conntrack_address addr;
+	unsigned int dataoff, datalen;
+	const char *dptr;
+	int ret = NF_ACCEPT;
+	/* unsigned to match the pointer types ct_sip_get_info() takes */
+	unsigned int matchoff, matchlen;
+	/* full width of simple_strtoul() so the range check sees the
+	 * real value instead of one truncated to 16 bits */
+	unsigned long port;
+	enum sip_header_pos pos;
+	typeof(nf_nat_sip_hook) nf_nat_sip;
+
+	/* No Data ? */
+	dataoff = protoff + sizeof(struct udphdr);
+	if (dataoff >= (*pskb)->len)
+		return NF_ACCEPT;
+
+	nf_ct_refresh(ct, *pskb, sip_timeout * HZ);
+
+	if (!skb_is_nonlinear(*pskb))
+		dptr = (*pskb)->data + dataoff;
+	else {
+		DEBUGP("Copy of skbuff not supported yet.\n");
+		goto out;
+	}
+
+	nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
+	if (nf_nat_sip && ct->status & IPS_NAT_MASK) {
+		if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) {
+			ret = NF_DROP;
+			goto out;
+		}
+	}
+
+	/* computed after the hook ran, from the skb as it is now */
+	datalen = (*pskb)->len - dataoff;
+	if (datalen < sizeof("SIP/2.0 200") - 1)
+		goto out;
+
+	/* RTP info only in some SDP pkts */
+	if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
+	    memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
+		goto out;
+	}
+	/* Get address and port from SDP packet. */
+	pos = family == AF_INET ? POS_CONNECTION_IP4 : POS_CONNECTION_IP6;
+	if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen, pos) > 0) {
+
+		/* We'll drop only if there are parse problems. */
+		if (!parse_addr(ct, dptr + matchoff, NULL, &addr,
+				dptr + datalen)) {
+			ret = NF_DROP;
+			goto out;
+		}
+		if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen,
+				    POS_MEDIA) > 0) {
+
+			port = simple_strtoul(dptr + matchoff, NULL, 10);
+			/* reject low ports and anything that does not fit
+			 * into a 16 bit port number */
+			if (port < 1024 || port > 65535) {
+				ret = NF_DROP;
+				goto out;
+			}
+			ret = set_expected_rtp(pskb, ct, ctinfo, &addr,
+					       htons(port), dptr);
+		}
+	}
+out:
+	return ret;
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly;
+static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
+
+/* Unregister every helper in sip[][] whose .me is set; slots whose .me is
+ * still NULL are passed over.  Also used by nf_conntrack_sip_init() to roll
+ * back after a failed registration. */
+static void nf_conntrack_sip_fini(void)
+{
+	int idx, fam;
+
+	for (idx = 0; idx < ports_c; idx++)
+		for (fam = 0; fam < 2; fam++)
+			if (sip[idx][fam].me != NULL)
+				nf_conntrack_helper_unregister(&sip[idx][fam]);
+}
+
+/* Module init: register one helper per configured port and address family
+ * (slot 0 is AF_INET, slot 1 is AF_INET6).  If no port was given, SIP_PORT
+ * is used.  On a registration failure the helpers registered so far are
+ * unregistered again and the error is returned. */
+static int __init nf_conntrack_sip_init(void)
+{
+	int i, j, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = SIP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		memset(&sip[i], 0, sizeof(sip[i]));
+
+		sip[i][0].tuple.src.l3num = AF_INET;
+		sip[i][1].tuple.src.l3num = AF_INET6;
+		for (j = 0; j < 2; j++) {
+			sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
+			sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
+			sip[i][j].mask.src.l3num = 0xFFFF;
+			sip[i][j].mask.src.u.udp.port = htons(0xFFFF);
+			sip[i][j].mask.dst.protonum = 0xFF;
+			sip[i][j].max_expected = 2;
+			sip[i][j].timeout = 3 * 60; /* 3 minutes */
+			sip[i][j].me = THIS_MODULE;
+			sip[i][j].help = sip_help;
+
+			tmpname = &sip_names[i][j][0];
+			if (ports[i] == SIP_PORT)
+				sprintf(tmpname, "sip");
+			else
+				sprintf(tmpname, "sip-%u", i);
+			sip[i][j].name = tmpname;
+
+			DEBUGP("port #%u: %u\n", i, ports[i]);
+
+			ret = nf_conntrack_helper_register(&sip[i][j]);
+			if (ret) {
+				printk("nf_ct_sip: failed to register helper "
+				       "for pf: %u port: %u\n",
+				       sip[i][j].tuple.src.l3num, ports[i]);
+				/* nf_conntrack_sip_fini() unregisters every
+				 * slot with a non-NULL .me; this one never
+				 * got registered, so take it out first. */
+				sip[i][j].me = NULL;
+				nf_conntrack_sip_fini();
+				return ret;
+			}
+		}
+	}
+	return 0;
+}
+
+module_init(nf_conntrack_sip_init);
+module_exit(nf_conntrack_sip_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5954f677381..f1cb60ff931 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,13 +29,11 @@
#include <linux/sysctl.h>
#endif
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#if 0
@@ -46,33 +44,15 @@
MODULE_LICENSE("GPL");
-extern atomic_t nf_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-
-static int kill_l3proto(struct nf_conn *i, void *data)
-{
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
- ((struct nf_conntrack_l3proto *)data)->l3proto);
-}
-
-static int kill_proto(struct nf_conn *i, void *data)
-{
- struct nf_conntrack_protocol *proto;
- proto = (struct nf_conntrack_protocol *)data;
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
- proto->proto) &&
- (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
- proto->l3proto);
-}
-
#ifdef CONFIG_PROC_FS
-static int
+int
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_protocol *proto)
+ struct nf_conntrack_l4proto *l4proto)
{
- return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple);
+ return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple);
}
+EXPORT_SYMBOL_GPL(print_tuple);
#ifdef CONFIG_NF_CT_ACCT
static unsigned int
@@ -150,9 +130,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
const struct nf_conntrack_tuple_hash *hash = v;
const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash);
struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_protocol *proto;
+ struct nf_conntrack_l4proto *l4proto;
- ASSERT_READ_LOCK(&nf_conntrack_lock);
NF_CT_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
@@ -163,16 +142,16 @@ static int ct_seq_show(struct seq_file *s, void *v)
.tuple.src.l3num);
NF_CT_ASSERT(l3proto);
- proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ l4proto = __nf_ct_l4proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.src.l3num,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.protonum);
- NF_CT_ASSERT(proto);
+ NF_CT_ASSERT(l4proto);
if (seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
- proto->name,
+ l4proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout)
? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
@@ -181,11 +160,11 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (l3proto->print_conntrack(s, conntrack))
return -ENOSPC;
- if (proto->print_conntrack(s, conntrack))
+ if (l4proto->print_conntrack(s, conntrack))
return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- l3proto, proto))
+ l3proto, l4proto))
return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
@@ -196,7 +175,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
return -ENOSPC;
if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
- l3proto, proto))
+ l3proto, l4proto))
return -ENOSPC;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
@@ -258,84 +237,6 @@ static struct file_operations ct_file_ops = {
.release = seq_release_private,
};
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct list_head *e = &nf_conntrack_expect_list;
- loff_t i;
-
- /* strange seq_file api calls stop even if we fail,
- * thus we need to grab lock since stop unlocks */
- read_lock_bh(&nf_conntrack_lock);
-
- if (list_empty(e))
- return NULL;
-
- for (i = 0; i <= *pos; i++) {
- e = e->next;
- if (e == &nf_conntrack_expect_list)
- return NULL;
- }
- return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct list_head *e = v;
-
- ++*pos;
- e = e->next;
-
- if (e == &nf_conntrack_expect_list)
- return NULL;
-
- return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&nf_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
- struct nf_conntrack_expect *expect = v;
-
- if (expect->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&expect->timeout)
- ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
- else
- seq_printf(s, "- ");
- seq_printf(s, "l3proto = %u proto=%u ",
- expect->tuple.src.l3num,
- expect->tuple.dst.protonum);
- print_tuple(s, &expect->tuple,
- __nf_ct_l3proto_find(expect->tuple.src.l3num),
- __nf_ct_proto_find(expect->tuple.src.l3num,
- expect->tuple.dst.protonum));
- return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
- .start = exp_seq_start,
- .next = exp_seq_next,
- .stop = exp_seq_stop,
- .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &exp_seq_ops);
-}
-
-static struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
int cpu;
@@ -428,34 +329,9 @@ static struct file_operations ct_cpu_seq_fops = {
/* Sysctl support */
int nf_conntrack_checksum __read_mostly = 1;
+EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
#ifdef CONFIG_SYSCTL
-
-/* From nf_conntrack_core.c */
-extern int nf_conntrack_max;
-extern unsigned int nf_conntrack_htable_size;
-
-/* From nf_conntrack_proto_tcp.c */
-extern unsigned int nf_ct_tcp_timeout_syn_sent;
-extern unsigned int nf_ct_tcp_timeout_syn_recv;
-extern unsigned int nf_ct_tcp_timeout_established;
-extern unsigned int nf_ct_tcp_timeout_fin_wait;
-extern unsigned int nf_ct_tcp_timeout_close_wait;
-extern unsigned int nf_ct_tcp_timeout_last_ack;
-extern unsigned int nf_ct_tcp_timeout_time_wait;
-extern unsigned int nf_ct_tcp_timeout_close;
-extern unsigned int nf_ct_tcp_timeout_max_retrans;
-extern int nf_ct_tcp_loose;
-extern int nf_ct_tcp_be_liberal;
-extern int nf_ct_tcp_max_retrans;
-
-/* From nf_conntrack_proto_udp.c */
-extern unsigned int nf_ct_udp_timeout;
-extern unsigned int nf_ct_udp_timeout_stream;
-
-/* From nf_conntrack_proto_generic.c */
-extern unsigned int nf_ct_generic_timeout;
-
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
@@ -496,94 +372,6 @@ static ctl_table nf_ct_sysctl_table[] = {
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
- .procname = "nf_conntrack_tcp_timeout_syn_sent",
- .data = &nf_ct_tcp_timeout_syn_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
- .procname = "nf_conntrack_tcp_timeout_syn_recv",
- .data = &nf_ct_tcp_timeout_syn_recv,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
- .procname = "nf_conntrack_tcp_timeout_established",
- .data = &nf_ct_tcp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
- .procname = "nf_conntrack_tcp_timeout_fin_wait",
- .data = &nf_ct_tcp_timeout_fin_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
- .procname = "nf_conntrack_tcp_timeout_close_wait",
- .data = &nf_ct_tcp_timeout_close_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
- .procname = "nf_conntrack_tcp_timeout_last_ack",
- .data = &nf_ct_tcp_timeout_last_ack,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
- .procname = "nf_conntrack_tcp_timeout_time_wait",
- .data = &nf_ct_tcp_timeout_time_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
- .procname = "nf_conntrack_tcp_timeout_close",
- .data = &nf_ct_tcp_timeout_close,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT,
- .procname = "nf_conntrack_udp_timeout",
- .data = &nf_ct_udp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
- .procname = "nf_conntrack_udp_timeout_stream",
- .data = &nf_ct_udp_timeout_stream,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT,
- .procname = "nf_conntrack_generic_timeout",
- .data = &nf_ct_generic_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
.ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
.procname = "nf_conntrack_log_invalid",
.data = &nf_ct_log_invalid,
@@ -594,38 +382,6 @@ static ctl_table nf_ct_sysctl_table[] = {
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
- .procname = "nf_conntrack_tcp_timeout_max_retrans",
- .data = &nf_ct_tcp_timeout_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
- .procname = "nf_conntrack_tcp_loose",
- .data = &nf_ct_tcp_loose,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
- .procname = "nf_conntrack_tcp_be_liberal",
- .data = &nf_ct_tcp_be_liberal,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
- .procname = "nf_conntrack_tcp_max_retrans",
- .data = &nf_ct_tcp_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
{ .ctl_name = 0 }
};
@@ -659,109 +415,9 @@ static ctl_table nf_ct_net_table[] = {
},
{ .ctl_name = 0 }
};
-EXPORT_SYMBOL(nf_ct_log_invalid);
+EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
#endif /* CONFIG_SYSCTL */
-int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
-{
- int ret = 0;
-
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) {
- ret = -EBUSY;
- goto out;
- }
- nf_ct_l3protos[proto->l3proto] = proto;
-out:
- write_unlock_bh(&nf_conntrack_lock);
-
- return ret;
-}
-
-void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
-{
- write_lock_bh(&nf_conntrack_lock);
- nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto;
- write_unlock_bh(&nf_conntrack_lock);
-
- /* Somebody could be still looking at the proto in bh. */
- synchronize_net();
-
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(kill_l3proto, proto);
-}
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
- them. --RR */
-int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto)
-{
- int ret = 0;
-
-retry:
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[proto->l3proto]) {
- if (nf_ct_protos[proto->l3proto][proto->proto]
- != &nf_conntrack_generic_protocol) {
- ret = -EBUSY;
- goto out_unlock;
- }
- } else {
- /* l3proto may be loaded latter. */
- struct nf_conntrack_protocol **proto_array;
- int i;
-
- write_unlock_bh(&nf_conntrack_lock);
-
- proto_array = (struct nf_conntrack_protocol **)
- kmalloc(MAX_NF_CT_PROTO *
- sizeof(struct nf_conntrack_protocol *),
- GFP_KERNEL);
- if (proto_array == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < MAX_NF_CT_PROTO; i++)
- proto_array[i] = &nf_conntrack_generic_protocol;
-
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[proto->l3proto]) {
- /* bad timing, but no problem */
- write_unlock_bh(&nf_conntrack_lock);
- kfree(proto_array);
- } else {
- nf_ct_protos[proto->l3proto] = proto_array;
- write_unlock_bh(&nf_conntrack_lock);
- }
-
- /*
- * Just once because array is never freed until unloading
- * nf_conntrack.ko
- */
- goto retry;
- }
-
- nf_ct_protos[proto->l3proto][proto->proto] = proto;
-
-out_unlock:
- write_unlock_bh(&nf_conntrack_lock);
-out:
- return ret;
-}
-
-void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto)
-{
- write_lock_bh(&nf_conntrack_lock);
- nf_ct_protos[proto->l3proto][proto->proto]
- = &nf_conntrack_generic_protocol;
- write_unlock_bh(&nf_conntrack_lock);
-
- /* Somebody could be still looking at the proto in bh. */
- synchronize_net();
-
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(kill_proto, proto);
-}
-
static int __init nf_conntrack_standalone_init(void)
{
#ifdef CONFIG_PROC_FS
@@ -834,70 +490,4 @@ module_exit(nf_conntrack_standalone_fini);
void need_conntrack(void)
{
}
-
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
-EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
-#endif
-EXPORT_SYMBOL(nf_ct_l3proto_try_module_get);
-EXPORT_SYMBOL(nf_ct_l3proto_module_put);
-EXPORT_SYMBOL(nf_conntrack_l3proto_register);
-EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
-EXPORT_SYMBOL(nf_conntrack_protocol_register);
-EXPORT_SYMBOL(nf_conntrack_protocol_unregister);
-EXPORT_SYMBOL(nf_ct_invert_tuplepr);
-EXPORT_SYMBOL(nf_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(nf_conntrack_helper_register);
-EXPORT_SYMBOL(nf_conntrack_helper_unregister);
-EXPORT_SYMBOL(nf_ct_iterate_cleanup);
-EXPORT_SYMBOL(__nf_ct_refresh_acct);
-EXPORT_SYMBOL(nf_ct_protos);
-EXPORT_SYMBOL(__nf_ct_proto_find);
-EXPORT_SYMBOL(nf_ct_proto_find_get);
-EXPORT_SYMBOL(nf_ct_proto_put);
-EXPORT_SYMBOL(nf_ct_l3proto_find_get);
-EXPORT_SYMBOL(nf_ct_l3proto_put);
-EXPORT_SYMBOL(nf_ct_l3protos);
-EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
-EXPORT_SYMBOL(nf_conntrack_expect_alloc);
-EXPORT_SYMBOL(nf_conntrack_expect_put);
-EXPORT_SYMBOL(nf_conntrack_expect_related);
-EXPORT_SYMBOL(nf_conntrack_unexpect_related);
-EXPORT_SYMBOL(nf_conntrack_tuple_taken);
-EXPORT_SYMBOL(nf_conntrack_htable_size);
-EXPORT_SYMBOL(nf_conntrack_lock);
-EXPORT_SYMBOL(nf_conntrack_hash);
-EXPORT_SYMBOL(nf_conntrack_untracked);
-EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(nf_conntrack_tcp_update);
-#endif
-EXPORT_SYMBOL(__nf_conntrack_confirm);
-EXPORT_SYMBOL(nf_ct_get_tuple);
-EXPORT_SYMBOL(nf_ct_invert_tuple);
-EXPORT_SYMBOL(nf_conntrack_in);
-EXPORT_SYMBOL(__nf_conntrack_attach);
-EXPORT_SYMBOL(nf_conntrack_alloc);
-EXPORT_SYMBOL(nf_conntrack_free);
-EXPORT_SYMBOL(nf_conntrack_flush);
-EXPORT_SYMBOL(nf_ct_remove_expectations);
-EXPORT_SYMBOL(nf_ct_helper_find_get);
-EXPORT_SYMBOL(nf_ct_helper_put);
-EXPORT_SYMBOL(__nf_conntrack_helper_find_byname);
-EXPORT_SYMBOL(__nf_conntrack_find);
-EXPORT_SYMBOL(nf_ct_unlink_expect);
-EXPORT_SYMBOL(nf_conntrack_hash_insert);
-EXPORT_SYMBOL(__nf_conntrack_expect_find);
-EXPORT_SYMBOL(nf_conntrack_expect_find);
-EXPORT_SYMBOL(nf_conntrack_expect_list);
-#if defined(CONFIG_NF_CT_NETLINK) || \
- defined(CONFIG_NF_CT_NETLINK_MODULE)
-EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple);
-#endif
+EXPORT_SYMBOL_GPL(need_conntrack);
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
new file mode 100644
index 00000000000..f5bffe24b0a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -0,0 +1,160 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_tftp.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("TFTP connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_tftp");
+
+#define MAX_PORTS 8
+static unsigned short ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
+
+#if 0
+#define DEBUGP(format, args...) printk("%s:%s:" format, \
+ __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_expect *exp) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
+
+/*
+ * Conntrack helper callback for TFTP packets.
+ *
+ * Reads the TFTP header located right after the UDP header at @protoff.
+ * For read and write requests it allocates an expectation for @ct, fills
+ * it from the reply-direction tuple and either passes it to the NAT hook
+ * or registers it directly. All other opcodes are only traced via DEBUGP.
+ *
+ * Returns NF_ACCEPT by default, NF_DROP when the expectation cannot be
+ * allocated or registered, or whatever the NAT hook returns.
+ */
+static int tftp_help(struct sk_buff **pskb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ struct tftphdr _tftph, *tfh;
+ struct nf_conntrack_expect *exp;
+ struct nf_conntrack_tuple *tuple;
+ unsigned int ret = NF_ACCEPT;
+ int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ typeof(nf_nat_tftp_hook) nf_nat_tftp;
+
+ /* _tftph is the on-stack buffer offered to skb_header_pointer() */
+ tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr),
+ sizeof(_tftph), &_tftph);
+ /* no TFTP header could be obtained: nothing to track, accept */
+ if (tfh == NULL)
+ return NF_ACCEPT;
+
+ switch (ntohs(tfh->opcode)) {
+ case TFTP_OPCODE_READ:
+ case TFTP_OPCODE_WRITE:
+ /* RRQ and WRQ works the same way */
+ DEBUGP("");
+ NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ exp = nf_conntrack_expect_alloc(ct);
+ if (exp == NULL)
+ return NF_DROP;
+ /*
+ * Build the expectation from the reply direction: its source and
+ * destination addresses plus its destination UDP port. The source
+ * port argument is NULL (presumably a wildcard; confirm against
+ * nf_conntrack_expect_init()).
+ */
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ nf_conntrack_expect_init(exp, family,
+ &tuple->src.u3, &tuple->dst.u3,
+ IPPROTO_UDP,
+ NULL, &tuple->dst.u.udp.port);
+
+ DEBUGP("expect: ");
+ NF_CT_DUMP_TUPLE(&exp->tuple);
+ NF_CT_DUMP_TUPLE(&exp->mask);
+
+ /*
+ * Use the NAT hook only if one is installed and the connection has
+ * a NAT status bit set; otherwise register the expectation here.
+ */
+ nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
+ if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
+ ret = nf_nat_tftp(pskb, ctinfo, exp);
+ else if (nf_conntrack_expect_related(exp) != 0)
+ ret = NF_DROP;
+ /* done with exp on every path; pairs with the alloc above */
+ nf_conntrack_expect_put(exp);
+ break;
+ case TFTP_OPCODE_DATA:
+ case TFTP_OPCODE_ACK:
+ DEBUGP("Data/ACK opcode\n");
+ break;
+ case TFTP_OPCODE_ERROR:
+ DEBUGP("Error opcode\n");
+ break;
+ default:
+ DEBUGP("Unknown opcode\n");
+ }
+ return ret;
+}
+
+static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly;
+static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly;
+
+/*
+ * Unregister both helpers (slot 0 and slot 1) of every configured port,
+ * in ascending port-index order.
+ */
+static void nf_conntrack_tftp_fini(void)
+{
+	int port;
+
+	for (port = 0; port < ports_c; port++) {
+		nf_conntrack_helper_unregister(&tftp[port][0]);
+		nf_conntrack_helper_unregister(&tftp[port][1]);
+	}
+}
+
+/*
+ * Module init: register one TFTP helper per configured port and per
+ * address family (slot 0 is AF_INET, slot 1 is AF_INET6).
+ *
+ * When no "ports" module parameter was supplied, TFTP_PORT is used as the
+ * single port. Returns 0 on success, or the error returned by
+ * nf_conntrack_helper_register(); in that case every helper that had
+ * already been registered is unregistered again before returning.
+ */
+static int __init nf_conntrack_tftp_init(void)
+{
+	int i, j, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = TFTP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		memset(&tftp[i], 0, sizeof(tftp[i]));
+
+		tftp[i][0].tuple.src.l3num = AF_INET;
+		tftp[i][1].tuple.src.l3num = AF_INET6;
+		for (j = 0; j < 2; j++) {
+			/* match exactly: l3 family, UDP, and the server port */
+			tftp[i][j].tuple.dst.protonum = IPPROTO_UDP;
+			tftp[i][j].tuple.src.u.udp.port = htons(ports[i]);
+			tftp[i][j].mask.src.l3num = 0xFFFF;
+			tftp[i][j].mask.dst.protonum = 0xFF;
+			tftp[i][j].mask.src.u.udp.port = htons(0xFFFF);
+			tftp[i][j].max_expected = 1;
+			tftp[i][j].timeout = 5 * 60; /* 5 minutes */
+			tftp[i][j].me = THIS_MODULE;
+			tftp[i][j].help = tftp_help;
+
+			/*
+			 * NOTE(review): the suffix is the array index, not
+			 * the port number, although tftp_names is sized for
+			 * "tftp-65535" - confirm which one is intended.
+			 */
+			tmpname = &tftp_names[i][j][0];
+			if (ports[i] == TFTP_PORT)
+				sprintf(tmpname, "tftp");
+			else
+				sprintf(tmpname, "tftp-%u", i);
+			tftp[i][j].name = tmpname;
+
+			ret = nf_conntrack_helper_register(&tftp[i][j]);
+			if (ret) {
+				printk("nf_ct_tftp: failed to register helper "
+				       "for pf: %u port: %u\n",
+				       tftp[i][j].tuple.src.l3num, ports[i]);
+				goto err_unregister;
+			}
+		}
+	}
+	return 0;
+
+err_unregister:
+	/*
+	 * Undo only what was actually registered. tftp[i][j] is the entry
+	 * that failed and everything after it was never registered, so a
+	 * blanket nf_conntrack_tftp_fini() would hand unregistered helpers
+	 * to nf_conntrack_helper_unregister().
+	 */
+	while (j-- > 0)
+		nf_conntrack_helper_unregister(&tftp[i][j]);
+	while (i-- > 0) {
+		for (j = 0; j < 2; j++)
+			nf_conntrack_helper_unregister(&tftp[i][j]);
+	}
+	return ret;
+}
+
+module_init(nf_conntrack_tftp_init);
+module_exit(nf_conntrack_tftp_fini);
diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c
new file mode 100644
index 00000000000..06ddddb2911
--- /dev/null
+++ b/net/netfilter/nf_sysctl.c
@@ -0,0 +1,134 @@
+/* nf_sysctl.c netfilter sysctl registration/unregistration
+ *
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+/*
+ * Free a chain of path elements linked through ->child, starting at @path.
+ * The walk ends at the end of the chain or as soon as @table is reached;
+ * @table itself is never freed.
+ */
+static void
+path_free(struct ctl_table *path, struct ctl_table *table)
+{
+	struct ctl_table *cur = path;
+
+	while (cur != NULL && cur != table) {
+		/* read the link before the element is released */
+		struct ctl_table *child = cur->child;
+
+		kfree(cur);
+		cur = child;
+	}
+}
+
+/*
+ * Build a private copy of the directory chain @path and hang @table off
+ * the last copied element.
+ *
+ * Every element reachable from @path through ->child is duplicated with
+ * kmemdup() and the copies are linked to each other. When @path is NULL,
+ * @table itself is returned.
+ *
+ * Returns the head of the new chain, or NULL when an allocation fails; in
+ * that case all copies made so far are freed and the caller's @path is
+ * left untouched.
+ */
+static struct ctl_table *
+path_dup(struct ctl_table *path, struct ctl_table *table)
+{
+	struct ctl_table *t, *last = NULL, *tmp;
+
+	for (t = path; t != NULL; t = t->child) {
+		/* twice the size since path elements are terminated by an
+		 * empty element */
+		tmp = kmemdup(t, 2 * sizeof(*t), GFP_KERNEL);
+		if (tmp == NULL) {
+			if (last != NULL) {
+				/*
+				 * The newest copy still carries the ->child
+				 * pointer inherited from its template, i.e.
+				 * it points into the caller's original path.
+				 * Terminate the copied chain at @table first
+				 * so path_free() stops there instead of
+				 * walking on and kfree()ing the originals.
+				 * At this point path is the head of the
+				 * copies, not the caller's argument.
+				 */
+				last->child = table;
+				path_free(path, table);
+			}
+			return NULL;
+		}
+
+		if (last != NULL)
+			last->child = tmp;
+		else
+			path = tmp;	/* first copy becomes the new head */
+		last = tmp;
+	}
+
+	if (last != NULL)
+		last->child = table;
+	else
+		path = table;
+
+	return path;
+}
+
+/*
+ * Register @table underneath a private copy of the directory chain @path.
+ *
+ * Returns the header handed back by register_sysctl_table(), or NULL when
+ * either the path could not be duplicated or the registration failed. In
+ * the latter case the duplicated path is freed again.
+ */
+struct ctl_table_header *
+nf_register_sysctl_table(struct ctl_table *path, struct ctl_table *table)
+{
+	struct ctl_table_header *hdr;
+	struct ctl_table *dup;
+
+	dup = path_dup(path, table);
+	if (dup == NULL)
+		return NULL;
+
+	hdr = register_sysctl_table(dup, 0);
+	if (hdr != NULL)
+		return hdr;
+
+	/* registration failed: drop the copies, @table stays with the caller */
+	path_free(dup, table);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nf_register_sysctl_table);
+
+/*
+ * Unregister @header and free the path elements hanging off
+ * header->ctl_table up to, but not including, @table.
+ */
+void
+nf_unregister_sysctl_table(struct ctl_table_header *header,
+ struct ctl_table *table)
+{
+ /*
+ * NOTE(review): the path is read before unregistering, presumably
+ * because header must not be touched afterwards - confirm.
+ */
+ struct ctl_table *path = header->ctl_table;
+
+ unregister_sysctl_table(header);
+ path_free(path, table);
+}
+EXPORT_SYMBOL_GPL(nf_unregister_sysctl_table);
+
+/*
+ * net/netfilter
+ *
+ * Directory chain "net" (CTL_NET) -> "netfilter" (NET_NETFILTER), both with
+ * mode 0555. Each array holds one directory entry followed by an entry
+ * whose ctl_name is 0. The top-level array is exported; presumably it is
+ * meant to be passed as the path argument of nf_register_sysctl_table()
+ * (confirm against callers).
+ */
+static struct ctl_table nf_net_netfilter_table[] = {
+ {
+ .ctl_name = NET_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+struct ctl_table nf_net_netfilter_sysctl_path[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = nf_net_netfilter_table,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
+
+/*
+ * net/ipv4/netfilter
+ *
+ * Directory chain "net" (CTL_NET) -> "ipv4" (NET_IPV4) -> "netfilter"
+ * (NET_IPV4_NETFILTER), all with mode 0555. Each array holds one directory
+ * entry followed by an entry whose ctl_name is 0; the levels are linked
+ * through .child. Only the top-level array is exported.
+ */
+static struct ctl_table nf_net_ipv4_netfilter_table[] = {
+ {
+ .ctl_name = NET_IPV4_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+static struct ctl_table nf_net_ipv4_table[] = {
+ {
+ .ctl_name = NET_IPV4,
+ .procname = "ipv4",
+ .mode = 0555,
+ .child = nf_net_ipv4_netfilter_table,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+struct ctl_table nf_net_ipv4_netfilter_sysctl_path[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = nf_net_ipv4_table,
+ },
+ {
+ .ctl_name = 0
+ }
+};
+EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b59d3b2bde2..d1505dd25c6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -408,13 +408,13 @@ __build_packet_message(struct nfulnl_instance *inst,
const struct net_device *indev,
const struct net_device *outdev,
const struct nf_loginfo *li,
- const char *prefix)
+ const char *prefix, unsigned int plen)
{
unsigned char *old_tail;
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- u_int32_t tmp_uint;
+ __be32 tmp_uint;
UDEBUG("entered\n");
@@ -427,17 +427,13 @@ __build_packet_message(struct nfulnl_instance *inst,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(inst->group_num);
- pmsg.hw_protocol = htons(skb->protocol);
+ pmsg.hw_protocol = skb->protocol;
pmsg.hook = hooknum;
NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg);
- if (prefix) {
- int slen = strlen(prefix);
- if (slen > NFULNL_PREFIXLEN)
- slen = NFULNL_PREFIXLEN;
- NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix);
- }
+ if (prefix)
+ NFA_PUT(inst->skb, NFULA_PREFIX, plen, prefix);
if (indev) {
tmp_uint = htonl(indev->ifindex);
@@ -501,18 +497,16 @@ __build_packet_message(struct nfulnl_instance *inst,
#endif
}
- if (skb->nfmark) {
- tmp_uint = htonl(skb->nfmark);
+ if (skb->mark) {
+ tmp_uint = htonl(skb->mark);
NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
}
if (indev && skb->dev && skb->dev->hard_header_parse) {
struct nfulnl_msg_packet_hw phw;
-
- phw.hw_addrlen =
- skb->dev->hard_header_parse((struct sk_buff *)skb,
+ int len = skb->dev->hard_header_parse((struct sk_buff *)skb,
phw.hw_addr);
- phw.hw_addrlen = htons(phw.hw_addrlen);
+ phw.hw_addrlen = htons(len);
NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
}
@@ -529,7 +523,7 @@ __build_packet_message(struct nfulnl_instance *inst,
if (skb->sk) {
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
- u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid);
+ __be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
/* need to unlock here since NFA_PUT may goto */
read_unlock_bh(&skb->sk->sk_callback_lock);
NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid);
@@ -544,7 +538,7 @@ __build_packet_message(struct nfulnl_instance *inst,
}
/* global sequence number */
if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) {
- tmp_uint = atomic_inc_return(&global_seq);
+ tmp_uint = htonl(atomic_inc_return(&global_seq));
NFA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint);
}
@@ -603,6 +597,7 @@ nfulnl_log_packet(unsigned int pf,
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int nlbufsiz;
+ unsigned int plen;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
@@ -618,6 +613,10 @@ nfulnl_log_packet(unsigned int pf,
return;
}
+ plen = 0;
+ if (prefix)
+ plen = strlen(prefix);
+
/* all macros expand to constant values at compile time */
/* FIXME: do we want to make the size calculation conditional based on
* what is actually present? way more branches and checks, but more
@@ -632,7 +631,7 @@ nfulnl_log_packet(unsigned int pf,
#endif
+ NFA_SPACE(sizeof(u_int32_t)) /* mark */
+ NFA_SPACE(sizeof(u_int32_t)) /* uid */
- + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */
+ + NFA_SPACE(plen) /* prefix */
+ NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw))
+ NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp));
@@ -703,7 +702,7 @@ nfulnl_log_packet(unsigned int pf,
inst->qlen++;
__build_packet_message(inst, skb, data_len, pf,
- hooknum, in, out, li, prefix);
+ hooknum, in, out, li, prefix, plen);
/* timer_pending always called within inst->lock, so there
* is no chance of a race here */
@@ -878,33 +877,33 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
params = NFA_DATA(nfula[NFULA_CFG_MODE-1]);
nfulnl_set_mode(inst, params->copy_mode,
- ntohs(params->copy_range));
+ ntohl(params->copy_range));
}
if (nfula[NFULA_CFG_TIMEOUT-1]) {
- u_int32_t timeout =
- *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]);
+ __be32 timeout =
+ *(__be32 *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]);
nfulnl_set_timeout(inst, ntohl(timeout));
}
if (nfula[NFULA_CFG_NLBUFSIZ-1]) {
- u_int32_t nlbufsiz =
- *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]);
+ __be32 nlbufsiz =
+ *(__be32 *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]);
nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
}
if (nfula[NFULA_CFG_QTHRESH-1]) {
- u_int32_t qthresh =
- *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]);
+ __be32 qthresh =
+ *(__be32 *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]);
nfulnl_set_qthresh(inst, ntohl(qthresh));
}
if (nfula[NFULA_CFG_FLAGS-1]) {
- u_int16_t flags =
- *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]);
+ __be16 flags =
+ *(__be16 *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]);
nfulnl_set_flags(inst, ntohs(flags));
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8eb2473d83e..a88a017da22 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -349,7 +349,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct sk_buff *entskb = entry->skb;
struct net_device *indev;
struct net_device *outdev;
- unsigned int tmp_uint;
+ __be32 tmp_uint;
QDEBUG("entered\n");
@@ -414,7 +414,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
nfmsg->res_id = htons(queue->queue_num);
pmsg.packet_id = htonl(entry->id);
- pmsg.hw_protocol = htons(entskb->protocol);
+ pmsg.hw_protocol = entskb->protocol;
pmsg.hook = entinf->hook;
NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
@@ -480,8 +480,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
#endif
}
- if (entskb->nfmark) {
- tmp_uint = htonl(entskb->nfmark);
+ if (entskb->mark) {
+ tmp_uint = htonl(entskb->mark);
NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
}
@@ -489,10 +489,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
&& entskb->dev->hard_header_parse) {
struct nfqnl_msg_packet_hw phw;
- phw.hw_addrlen =
- entskb->dev->hard_header_parse(entskb,
+ int len = entskb->dev->hard_header_parse(entskb,
phw.hw_addr);
- phw.hw_addrlen = htons(phw.hw_addrlen);
+ phw.hw_addrlen = htons(len);
NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
}
@@ -622,9 +621,10 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
int diff;
diff = data_len - e->skb->len;
- if (diff < 0)
- skb_trim(e->skb, data_len);
- else if (diff > 0) {
+ if (diff < 0) {
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
if (data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
@@ -834,8 +834,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
}
if (nfqa[NFQA_MARK-1])
- entry->skb->nfmark = ntohl(*(u_int32_t *)
- NFA_DATA(nfqa[NFQA_MARK-1]));
+ entry->skb->mark = ntohl(*(__be32 *)
+ NFA_DATA(nfqa[NFQA_MARK-1]));
issue_verdict(entry, verdict);
instance_put(queue);
@@ -947,6 +947,14 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ntohl(params->copy_range));
}
+ if (nfqa[NFQA_CFG_QUEUE_MAXLEN-1]) {
+ __be32 *queue_maxlen;
+ queue_maxlen = NFA_DATA(nfqa[NFQA_CFG_QUEUE_MAXLEN-1]);
+ spin_lock_bh(&queue->lock);
+ queue->queue_maxlen = ntohl(*queue_maxlen);
+ spin_unlock_bh(&queue->lock);
+ }
+
out_put:
instance_put(queue);
return ret;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 58522fc65d3..8996584b849 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
+#include <linux/mm.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index c01524f817f..0534bfa65cc 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -31,6 +31,9 @@ MODULE_ALIAS("ipt_CONNMARK");
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CONNMARK.h>
#include <net/netfilter/nf_conntrack_compat.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack_ecache.h>
+#endif
static unsigned int
target(struct sk_buff **pskb,
@@ -42,7 +45,7 @@ target(struct sk_buff **pskb,
{
const struct xt_connmark_target_info *markinfo = targinfo;
u_int32_t diff;
- u_int32_t nfmark;
+ u_int32_t mark;
u_int32_t newmark;
u_int32_t ctinfo;
u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
@@ -62,7 +65,7 @@ target(struct sk_buff **pskb,
break;
case XT_CONNMARK_SAVE:
newmark = (*ctmark & ~markinfo->mask) |
- ((*pskb)->nfmark & markinfo->mask);
+ ((*pskb)->mark & markinfo->mask);
if (*ctmark != newmark) {
*ctmark = newmark;
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
@@ -73,10 +76,10 @@ target(struct sk_buff **pskb,
}
break;
case XT_CONNMARK_RESTORE:
- nfmark = (*pskb)->nfmark;
- diff = (*ctmark ^ nfmark) & markinfo->mask;
+ mark = (*pskb)->mark;
+ diff = (*ctmark ^ mark) & markinfo->mask;
if (diff != 0)
- (*pskb)->nfmark = nfmark ^ diff;
+ (*pskb)->mark = mark ^ diff;
break;
}
}
@@ -93,6 +96,11 @@ checkentry(const char *tablename,
{
struct xt_connmark_target_info *matchinfo = targinfo;
+ if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+ printk(KERN_WARNING "can't load conntrack support for "
+ "proto=%d\n", target->family);
+ return 0;
+ }
if (matchinfo->mode == XT_CONNMARK_RESTORE) {
if (strcmp(tablename, "mangle") != 0) {
printk(KERN_WARNING "CONNMARK: restore can only be "
@@ -108,6 +116,12 @@ checkentry(const char *tablename,
return 1;
}
+/*
+ * Target destroy hook: calls nf_ct_l3proto_module_put() for the target's
+ * family, the counterpart of the nf_ct_l3proto_try_module_get() call made
+ * in checkentry(). @targinfo is unused.
+ */
+static void
+destroy(const struct xt_target *target, void *targinfo)
+{
+ nf_ct_l3proto_module_put(target->family);
+}
+
#ifdef CONFIG_COMPAT
struct compat_xt_connmark_target_info {
compat_ulong_t mark, mask;
@@ -144,6 +158,7 @@ static struct xt_target xt_connmark_target[] = {
.name = "CONNMARK",
.family = AF_INET,
.checkentry = checkentry,
+ .destroy = destroy,
.target = target,
.targetsize = sizeof(struct xt_connmark_target_info),
#ifdef CONFIG_COMPAT
@@ -157,6 +172,7 @@ static struct xt_target xt_connmark_target[] = {
.name = "CONNMARK",
.family = AF_INET6,
.checkentry = checkentry,
+ .destroy = destroy,
.target = target,
.targetsize = sizeof(struct xt_connmark_target_info),
.me = THIS_MODULE
@@ -165,7 +181,6 @@ static struct xt_target xt_connmark_target[] = {
static int __init xt_connmark_init(void)
{
- need_conntrack();
return xt_register_targets(xt_connmark_target,
ARRAY_SIZE(xt_connmark_target));
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 46738626667..a3fe3c334b0 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -93,6 +93,11 @@ static int checkentry(const char *tablename, const void *entry,
{
struct xt_connsecmark_target_info *info = targinfo;
+ if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+ printk(KERN_WARNING "can't load conntrack support for "
+ "proto=%d\n", target->family);
+ return 0;
+ }
switch (info->mode) {
case CONNSECMARK_SAVE:
case CONNSECMARK_RESTORE:
@@ -106,11 +111,18 @@ static int checkentry(const char *tablename, const void *entry,
return 1;
}
+static void
+destroy(const struct xt_target *target, void *targinfo)
+{
+ nf_ct_l3proto_module_put(target->family);
+}
+
static struct xt_target xt_connsecmark_target[] = {
{
.name = "CONNSECMARK",
.family = AF_INET,
.checkentry = checkentry,
+ .destroy = destroy,
.target = target,
.targetsize = sizeof(struct xt_connsecmark_target_info),
.table = "mangle",
@@ -120,6 +132,7 @@ static struct xt_target xt_connsecmark_target[] = {
.name = "CONNSECMARK",
.family = AF_INET6,
.checkentry = checkentry,
+ .destroy = destroy,
.target = target,
.targetsize = sizeof(struct xt_connsecmark_target_info),
.table = "mangle",
@@ -129,7 +142,6 @@ static struct xt_target xt_connsecmark_target[] = {
static int __init xt_connsecmark_init(void)
{
- need_conntrack();
return xt_register_targets(xt_connsecmark_target,
ARRAY_SIZE(xt_connsecmark_target));
}
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index c6e860a7114..0b48547e8d6 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -31,8 +31,8 @@ target_v0(struct sk_buff **pskb,
{
const struct xt_mark_target_info *markinfo = targinfo;
- if((*pskb)->nfmark != markinfo->mark)
- (*pskb)->nfmark = markinfo->mark;
+ if((*pskb)->mark != markinfo->mark)
+ (*pskb)->mark = markinfo->mark;
return XT_CONTINUE;
}
@@ -54,16 +54,16 @@ target_v1(struct sk_buff **pskb,
break;
case XT_MARK_AND:
- mark = (*pskb)->nfmark & markinfo->mark;
+ mark = (*pskb)->mark & markinfo->mark;
break;
case XT_MARK_OR:
- mark = (*pskb)->nfmark | markinfo->mark;
+ mark = (*pskb)->mark | markinfo->mark;
break;
}
- if((*pskb)->nfmark != mark)
- (*pskb)->nfmark = mark;
+ if((*pskb)->mark != mark)
+ (*pskb)->mark = mark;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
new file mode 100644
index 00000000000..901ed7abaa1
--- /dev/null
+++ b/net/netfilter/xt_NFLOG.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_NFLOG.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("x_tables NFLOG target");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_NFLOG");
+MODULE_ALIAS("ip6t_NFLOG");
+
+static unsigned int
+nflog_target(struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int hooknum, const struct xt_target *target,
+ const void *targinfo)
+{
+ const struct xt_nflog_info *info = targinfo;
+ struct nf_loginfo li;
+
+ li.type = NF_LOG_TYPE_ULOG;
+ li.u.ulog.copy_len = info->len;
+ li.u.ulog.group = info->group;
+ li.u.ulog.qthreshold = info->threshold;
+
+ nf_log_packet(target->family, hooknum, *pskb, in, out, &li,
+ "%s", info->prefix);
+ return XT_CONTINUE;
+}
+
+static int
+nflog_checkentry(const char *tablename, const void *entry,
+ const struct xt_target *target, void *targetinfo,
+ unsigned int hookmask)
+{
+ struct xt_nflog_info *info = targetinfo;
+
+ if (info->flags & ~XT_NFLOG_MASK)
+ return 0;
+ if (info->prefix[sizeof(info->prefix) - 1] != '\0')
+ return 0;
+ return 1;
+}
+
+static struct xt_target xt_nflog_target[] = {
+ {
+ .name = "NFLOG",
+ .family = AF_INET,
+ .checkentry = nflog_checkentry,
+ .target = nflog_target,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NFLOG",
+ .family = AF_INET6,
+ .checkentry = nflog_checkentry,
+ .target = nflog_target,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init xt_nflog_init(void)
+{
+ return xt_register_targets(xt_nflog_target,
+ ARRAY_SIZE(xt_nflog_target));
+}
+
+static void __exit xt_nflog_fini(void)
+{
+ xt_unregister_targets(xt_nflog_target, ARRAY_SIZE(xt_nflog_target));
+}
+
+module_init(xt_nflog_init);
+module_exit(xt_nflog_fini);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index dcc497ea818..d93cb096a67 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -139,15 +139,28 @@ static int check(const char *tablename,
sinfo->direction != XT_CONNBYTES_DIR_BOTH)
return 0;
+ if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+ printk(KERN_WARNING "can't load conntrack support for "
+ "proto=%d\n", match->family);
+ return 0;
+ }
+
return 1;
}
+static void
+destroy(const struct xt_match *match, void *matchinfo)
+{
+ nf_ct_l3proto_module_put(match->family);
+}
+
static struct xt_match xt_connbytes_match[] = {
{
.name = "connbytes",
.family = AF_INET,
.checkentry = check,
.match = match,
+ .destroy = destroy,
.matchsize = sizeof(struct xt_connbytes_info),
.me = THIS_MODULE
},
@@ -156,6 +169,7 @@ static struct xt_match xt_connbytes_match[] = {
.family = AF_INET6,
.checkentry = check,
.match = match,
+ .destroy = destroy,
.matchsize = sizeof(struct xt_connbytes_info),
.me = THIS_MODULE
},
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index a8f03057dbd..36c2defff23 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -63,22 +63,18 @@ checkentry(const char *tablename,
printk(KERN_WARNING "connmark: only support 32bit mark\n");
return 0;
}
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (nf_ct_l3proto_try_module_get(match->family) < 0) {
- printk(KERN_WARNING "can't load nf_conntrack support for "
+ printk(KERN_WARNING "can't load conntrack support for "
"proto=%d\n", match->family);
return 0;
}
-#endif
return 1;
}
static void
destroy(const struct xt_match *match, void *matchinfo)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
-#endif
}
#ifdef CONFIG_COMPAT
@@ -140,7 +136,6 @@ static struct xt_match xt_connmark_match[] = {
static int __init xt_connmark_init(void)
{
- need_conntrack();
return xt_register_matches(xt_connmark_match,
ARRAY_SIZE(xt_connmark_match));
}
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 0ea501a2fda..3dc2357b8de 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -20,6 +20,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
@@ -228,21 +229,17 @@ checkentry(const char *tablename,
void *matchinfo,
unsigned int hook_mask)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (nf_ct_l3proto_try_module_get(match->family) < 0) {
- printk(KERN_WARNING "can't load nf_conntrack support for "
+ printk(KERN_WARNING "can't load conntrack support for "
"proto=%d\n", match->family);
return 0;
}
-#endif
return 1;
}
static void destroy(const struct xt_match *match, void *matchinfo)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
-#endif
}
static struct xt_match conntrack_match = {
@@ -257,7 +254,6 @@ static struct xt_match conntrack_match = {
static int __init xt_conntrack_init(void)
{
- need_conntrack();
return xt_register_match(&conntrack_match);
}
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 33ccdbf8e79..a5a6e192ac2 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -6,23 +6,8 @@
* $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
*
* Development of this code was funded by Astaro AG, http://www.astaro.com/
- *
- * based on ipt_limit.c by:
- * Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- * Rusty Russell <rusty@rustcorp.com.au>
- *
- * The general idea is to create a hash table for every dstip and have a
- * seperate limit counter per tuple. This way you can do something like 'limit
- * the number of syn packets for each of my internal addresses.
- *
- * Ideally this would just be implemented as a general 'hash' match, which would
- * allow us to attach any iptables target to it's hash buckets. But this is
- * not possible in the current iptables architecture. As always, pkttables for
- * 2.7.x will help ;)
*/
#include <linux/module.h>
-#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/jhash.h>
@@ -31,28 +16,41 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_hashlimit.h>
-
-/* FIXME: this is just for IP_NF_ASSERRT */
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_hashlimit.h>
#include <linux/mutex.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
+MODULE_ALIAS("ipt_hashlimit");
+MODULE_ALIAS("ip6t_hashlimit");
/* need to declare this at the top */
-static struct proc_dir_entry *hashlimit_procdir;
+static struct proc_dir_entry *hashlimit_procdir4;
+static struct proc_dir_entry *hashlimit_procdir6;
static struct file_operations dl_file_ops;
/* hash table crap */
-
struct dsthash_dst {
- __be32 src_ip;
- __be32 dst_ip;
- /* ports have to be consecutive !!! */
+ union {
+ struct {
+ __be32 src;
+ __be32 dst;
+ } ip;
+ struct {
+ __be32 src[4];
+ __be32 dst[4];
+ } ip6;
+ } addr;
__be16 src_port;
__be16 dst_port;
};
@@ -71,9 +69,10 @@ struct dsthash_ent {
} rateinfo;
};
-struct ipt_hashlimit_htable {
+struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
atomic_t use;
+ int family;
struct hashlimit_cfg cfg; /* config */
@@ -81,8 +80,8 @@ struct ipt_hashlimit_htable {
spinlock_t lock; /* lock for list_head */
u_int32_t rnd; /* random seed for hash */
int rnd_initialized;
+ unsigned int count; /* number entries in table */
struct timer_list timer; /* timer for gc */
- atomic_t count; /* number entries in table */
/* seq_file stuff */
struct proc_dir_entry *pde;
@@ -93,45 +92,37 @@ struct ipt_hashlimit_htable {
static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */
static HLIST_HEAD(hashlimit_htables);
-static kmem_cache_t *hashlimit_cachep __read_mostly;
+static struct kmem_cache *hashlimit_cachep __read_mostly;
static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
{
- return (ent->dst.dst_ip == b->dst_ip
- && ent->dst.dst_port == b->dst_port
- && ent->dst.src_port == b->src_port
- && ent->dst.src_ip == b->src_ip);
+ return !memcmp(&ent->dst, b, sizeof(ent->dst));
}
-static inline u_int32_t
-hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
+static u_int32_t
+hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
{
- return (jhash_3words((__force u32)dst->dst_ip,
- ((__force u32)dst->dst_port<<16 |
- (__force u32)dst->src_port),
- (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size);
+ return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size;
}
-static inline struct dsthash_ent *
-__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+static struct dsthash_ent *
+dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst)
{
struct dsthash_ent *ent;
struct hlist_node *pos;
u_int32_t hash = hash_dst(ht, dst);
- if (!hlist_empty(&ht->hash[hash]))
- hlist_for_each_entry(ent, pos, &ht->hash[hash], node) {
- if (dst_cmp(ent, dst)) {
+ if (!hlist_empty(&ht->hash[hash])) {
+ hlist_for_each_entry(ent, pos, &ht->hash[hash], node)
+ if (dst_cmp(ent, dst))
return ent;
- }
- }
-
+ }
return NULL;
}
/* allocate dsthash_ent, initialize dst, put in htable and lock it */
static struct dsthash_ent *
-__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+dsthash_alloc_init(struct xt_hashlimit_htable *ht, struct dsthash_dst *dst)
{
struct dsthash_ent *ent;
@@ -142,12 +133,11 @@ __dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
ht->rnd_initialized = 1;
}
- if (ht->cfg.max &&
- atomic_read(&ht->count) >= ht->cfg.max) {
+ if (ht->cfg.max && ht->count >= ht->cfg.max) {
/* FIXME: do something. question is what.. */
if (net_ratelimit())
- printk(KERN_WARNING
- "ipt_hashlimit: max count of %u reached\n",
+ printk(KERN_WARNING
+ "xt_hashlimit: max count of %u reached\n",
ht->cfg.max);
return NULL;
}
@@ -155,53 +145,47 @@ __dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
if (!ent) {
if (net_ratelimit())
- printk(KERN_ERR
- "ipt_hashlimit: can't allocate dsthash_ent\n");
+ printk(KERN_ERR
+ "xt_hashlimit: can't allocate dsthash_ent\n");
return NULL;
}
-
- atomic_inc(&ht->count);
-
- ent->dst.dst_ip = dst->dst_ip;
- ent->dst.dst_port = dst->dst_port;
- ent->dst.src_ip = dst->src_ip;
- ent->dst.src_port = dst->src_port;
+ memcpy(&ent->dst, dst, sizeof(ent->dst));
hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
-
+ ht->count++;
return ent;
}
-static inline void
-__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent)
+static inline void
+dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
{
hlist_del(&ent->node);
kmem_cache_free(hashlimit_cachep, ent);
- atomic_dec(&ht->count);
+ ht->count--;
}
static void htable_gc(unsigned long htlong);
-static int htable_create(struct ipt_hashlimit_info *minfo)
+static int htable_create(struct xt_hashlimit_info *minfo, int family)
{
- int i;
+ struct xt_hashlimit_htable *hinfo;
unsigned int size;
- struct ipt_hashlimit_htable *hinfo;
+ unsigned int i;
if (minfo->cfg.size)
size = minfo->cfg.size;
else {
- size = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
+ size = ((num_physpages << PAGE_SHIFT) / 16384) /
+ sizeof(struct list_head);
if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
size = 8192;
if (size < 16)
size = 16;
}
/* FIXME: don't use vmalloc() here or anywhere else -HW */
- hinfo = vmalloc(sizeof(struct ipt_hashlimit_htable)
- + (sizeof(struct list_head) * size));
+ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
+ sizeof(struct list_head) * size);
if (!hinfo) {
- printk(KERN_ERR "ipt_hashlimit: Unable to create hashtable\n");
+ printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
return -1;
}
minfo->hinfo = hinfo;
@@ -217,11 +201,14 @@ static int htable_create(struct ipt_hashlimit_info *minfo)
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- atomic_set(&hinfo->count, 0);
atomic_set(&hinfo->use, 1);
+ hinfo->count = 0;
+ hinfo->family = family;
hinfo->rnd_initialized = 0;
spin_lock_init(&hinfo->lock);
- hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir);
+ hinfo->pde = create_proc_entry(minfo->name, 0,
+ family == AF_INET ? hashlimit_procdir4 :
+ hashlimit_procdir6);
if (!hinfo->pde) {
vfree(hinfo);
return -1;
@@ -242,23 +229,21 @@ static int htable_create(struct ipt_hashlimit_info *minfo)
return 0;
}
-static int select_all(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
+static int select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
{
return 1;
}
-static int select_gc(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
+static int select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)
{
return (jiffies >= he->expires);
}
-static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht,
- int (*select)(struct ipt_hashlimit_htable *ht,
+static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
+ int (*select)(struct xt_hashlimit_htable *ht,
struct dsthash_ent *he))
{
- int i;
-
- IP_NF_ASSERT(ht->cfg.size && ht->cfg.max);
+ unsigned int i;
/* lock hash table and iterate over it */
spin_lock_bh(&ht->lock);
@@ -267,7 +252,7 @@ static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht,
struct hlist_node *pos, *n;
hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
if ((*select)(ht, dh))
- __dsthash_free(ht, dh);
+ dsthash_free(ht, dh);
}
}
spin_unlock_bh(&ht->lock);
@@ -276,7 +261,7 @@ static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht,
/* hash table garbage collector, run by timer */
static void htable_gc(unsigned long htlong)
{
- struct ipt_hashlimit_htable *ht = (struct ipt_hashlimit_htable *)htlong;
+ struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
htable_selective_cleanup(ht, select_gc);
@@ -285,38 +270,39 @@ static void htable_gc(unsigned long htlong)
add_timer(&ht->timer);
}
-static void htable_destroy(struct ipt_hashlimit_htable *hinfo)
+static void htable_destroy(struct xt_hashlimit_htable *hinfo)
{
/* remove timer, if it is pending */
if (timer_pending(&hinfo->timer))
del_timer(&hinfo->timer);
/* remove proc entry */
- remove_proc_entry(hinfo->pde->name, hashlimit_procdir);
-
+ remove_proc_entry(hinfo->pde->name,
+ hinfo->family == AF_INET ? hashlimit_procdir4 :
+ hashlimit_procdir6);
htable_selective_cleanup(hinfo, select_all);
vfree(hinfo);
}
-static struct ipt_hashlimit_htable *htable_find_get(char *name)
+static struct xt_hashlimit_htable *htable_find_get(char *name, int family)
{
- struct ipt_hashlimit_htable *hinfo;
+ struct xt_hashlimit_htable *hinfo;
struct hlist_node *pos;
spin_lock_bh(&hashlimit_lock);
hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
- if (!strcmp(name, hinfo->pde->name)) {
+ if (!strcmp(name, hinfo->pde->name) &&
+ hinfo->family == family) {
atomic_inc(&hinfo->use);
spin_unlock_bh(&hashlimit_lock);
return hinfo;
}
}
spin_unlock_bh(&hashlimit_lock);
-
return NULL;
}
-static void htable_put(struct ipt_hashlimit_htable *hinfo)
+static void htable_put(struct xt_hashlimit_htable *hinfo)
{
if (atomic_dec_and_test(&hinfo->use)) {
spin_lock_bh(&hashlimit_lock);
@@ -326,7 +312,6 @@ static void htable_put(struct ipt_hashlimit_htable *hinfo)
}
}
-
/* The algorithm used is the Simple Token Bucket Filter (TBF)
* see net/sched/sch_tbf.c in the linux source tree
*/
@@ -370,17 +355,82 @@ user2credits(u_int32_t user)
/* If multiplying would overflow... */
if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
/* Divide first. */
- return (user / IPT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+ return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
- return (user * HZ * CREDITS_PER_JIFFY) / IPT_HASHLIMIT_SCALE;
+ return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
}
static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
{
- dh->rateinfo.credit += (now - xchg(&dh->rateinfo.prev, now))
- * CREDITS_PER_JIFFY;
+ dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
dh->rateinfo.credit = dh->rateinfo.credit_cap;
+ dh->rateinfo.prev = now;
+}
+
+static int
+hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
+ const struct sk_buff *skb, unsigned int protoff)
+{
+ __be16 _ports[2], *ports;
+ int nexthdr;
+
+ memset(dst, 0, sizeof(*dst));
+
+ switch (hinfo->family) {
+ case AF_INET:
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
+ dst->addr.ip.dst = skb->nh.iph->daddr;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
+ dst->addr.ip.src = skb->nh.iph->saddr;
+
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ nexthdr = skb->nh.iph->protocol;
+ break;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ case AF_INET6:
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
+ memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr,
+ sizeof(dst->addr.ip6.dst));
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
+ memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr,
+ sizeof(dst->addr.ip6.src));
+
+ if (!(hinfo->cfg.mode &
+ (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
+ return 0;
+ nexthdr = ipv6_find_hdr(skb, &protoff, -1, NULL);
+ if (nexthdr < 0)
+ return -1;
+ break;
+#endif
+ default:
+ BUG();
+ return 0;
+ }
+
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ ports = skb_header_pointer(skb, protoff, sizeof(_ports),
+ &_ports);
+ break;
+ default:
+ _ports[0] = _ports[1] = 0;
+ ports = _ports;
+ break;
+ }
+ if (!ports)
+ return -1;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT)
+ dst->src_port = ports[0];
+ if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT)
+ dst->dst_port = ports[1];
+ return 0;
}
static int
@@ -393,68 +443,31 @@ hashlimit_match(const struct sk_buff *skb,
unsigned int protoff,
int *hotdrop)
{
- struct ipt_hashlimit_info *r =
- ((struct ipt_hashlimit_info *)matchinfo)->u.master;
- struct ipt_hashlimit_htable *hinfo = r->hinfo;
+ struct xt_hashlimit_info *r =
+ ((struct xt_hashlimit_info *)matchinfo)->u.master;
+ struct xt_hashlimit_htable *hinfo = r->hinfo;
unsigned long now = jiffies;
struct dsthash_ent *dh;
struct dsthash_dst dst;
- /* build 'dst' according to hinfo->cfg and current packet */
- memset(&dst, 0, sizeof(dst));
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DIP)
- dst.dst_ip = skb->nh.iph->daddr;
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SIP)
- dst.src_ip = skb->nh.iph->saddr;
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT
- ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) {
- __be16 _ports[2], *ports;
-
- switch (skb->nh.iph->protocol) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- ports = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_ports), &_ports);
- break;
- default:
- _ports[0] = _ports[1] = 0;
- ports = _ports;
- break;
- }
- if (!ports) {
- /* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
- *hotdrop = 1;
- return 0;
- }
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT)
- dst.src_port = ports[0];
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT)
- dst.dst_port = ports[1];
- }
+ if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
+ goto hotdrop;
spin_lock_bh(&hinfo->lock);
- dh = __dsthash_find(hinfo, &dst);
+ dh = dsthash_find(hinfo, &dst);
if (!dh) {
- dh = __dsthash_alloc_init(hinfo, &dst);
-
+ dh = dsthash_alloc_init(hinfo, &dst);
if (!dh) {
- /* enomem... don't match == DROP */
- if (net_ratelimit())
- printk(KERN_ERR "%s: ENOMEM\n", __FUNCTION__);
spin_unlock_bh(&hinfo->lock);
- return 0;
+ goto hotdrop;
}
dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-
dh->rateinfo.prev = jiffies;
- dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
+ dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
} else {
/* update expiration timeout */
@@ -473,6 +486,10 @@ hashlimit_match(const struct sk_buff *skb,
/* default case: we're overlimit, thus don't match */
return 0;
+
+hotdrop:
+ *hotdrop = 1;
+ return 0;
}
static int
@@ -482,42 +499,37 @@ hashlimit_checkentry(const char *tablename,
void *matchinfo,
unsigned int hook_mask)
{
- struct ipt_hashlimit_info *r = matchinfo;
+ struct xt_hashlimit_info *r = matchinfo;
/* Check for overflow. */
- if (r->cfg.burst == 0
- || user2credits(r->cfg.avg * r->cfg.burst) <
- user2credits(r->cfg.avg)) {
- printk(KERN_ERR "ipt_hashlimit: Overflow, try lower: %u/%u\n",
+ if (r->cfg.burst == 0 ||
+ user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
+ printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
r->cfg.avg, r->cfg.burst);
return 0;
}
-
- if (r->cfg.mode == 0
- || r->cfg.mode > (IPT_HASHLIMIT_HASH_DPT
- |IPT_HASHLIMIT_HASH_DIP
- |IPT_HASHLIMIT_HASH_SIP
- |IPT_HASHLIMIT_HASH_SPT))
+ if (r->cfg.mode == 0 ||
+ r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
+ XT_HASHLIMIT_HASH_DIP |
+ XT_HASHLIMIT_HASH_SIP |
+ XT_HASHLIMIT_HASH_SPT))
return 0;
-
if (!r->cfg.gc_interval)
return 0;
-
if (!r->cfg.expire)
return 0;
-
if (r->name[sizeof(r->name) - 1] != '\0')
return 0;
/* This is the best we've got: We cannot release and re-grab lock,
- * since checkentry() is called before ip_tables.c grabs ipt_mutex.
- * We also cannot grab the hashtable spinlock, since htable_create will
+ * since checkentry() is called before x_tables.c grabs xt_mutex.
+ * We also cannot grab the hashtable spinlock, since htable_create will
* call vmalloc, and that can sleep. And we cannot just re-search
* the list of htable's in htable_create(), since then we would
* create duplicate proc files. -HW */
mutex_lock(&hlimit_mutex);
- r->hinfo = htable_find_get(r->name);
- if (!r->hinfo && (htable_create(r) != 0)) {
+ r->hinfo = htable_find_get(r->name, match->family);
+ if (!r->hinfo && htable_create(r, match->family) != 0) {
mutex_unlock(&hlimit_mutex);
return 0;
}
@@ -525,20 +537,19 @@ hashlimit_checkentry(const char *tablename,
/* Ugly hack: For SMP, we only want to use one set */
r->u.master = r;
-
return 1;
}
static void
hashlimit_destroy(const struct xt_match *match, void *matchinfo)
{
- struct ipt_hashlimit_info *r = matchinfo;
+ struct xt_hashlimit_info *r = matchinfo;
htable_put(r->hinfo);
}
#ifdef CONFIG_COMPAT
-struct compat_ipt_hashlimit_info {
+struct compat_xt_hashlimit_info {
char name[IFNAMSIZ];
struct hashlimit_cfg cfg;
compat_uptr_t hinfo;
@@ -547,40 +558,56 @@ struct compat_ipt_hashlimit_info {
static void compat_from_user(void *dst, void *src)
{
- int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+ int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
memcpy(dst, src, off);
- memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
+ memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
}
static int compat_to_user(void __user *dst, void *src)
{
- int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+ int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
return copy_to_user(dst, src, off) ? -EFAULT : 0;
}
#endif
-static struct ipt_match ipt_hashlimit = {
- .name = "hashlimit",
- .match = hashlimit_match,
- .matchsize = sizeof(struct ipt_hashlimit_info),
+static struct xt_match xt_hashlimit[] = {
+ {
+ .name = "hashlimit",
+ .family = AF_INET,
+ .match = hashlimit_match,
+ .matchsize = sizeof(struct xt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_xt_hashlimit_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
+ .checkentry = hashlimit_checkentry,
+ .destroy = hashlimit_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "hashlimit",
+ .family = AF_INET6,
+ .match = hashlimit_match,
+ .matchsize = sizeof(struct xt_hashlimit_info),
#ifdef CONFIG_COMPAT
- .compatsize = sizeof(struct compat_ipt_hashlimit_info),
- .compat_from_user = compat_from_user,
- .compat_to_user = compat_to_user,
+ .compatsize = sizeof(struct compat_xt_hashlimit_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
#endif
- .checkentry = hashlimit_checkentry,
- .destroy = hashlimit_destroy,
- .me = THIS_MODULE
+ .checkentry = hashlimit_checkentry,
+ .destroy = hashlimit_destroy,
+ .me = THIS_MODULE
+ },
};
/* PROC stuff */
-
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
{
struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = pde->data;
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -598,7 +625,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = pde->data;
unsigned int *bucket = (unsigned int *)v;
*pos = ++(*bucket);
@@ -612,43 +639,59 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
{
struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = pde->data;
unsigned int *bucket = (unsigned int *)v;
kfree(bucket);
-
spin_unlock_bh(&htable->lock);
}
-static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
+static int dl_seq_real_show(struct dsthash_ent *ent, int family,
+ struct seq_file *s)
{
/* recalculate to show accurate numbers */
rateinfo_recalc(ent, jiffies);
- return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
- (long)(ent->expires - jiffies)/HZ,
- NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
- NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
- ent->rateinfo.credit, ent->rateinfo.credit_cap,
- ent->rateinfo.cost);
+ switch (family) {
+ case AF_INET:
+ return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
+ "%u.%u.%u.%u:%u %u %u %u\n",
+ (long)(ent->expires - jiffies)/HZ,
+ NIPQUAD(ent->dst.addr.ip.src),
+ ntohs(ent->dst.src_port),
+ NIPQUAD(ent->dst.addr.ip.dst),
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+ case AF_INET6:
+ return seq_printf(s, "%ld " NIP6_FMT ":%u->"
+ NIP6_FMT ":%u %u %u %u\n",
+ (long)(ent->expires - jiffies)/HZ,
+ NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.src),
+ ntohs(ent->dst.src_port),
+ NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.dst),
+ ntohs(ent->dst.dst_port),
+ ent->rateinfo.credit, ent->rateinfo.credit_cap,
+ ent->rateinfo.cost);
+ default:
+ BUG();
+ return 0;
+ }
}
static int dl_seq_show(struct seq_file *s, void *v)
{
struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = pde->data;
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
struct hlist_node *pos;
- if (!hlist_empty(&htable->hash[*bucket]))
- hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) {
- if (dl_seq_real_show(ent, s)) {
- /* buffer was filled and unable to print that tuple */
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
+ if (dl_seq_real_show(ent, htable->family, s))
return 1;
- }
- }
-
+ }
return 0;
}
@@ -678,56 +721,53 @@ static struct file_operations dl_file_ops = {
.release = seq_release
};
-static int init_or_fini(int fini)
+static int __init xt_hashlimit_init(void)
{
- int ret = 0;
-
- if (fini)
- goto cleanup;
+ int err;
- if (ipt_register_match(&ipt_hashlimit)) {
- ret = -EINVAL;
- goto cleanup_nothing;
- }
+ err = xt_register_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+ if (err < 0)
+ goto err1;
- hashlimit_cachep = kmem_cache_create("ipt_hashlimit",
- sizeof(struct dsthash_ent), 0,
- 0, NULL, NULL);
+ err = -ENOMEM;
+ hashlimit_cachep = kmem_cache_create("xt_hashlimit",
+ sizeof(struct dsthash_ent), 0, 0,
+ NULL, NULL);
if (!hashlimit_cachep) {
- printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n");
- ret = -ENOMEM;
- goto cleanup_unreg_match;
+ printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
+ goto err2;
}
-
- hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net);
- if (!hashlimit_procdir) {
- printk(KERN_ERR "Unable to create proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_free_slab;
+ hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net);
+ if (!hashlimit_procdir4) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+ goto err3;
}
-
- return ret;
-
-cleanup:
+ hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net);
+ if (!hashlimit_procdir6) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+ goto err4;
+ }
+ return 0;
+err4:
remove_proc_entry("ipt_hashlimit", proc_net);
-cleanup_free_slab:
+err3:
kmem_cache_destroy(hashlimit_cachep);
-cleanup_unreg_match:
- ipt_unregister_match(&ipt_hashlimit);
-cleanup_nothing:
- return ret;
-
-}
+err2:
+ xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+err1:
+ return err;
-static int __init ipt_hashlimit_init(void)
-{
- return init_or_fini(0);
}
-static void __exit ipt_hashlimit_fini(void)
+static void __exit xt_hashlimit_fini(void)
{
- init_or_fini(1);
+ remove_proc_entry("ipt_hashlimit", proc_net);
+ remove_proc_entry("ip6t_hashlimit", proc_net);
+ kmem_cache_destroy(hashlimit_cachep);
+ xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
}
-module_init(ipt_hashlimit_init);
-module_exit(ipt_hashlimit_fini);
+module_init(xt_hashlimit_init);
+module_exit(xt_hashlimit_fini);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 5d7818b73e3..04bc32ba719 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,6 +24,7 @@
#endif
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_helper.h>
+#include <net/netfilter/nf_conntrack_compat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -143,13 +144,11 @@ static int check(const char *tablename,
{
struct xt_helper_info *info = matchinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (nf_ct_l3proto_try_module_get(match->family) < 0) {
- printk(KERN_WARNING "can't load nf_conntrack support for "
+ printk(KERN_WARNING "can't load conntrack support for "
"proto=%d\n", match->family);
return 0;
}
-#endif
info->name[29] = '\0';
return 1;
}
@@ -157,9 +156,7 @@ static int check(const char *tablename,
static void
destroy(const struct xt_match *match, void *matchinfo)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
-#endif
}
static struct xt_match xt_helper_match[] = {
@@ -185,7 +182,6 @@ static struct xt_match xt_helper_match[] = {
static int __init xt_helper_init(void)
{
- need_conntrack();
return xt_register_matches(xt_helper_match,
ARRAY_SIZE(xt_helper_match));
}
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 934dddfbcd2..dfa1ee6914c 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
{
const struct xt_mark_info *info = matchinfo;
- return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
+ return ((skb->mark & info->mask) == info->mark) ^ info->invert;
}
static int
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index d3aefd38093..1602086c7fd 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -1,5 +1,5 @@
-/* Kernel module to match one of a list of TCP/UDP/SCTP/DCCP ports: ports are in
- the same place so we can treat them as equal. */
+/* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports:
+ ports are in the same place so we can treat them as equal. */
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
@@ -104,7 +104,7 @@ match(const struct sk_buff *skb,
unsigned int protoff,
int *hotdrop)
{
- u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
const struct xt_multiport *multiinfo = matchinfo;
if (offset)
@@ -135,7 +135,7 @@ match_v1(const struct sk_buff *skb,
unsigned int protoff,
int *hotdrop)
{
- u16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr;
const struct xt_multiport_v1 *multiinfo = matchinfo;
if (offset)
@@ -162,6 +162,7 @@ check(u_int16_t proto,
{
/* Must specify supported protocol, no unknown flags or bad count */
return (proto == IPPROTO_TCP || proto == IPPROTO_UDP
+ || proto == IPPROTO_UDPLITE
|| proto == IPPROTO_SCTP || proto == IPPROTO_DCCP)
&& !(ip_invflags & XT_INV_PROTO)
&& (match_flags == XT_MULTIPORT_SOURCE
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index fd8f954cded..b9b3ffc5451 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -113,20 +113,16 @@ checkentry(const char *tablename,
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return 0;
- if (brnf_deferred_hooks == 0 &&
- info->bitmask & XT_PHYSDEV_OP_OUT &&
+ if (info->bitmask & XT_PHYSDEV_OP_OUT &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
(1 << NF_IP_POST_ROUTING))) {
printk(KERN_WARNING "physdev match: using --physdev-out in the "
"OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
- "traffic is deprecated and breaks other things, it will "
- "be removed in January 2007. See Documentation/"
- "feature-removal-schedule.txt for details. This doesn't "
- "affect you in case you're using it for purely bridged "
- "traffic.\n");
- brnf_deferred_hooks = 1;
+ "traffic is not supported anymore.\n");
+ if (hook_mask & (1 << NF_IP_LOCAL_OUT))
+ return 0;
}
return 1;
}
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 7956acaaa24..71bf036f833 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -71,7 +71,7 @@ match_packet(const struct sk_buff *skb,
duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
++i, offset, sch->type, htons(sch->length), sch->flags);
- offset += (htons(sch->length) + 3) & ~3;
+ offset += (ntohs(sch->length) + 3) & ~3;
duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index d9010b16a1f..df37b912163 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -50,22 +50,18 @@ static int check(const char *tablename,
void *matchinfo,
unsigned int hook_mask)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (nf_ct_l3proto_try_module_get(match->family) < 0) {
- printk(KERN_WARNING "can't load nf_conntrack support for "
+ printk(KERN_WARNING "can't load conntrack support for "
"proto=%d\n", match->family);
return 0;
}
-#endif
return 1;
}
static void
destroy(const struct xt_match *match, void *matchinfo)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_ct_l3proto_module_put(match->family);
-#endif
}
static struct xt_match xt_state_match[] = {
@@ -91,7 +87,6 @@ static struct xt_match xt_state_match[] = {
static int __init xt_state_init(void)
{
- need_conntrack();
return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
}
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index e76a68e0bc6..46414b562a1 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -10,7 +10,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
-MODULE_DESCRIPTION("x_tables match for TCP and UDP, supports IPv4 and IPv6");
+MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xt_tcp");
MODULE_ALIAS("xt_udp");
@@ -234,6 +234,24 @@ static struct xt_match xt_tcpudp_match[] = {
.proto = IPPROTO_UDP,
.me = THIS_MODULE,
},
+ {
+ .name = "udplite",
+ .family = AF_INET,
+ .checkentry = udp_checkentry,
+ .match = udp_match,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "udplite",
+ .family = AF_INET6,
+ .checkentry = udp_checkentry,
+ .match = udp_match,
+ .matchsize = sizeof(struct xt_udp),
+ .proto = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ },
};
static int __init xt_tcpudp_init(void)
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index 9f7121ae13e..56958c85f2b 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -4,7 +4,7 @@
config NETLABEL
bool "NetLabel subsystem support"
- depends on NET && SECURITY
+ depends on SECURITY
default n
---help---
NetLabel provides support for explicit network packet labeling
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index a6ce1d6d5c5..743b05734a4 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -407,12 +407,14 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
&audit_info);
- audit_log_format(audit_buf,
- " cipso_doi=%u cipso_type=%s res=%u",
- doi,
- type_str,
- ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
+ if (audit_buf != NULL) {
+ audit_log_format(audit_buf,
+ " cipso_doi=%u cipso_type=%s res=%u",
+ doi,
+ type_str,
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
return ret_val;
}
@@ -452,17 +454,13 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
}
list_start:
- ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE * nlsze_mult, GFP_KERNEL);
if (ans_skb == NULL) {
ret_val = -ENOMEM;
goto list_failure;
}
- data = netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- info->snd_seq,
- netlbl_cipsov4_gnl_family.id,
- 0,
- NLBL_CIPSOV4_C_LIST);
+ data = genlmsg_put_reply(ans_skb, info, &netlbl_cipsov4_gnl_family,
+ 0, NLBL_CIPSOV4_C_LIST);
if (data == NULL) {
ret_val = -ENOMEM;
goto list_failure;
@@ -568,7 +566,7 @@ list_start:
genlmsg_end(ans_skb, data);
- ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ ret_val = genlmsg_reply(ans_skb, info);
if (ret_val != 0)
goto list_failure;
@@ -607,12 +605,9 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
void *data;
- data = netlbl_netlink_hdr_put(cb_arg->skb,
- NETLINK_CB(cb_arg->nl_cb->skb).pid,
- cb_arg->seq,
- netlbl_cipsov4_gnl_family.id,
- NLM_F_MULTI,
- NLBL_CIPSOV4_C_LISTALL);
+ data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq, &netlbl_cipsov4_gnl_family,
+ NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL);
if (data == NULL)
goto listall_cb_failure;
@@ -687,11 +682,13 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
&audit_info);
- audit_log_format(audit_buf,
- " cipso_doi=%u res=%u",
- doi,
- ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
+ if (audit_buf != NULL) {
+ audit_log_format(audit_buf,
+ " cipso_doi=%u res=%u",
+ doi,
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
return ret_val;
}
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index af4371d3b45..f46a0aeec44 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -202,7 +202,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
int ret_val;
u32 bkt;
struct audit_buffer *audit_buf;
- char *audit_domain;
switch (entry->type) {
case NETLBL_NLTYPE_UNLABELED:
@@ -243,24 +242,24 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
} else
ret_val = -EINVAL;
- if (entry->domain != NULL)
- audit_domain = entry->domain;
- else
- audit_domain = "(default)";
audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
- audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain);
- switch (entry->type) {
- case NETLBL_NLTYPE_UNLABELED:
- audit_log_format(audit_buf, " nlbl_protocol=unlbl");
- break;
- case NETLBL_NLTYPE_CIPSOV4:
+ if (audit_buf != NULL) {
audit_log_format(audit_buf,
- " nlbl_protocol=cipsov4 cipso_doi=%u",
- entry->type_def.cipsov4->doi);
- break;
+ " nlbl_domain=%s",
+ entry->domain ? entry->domain : "(default)");
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ audit_log_format(audit_buf,
+ " nlbl_protocol=cipsov4 cipso_doi=%u",
+ entry->type_def.cipsov4->doi);
+ break;
+ }
+ audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
}
- audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
rcu_read_unlock();
@@ -310,7 +309,6 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
int ret_val = -ENOENT;
struct netlbl_dom_map *entry;
struct audit_buffer *audit_buf;
- char *audit_domain;
rcu_read_lock();
if (domain != NULL)
@@ -348,16 +346,14 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
spin_unlock(&netlbl_domhsh_def_lock);
}
- if (entry->domain != NULL)
- audit_domain = entry->domain;
- else
- audit_domain = "(default)";
audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
- audit_log_format(audit_buf,
- " nlbl_domain=%s res=%u",
- audit_domain,
- ret_val == 0 ? 1 : 0);
- audit_log_end(audit_buf);
+ if (audit_buf != NULL) {
+ audit_log_format(audit_buf,
+ " nlbl_domain=%s res=%u",
+ entry->domain ? entry->domain : "(default)",
+ ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
if (ret_val == 0)
call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index ff971103fd0..e03a3282c55 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -40,6 +40,207 @@
#include "netlabel_user.h"
/*
+ * Security Attribute Functions
+ */
+
+/**
+ * netlbl_secattr_catmap_walk - Walk a LSM secattr catmap looking for a bit
+ * @catmap: the category bitmap
+ * @offset: the offset to start searching at, in bits
+ *
+ * Description:
+ * This function walks a LSM secattr category bitmap starting at @offset and
+ * returns the spot of the first set bit or -ENOENT if no bits are set.
+ *
+ */
+int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap,
+ u32 offset)
+{
+ struct netlbl_lsm_secattr_catmap *iter = catmap;
+ u32 node_idx;
+ u32 node_bit;
+ NETLBL_CATMAP_MAPTYPE bitmap;
+
+ if (offset > iter->startbit) {
+ while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) {
+ iter = iter->next;
+ if (iter == NULL)
+ return -ENOENT;
+ }
+ node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE;
+ node_bit = offset - iter->startbit -
+ (NETLBL_CATMAP_MAPSIZE * node_idx);
+ } else {
+ node_idx = 0;
+ node_bit = 0;
+ }
+ bitmap = iter->bitmap[node_idx] >> node_bit;
+
+ for (;;) {
+ if (bitmap != 0) {
+ while ((bitmap & NETLBL_CATMAP_BIT) == 0) {
+ bitmap >>= 1;
+ node_bit++;
+ }
+ return iter->startbit +
+ (NETLBL_CATMAP_MAPSIZE * node_idx) + node_bit;
+ }
+ if (++node_idx >= NETLBL_CATMAP_MAPCNT) {
+ if (iter->next != NULL) {
+ iter = iter->next;
+ node_idx = 0;
+ } else
+ return -ENOENT;
+ }
+ bitmap = iter->bitmap[node_idx];
+ node_bit = 0;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * netlbl_secattr_catmap_walk_rng - Find the end of a string of set bits
+ * @catmap: the category bitmap
+ * @offset: the offset to start searching at, in bits
+ *
+ * Description:
+ * This function walks a LSM secattr category bitmap starting at @offset and
+ * returns the spot of the first cleared bit or -ENOENT if the offset is past
+ * the end of the bitmap.
+ *
+ */
+int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap,
+ u32 offset)
+{
+ struct netlbl_lsm_secattr_catmap *iter = catmap;
+ u32 node_idx;
+ u32 node_bit;
+ NETLBL_CATMAP_MAPTYPE bitmask;
+ NETLBL_CATMAP_MAPTYPE bitmap;
+
+ if (offset > iter->startbit) {
+ while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) {
+ iter = iter->next;
+ if (iter == NULL)
+ return -ENOENT;
+ }
+ node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE;
+ node_bit = offset - iter->startbit -
+ (NETLBL_CATMAP_MAPSIZE * node_idx);
+ } else {
+ node_idx = 0;
+ node_bit = 0;
+ }
+ bitmask = NETLBL_CATMAP_BIT << node_bit;
+
+ for (;;) {
+ bitmap = iter->bitmap[node_idx];
+ while (bitmask != 0 && (bitmap & bitmask) != 0) {
+ bitmask <<= 1;
+ node_bit++;
+ }
+
+ if (bitmask != 0)
+ return iter->startbit +
+ (NETLBL_CATMAP_MAPSIZE * node_idx) +
+ node_bit - 1;
+ else if (++node_idx >= NETLBL_CATMAP_MAPCNT) {
+ if (iter->next == NULL)
+ return iter->startbit + NETLBL_CATMAP_SIZE - 1;
+ iter = iter->next;
+ node_idx = 0;
+ }
+ bitmask = NETLBL_CATMAP_BIT;
+ node_bit = 0;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * netlbl_secattr_catmap_setbit - Set a bit in a LSM secattr catmap
+ * @catmap: the category bitmap
+ * @bit: the bit to set
+ * @flags: memory allocation flags
+ *
+ * Description:
+ * Set the bit specified by @bit in @catmap. Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap,
+ u32 bit,
+ gfp_t flags)
+{
+ struct netlbl_lsm_secattr_catmap *iter = catmap;
+ u32 node_bit;
+ u32 node_idx;
+
+ while (iter->next != NULL &&
+ bit >= (iter->startbit + NETLBL_CATMAP_SIZE))
+ iter = iter->next;
+ if (bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) {
+ iter->next = netlbl_secattr_catmap_alloc(flags);
+ if (iter->next == NULL)
+ return -ENOMEM;
+ iter = iter->next;
+ iter->startbit = bit & ~(NETLBL_CATMAP_SIZE - 1);
+ }
+
+ /* gcc always rounds to zero when doing integer division */
+ node_idx = (bit - iter->startbit) / NETLBL_CATMAP_MAPSIZE;
+ node_bit = bit - iter->startbit - (NETLBL_CATMAP_MAPSIZE * node_idx);
+ iter->bitmap[node_idx] |= NETLBL_CATMAP_BIT << node_bit;
+
+ return 0;
+}
+
+/**
+ * netlbl_secattr_catmap_setrng - Set a range of bits in a LSM secattr catmap
+ * @catmap: the category bitmap
+ * @start: the starting bit
+ * @end: the last bit in the string
+ * @flags: memory allocation flags, passed on to netlbl_secattr_catmap_setbit()
+ *
+ * Description:
+ * Set a range of bits, starting at @start and ending with @end. Returns zero
+ * on success, negative values on failure.
+ *
+ */
+int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap,
+ u32 start,
+ u32 end,
+ gfp_t flags)
+{
+ int ret_val = 0;
+ struct netlbl_lsm_secattr_catmap *iter = catmap;
+ u32 iter_max_spot;
+ u32 spot;
+
+ /* XXX - This could probably be made a bit faster by combining writes
+ * to the catmap instead of setting a single bit each time, but for
+ * right now skipping to the start of the range in the catmap should
+ * be a nice improvement over calling the individual setbit function
+ * repeatedly from a loop. */
+
+ while (iter->next != NULL &&
+ start >= (iter->startbit + NETLBL_CATMAP_SIZE))
+ iter = iter->next;
+ iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE;
+
+ for (spot = start; spot <= end && ret_val == 0; spot++) {
+ if (spot >= iter_max_spot && iter->next != NULL) {
+ iter = iter->next;
+ iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE;
+ }
+ ret_val = netlbl_secattr_catmap_setbit(iter, spot, flags);
+ }
+
+ return ret_val;
+}
+
+/*
* LSM Functions
*/
@@ -62,6 +263,9 @@ int netlbl_socket_setattr(const struct socket *sock,
int ret_val = -ENOENT;
struct netlbl_dom_map *dom_entry;
+ if ((secattr->flags & NETLBL_SECATTR_DOMAIN) == 0)
+ return -ENOENT;
+
rcu_read_lock();
dom_entry = netlbl_domhsh_getentry(secattr->domain);
if (dom_entry == NULL)
@@ -146,10 +350,8 @@ int netlbl_socket_getattr(const struct socket *sock,
int netlbl_skbuff_getattr(const struct sk_buff *skb,
struct netlbl_lsm_secattr *secattr)
{
- int ret_val;
-
- ret_val = cipso_v4_skbuff_getattr(skb, secattr);
- if (ret_val == 0)
+ if (CIPSO_V4_OPTEXIST(skb) &&
+ cipso_v4_skbuff_getattr(skb, secattr) == 0)
return 0;
return netlbl_unlabel_getattr(secattr);
@@ -200,7 +402,7 @@ void netlbl_cache_invalidate(void)
int netlbl_cache_add(const struct sk_buff *skb,
const struct netlbl_lsm_secattr *secattr)
{
- if (secattr->cache == NULL)
+ if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0)
return -ENOMSG;
if (CIPSO_V4_OPTEXIST(skb))
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 53c9079ad2c..e8c80f33f3d 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -188,12 +188,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
struct netlbl_domhsh_walk_arg *cb_arg = arg;
void *data;
- data = netlbl_netlink_hdr_put(cb_arg->skb,
- NETLINK_CB(cb_arg->nl_cb->skb).pid,
- cb_arg->seq,
- netlbl_mgmt_gnl_family.id,
- NLM_F_MULTI,
- NLBL_MGMT_C_LISTALL);
+ data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ cb_arg->seq, &netlbl_mgmt_gnl_family,
+ NLM_F_MULTI, NLBL_MGMT_C_LISTALL);
if (data == NULL)
goto listall_cb_failure;
@@ -356,15 +353,11 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
void *data;
struct netlbl_dom_map *entry;
- ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (ans_skb == NULL)
return -ENOMEM;
- data = netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- info->snd_seq,
- netlbl_mgmt_gnl_family.id,
- 0,
- NLBL_MGMT_C_LISTDEF);
+ data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family,
+ 0, NLBL_MGMT_C_LISTDEF);
if (data == NULL)
goto listdef_failure;
@@ -390,7 +383,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(ans_skb, data);
- ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ ret_val = genlmsg_reply(ans_skb, info);
if (ret_val != 0)
goto listdef_failure;
return 0;
@@ -422,12 +415,9 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
int ret_val = -ENOMEM;
void *data;
- data = netlbl_netlink_hdr_put(skb,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- netlbl_mgmt_gnl_family.id,
- NLM_F_MULTI,
- NLBL_MGMT_C_PROTOCOLS);
+ data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ &netlbl_mgmt_gnl_family, NLM_F_MULTI,
+ NLBL_MGMT_C_PROTOCOLS);
if (data == NULL)
goto protocols_cb_failure;
@@ -492,15 +482,11 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *ans_skb = NULL;
void *data;
- ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (ans_skb == NULL)
return -ENOMEM;
- data = netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- info->snd_seq,
- netlbl_mgmt_gnl_family.id,
- 0,
- NLBL_MGMT_C_VERSION);
+ data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family,
+ 0, NLBL_MGMT_C_VERSION);
if (data == NULL)
goto version_failure;
@@ -512,7 +498,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(ans_skb, data);
- ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ ret_val = genlmsg_reply(ans_skb, info);
if (ret_val != 0)
goto version_failure;
return 0;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 1833ad233b3..5bc37181662 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -35,6 +35,7 @@
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/audit.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -47,7 +48,8 @@
#include "netlabel_unlabeled.h"
/* Accept unlabeled packets flag */
-static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(netlabel_unlabel_acceptflg_lock);
+static u8 netlabel_unlabel_acceptflg = 0;
/* NetLabel Generic NETLINK CIPSOv4 family */
static struct genl_family netlbl_unlabel_gnl_family = {
@@ -82,13 +84,20 @@ static void netlbl_unlabel_acceptflg_set(u8 value,
struct audit_buffer *audit_buf;
u8 old_val;
- old_val = atomic_read(&netlabel_unlabel_accept_flg);
- atomic_set(&netlabel_unlabel_accept_flg, value);
+ rcu_read_lock();
+ old_val = netlabel_unlabel_acceptflg;
+ spin_lock(&netlabel_unlabel_acceptflg_lock);
+ netlabel_unlabel_acceptflg = value;
+ spin_unlock(&netlabel_unlabel_acceptflg_lock);
+ rcu_read_unlock();
audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW,
audit_info);
- audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val);
- audit_log_end(audit_buf);
+ if (audit_buf != NULL) {
+ audit_log_format(audit_buf,
+ " unlbl_accept=%u old=%u", value, old_val);
+ audit_log_end(audit_buf);
+ }
}
/*
@@ -138,29 +147,27 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *ans_skb;
void *data;
- ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (ans_skb == NULL)
goto list_failure;
- data = netlbl_netlink_hdr_put(ans_skb,
- info->snd_pid,
- info->snd_seq,
- netlbl_unlabel_gnl_family.id,
- 0,
- NLBL_UNLABEL_C_LIST);
+ data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family,
+ 0, NLBL_UNLABEL_C_LIST);
if (data == NULL) {
ret_val = -ENOMEM;
goto list_failure;
}
+ rcu_read_lock();
ret_val = nla_put_u8(ans_skb,
NLBL_UNLABEL_A_ACPTFLG,
- atomic_read(&netlabel_unlabel_accept_flg));
+ netlabel_unlabel_acceptflg);
+ rcu_read_unlock();
if (ret_val != 0)
goto list_failure;
genlmsg_end(ans_skb, data);
- ret_val = genlmsg_unicast(ans_skb, info->snd_pid);
+ ret_val = genlmsg_reply(ans_skb, info);
if (ret_val != 0)
goto list_failure;
return 0;
@@ -240,10 +247,17 @@ int netlbl_unlabel_genl_init(void)
*/
int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
{
- if (atomic_read(&netlabel_unlabel_accept_flg) == 1)
- return netlbl_secattr_init(secattr);
+ int ret_val;
- return -ENOMSG;
+ rcu_read_lock();
+ if (netlabel_unlabel_acceptflg == 1) {
+ netlbl_secattr_init(secattr);
+ ret_val = 0;
+ } else
+ ret_val = -ENOMSG;
+ rcu_read_unlock();
+
+ return ret_val;
}
/**
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 98a416381e6..42f12bd6596 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -46,6 +46,10 @@
#include "netlabel_cipso_v4.h"
#include "netlabel_user.h"
+/* do not do any auditing if audit_enabled == 0, see kernel/audit.c for
+ * details */
+extern int audit_enabled;
+
/*
* NetLabel NETLINK Setup Functions
*/
@@ -101,6 +105,9 @@ struct audit_buffer *netlbl_audit_start_common(int type,
char *secctx;
u32 secctx_len;
+ if (audit_enabled == 0)
+ return NULL;
+
audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type);
if (audit_buf == NULL)
return NULL;
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 47967ef3296..6d7f4ab46c2 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -42,37 +42,6 @@
/* NetLabel NETLINK helper functions */
/**
- * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
- * @skb: the packet
- * @pid: the PID of the receipient
- * @seq: the sequence number
- * @type: the generic NETLINK message family type
- * @cmd: command
- *
- * Description:
- * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
- * struct to the packet. Returns a pointer to the start of the payload buffer
- * on success or NULL on failure.
- *
- */
-static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
- u32 pid,
- u32 seq,
- int type,
- int flags,
- u8 cmd)
-{
- return genlmsg_put(skb,
- pid,
- seq,
- type,
- 0,
- flags,
- cmd,
- NETLBL_PROTO_VERSION);
-}
-
-/**
* netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
* @skb: the packet
* @audit_info: NetLabel audit information
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d56e0d21f91..276131fe56d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -699,7 +699,7 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
struct sock *netlink_getsockbyfilp(struct file *filp)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = filp->f_path.dentry->d_inode;
struct sock *sock;
if (!S_ISSOCK(inode->i_mode))
@@ -1075,8 +1075,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
len = sizeof(int);
val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
- put_user(len, optlen);
- put_user(val, optval);
+ if (put_user(len, optlen) ||
+ put_user(val, optval))
+ return -EFAULT;
err = 0;
break;
default:
@@ -1147,12 +1148,11 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (len > sk->sk_sndbuf - 32)
goto out;
err = -ENOBUFS;
- skb = nlmsg_new(len, GFP_KERNEL);
+ skb = alloc_skb(len, GFP_KERNEL);
if (skb==NULL)
goto out;
NETLINK_CB(skb).pid = nlk->pid;
- NETLINK_CB(skb).dst_pid = dst_pid;
NETLINK_CB(skb).dst_group = dst_group;
NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
@@ -1434,14 +1434,13 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
struct sk_buff *skb;
struct nlmsghdr *rep;
struct nlmsgerr *errmsg;
- int size;
+ size_t payload = sizeof(*errmsg);
- if (err == 0)
- size = nlmsg_total_size(sizeof(*errmsg));
- else
- size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
+ /* error messages get the original request appended */
+ if (err)
+ payload += nlmsg_len(nlh);
- skb = nlmsg_new(size, GFP_KERNEL);
+ skb = nlmsg_new(payload, GFP_KERNEL);
if (!skb) {
struct sock *sk;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 49bc2db7982..548e4e6e698 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -143,6 +143,13 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
goto errout;
}
+ if (ops->dumpit)
+ ops->flags |= GENL_CMD_CAP_DUMP;
+ if (ops->doit)
+ ops->flags |= GENL_CMD_CAP_DO;
+ if (ops->policy)
+ ops->flags |= GENL_CMD_CAP_HASPOL;
+
genl_lock();
list_add_tail(&ops->ops_list, &family->ops_list);
genl_unlock();
@@ -331,7 +338,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
*errp = err = netlink_dump_start(genl_sock, skb, nlh,
- ops->dumpit, NULL);
+ ops->dumpit, ops->done);
if (err == 0)
skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
skb->len));
@@ -384,16 +391,19 @@ static void genl_rcv(struct sock *sk, int len)
* Controller
**************************************************************************/
+static struct genl_family genl_ctrl = {
+ .id = GENL_ID_CTRL,
+ .name = "nlctrl",
+ .version = 0x2,
+ .maxattr = CTRL_ATTR_MAX,
+};
+
static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
- struct nlattr *nla_ops;
- struct genl_ops *ops;
void *hdr;
- int idx = 1;
- hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
- family->version);
+ hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
if (hdr == NULL)
return -1;
@@ -403,34 +413,31 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize);
NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr);
- nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
- if (nla_ops == NULL)
- goto nla_put_failure;
-
- list_for_each_entry(ops, &family->ops_list, ops_list) {
- struct nlattr *nest;
+ if (!list_empty(&family->ops_list)) {
+ struct nlattr *nla_ops;
+ struct genl_ops *ops;
+ int idx = 1;
- nest = nla_nest_start(skb, idx++);
- if (nest == NULL)
+ nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
+ if (nla_ops == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
- NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
+ list_for_each_entry(ops, &family->ops_list, ops_list) {
+ struct nlattr *nest;
- if (ops->policy)
- NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY);
+ nest = nla_nest_start(skb, idx++);
+ if (nest == NULL)
+ goto nla_put_failure;
- if (ops->doit)
- NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT);
+ NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
+ NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
- if (ops->dumpit)
- NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT);
+ nla_nest_end(skb, nest);
+ }
- nla_nest_end(skb, nest);
+ nla_nest_end(skb, nla_ops);
}
- nla_nest_end(skb, nla_ops);
-
return genlmsg_end(skb, hdr);
nla_put_failure:
@@ -480,7 +487,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
struct sk_buff *skb;
int err;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb == NULL)
return ERR_PTR(-ENOBUFS);
@@ -529,7 +536,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
goto errout;
}
- err = genlmsg_unicast(msg, info->snd_pid);
+ err = genlmsg_reply(msg, info);
errout:
return err;
}
@@ -562,13 +569,6 @@ static struct genl_ops genl_ctrl_ops = {
.policy = ctrl_policy,
};
-static struct genl_family genl_ctrl = {
- .id = GENL_ID_CTRL,
- .name = "nlctrl",
- .version = 0x1,
- .maxattr = CTRL_ATTR_MAX,
-};
-
static int __init genl_init(void)
{
int i, err;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index c11737f472d..0096105bcd4 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -155,14 +155,15 @@ static int nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax2
atomic_set(&nr_neigh->refcount, 1);
if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
- if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) {
+ nr_neigh->digipeat = kmemdup(ax25_digi,
+ sizeof(*ax25_digi),
+ GFP_KERNEL);
+ if (nr_neigh->digipeat == NULL) {
kfree(nr_neigh);
if (nr_node)
nr_node_put(nr_node);
return -ENOMEM;
}
- memcpy(nr_neigh->digipeat, ax25_digi,
- sizeof(*ax25_digi));
}
spin_lock_bh(&nr_neigh_list_lock);
@@ -432,11 +433,12 @@ static int nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net
atomic_set(&nr_neigh->refcount, 1);
if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
- if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) {
+ nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi),
+ GFP_KERNEL);
+ if (nr_neigh->digipeat == NULL) {
kfree(nr_neigh);
return -ENOMEM;
}
- memcpy(nr_neigh->digipeat, ax25_digi, sizeof(*ax25_digi));
}
spin_lock_bh(&nr_neigh_list_lock);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f4ccb90e673..da73e8a8c18 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -71,6 +71,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
+#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -201,7 +202,7 @@ struct packet_sock {
spinlock_t bind_lock;
char running; /* prot_hook is attached*/
int ifindex; /* bound device */
- unsigned short num;
+ __be16 num;
#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist *mclist;
#endif
@@ -331,7 +332,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto=0;
+ __be16 proto=0;
int err;
/*
@@ -659,7 +660,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
sll->sll_ifindex = dev->ifindex;
h->tp_status = status;
- mb();
+ smp_mb();
{
struct page *p_start, *p_end;
@@ -704,7 +705,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto;
+ __be16 proto;
unsigned char *addr;
int ifindex, err, reserve = 0;
@@ -858,7 +859,7 @@ static int packet_release(struct socket *sock)
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
+static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
struct packet_sock *po = pkt_sk(sk);
/*
@@ -983,6 +984,7 @@ static int packet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct packet_sock *po;
+ __be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
if (!capable(CAP_NET_RAW))
@@ -1010,7 +1012,7 @@ static int packet_create(struct socket *sock, int protocol)
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
- po->num = protocol;
+ po->num = proto;
sk->sk_destruct = packet_sock_destruct;
atomic_inc(&packet_socks_nr);
@@ -1027,8 +1029,8 @@ static int packet_create(struct socket *sock, int protocol)
#endif
po->prot_hook.af_packet_priv = sk;
- if (protocol) {
- po->prot_hook.type = protocol;
+ if (proto) {
+ po->prot_hook.type = proto;
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
@@ -1624,7 +1626,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
{
char **pg_vec = NULL;
struct packet_sock *po = pkt_sk(sk);
- int was_running, num, order = 0;
+ int was_running, order = 0;
+ __be16 num;
int err = 0;
if (req->tp_block_nr) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a22542fa1bc..7252344779a 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -396,7 +396,7 @@ int rose_add_loopback_neigh(void)
int rose_add_loopback_node(rose_address *address)
{
struct rose_node *rose_node;
- unsigned int err = 0;
+ int err = 0;
spin_lock_bh(&rose_node_list_lock);
@@ -432,7 +432,7 @@ int rose_add_loopback_node(rose_address *address)
out:
spin_unlock_bh(&rose_node_list_lock);
- return 0;
+ return err;
}
/*
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
index dada34a77b2..49effd92144 100644
--- a/net/rxrpc/krxiod.c
+++ b/net/rxrpc/krxiod.c
@@ -13,6 +13,7 @@
#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/init.h>
+#include <linux/freezer.h>
#include <rxrpc/krxiod.h>
#include <rxrpc/transport.h>
#include <rxrpc/peer.h>
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
index cea4eb5e249..3ab0f77409f 100644
--- a/net/rxrpc/krxsecd.c
+++ b/net/rxrpc/krxsecd.c
@@ -27,6 +27,7 @@
#include <rxrpc/call.h>
#include <linux/udp.h>
#include <linux/ip.h>
+#include <linux/freezer.h>
#include <net/sock.h>
#include "internal.h"
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
index 3e7466900bd..9a9b6132dba 100644
--- a/net/rxrpc/krxtimod.c
+++ b/net/rxrpc/krxtimod.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/completion.h>
+#include <linux/freezer.h>
#include <rxrpc/rxrpc.h>
#include <rxrpc/krxtimod.h>
#include <asm/errno.h>
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index 94b2e2fe6fd..4268b38d92d 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -31,7 +31,6 @@
#endif
#include <linux/errqueue.h>
#include <asm/uaccess.h>
-#include <asm/checksum.h>
#include "internal.h"
struct errormsg {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 8298ea9ffe1..f4544dd8647 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -6,6 +6,7 @@ menu "QoS and/or fair queueing"
config NET_SCHED
bool "QoS and/or fair queueing"
+ select NET_SCH_FIFO
---help---
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
@@ -40,6 +41,9 @@ config NET_SCHED
The available schedulers are listed in the following questions; you
can say Y to as many as you like. If unsure, say N now.
+config NET_SCH_FIFO
+ bool
+
if NET_SCHED
choice
@@ -320,7 +324,7 @@ config CLS_U32_PERF
config CLS_U32_MARK
bool "Netfilter marks support"
- depends on NET_CLS_U32 && NETFILTER
+ depends on NET_CLS_U32
---help---
Say Y here to be able to use netfilter marks as u32 key.
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 0f06aec6609..ff2d6e5e282 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -4,7 +4,7 @@
obj-y := sch_generic.o
-obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o
+obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
obj-$(CONFIG_NET_CLS) += cls_api.o
obj-$(CONFIG_NET_CLS_ACT) += act_api.o
obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 6cff56696a8..85de7efd5fe 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -48,14 +48,14 @@ static struct tcf_hashinfo gact_hash_info = {
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (net_random() % gact->tcfg_pval)
+ if (!gact->tcfg_pval || net_random() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
static int gact_determ(struct tcf_gact *gact)
{
- if (gact->tcf_bstats.packets % gact->tcfg_pval)
+ if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d8c9310da6e..a9608064a4c 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -156,10 +156,9 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
strcpy(tname, "mangle");
- t = kmalloc(td->u.target_size, GFP_KERNEL);
+ t = kmemdup(td, td->u.target_size, GFP_KERNEL);
if (unlikely(!t))
goto err2;
- memcpy(t, td, td->u.target_size);
if ((err = ipt_init_target(t, tname, hook)) < 0)
goto err3;
@@ -256,13 +255,12 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
** for foolproof you need to not assume this
*/
- t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
+ t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
if (unlikely(!t))
goto rtattr_failure;
c.bindcnt = ipt->tcf_bindcnt - bind;
c.refcnt = ipt->tcf_refcnt - ref;
- memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index fed47b65883..af68e1e8325 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -46,6 +46,18 @@ static struct tcf_hashinfo police_hash_info = {
.lock = &police_lock,
};
+/* old policer structure from before tc actions */
+struct tc_police_compat
+{
+ u32 index;
+ int action;
+ u32 limit;
+ u32 burst;
+ u32 mtu;
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+};
+
/* Each policer is serialized by its individual spinlock */
#ifdef CONFIG_NET_CLS_ACT
@@ -131,12 +143,15 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+ int size;
if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
return -EINVAL;
- if (tb[TCA_POLICE_TBF-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+ if (tb[TCA_POLICE_TBF-1] == NULL)
+ return -EINVAL;
+ size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
+ if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
return -EINVAL;
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
@@ -415,12 +430,15 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
struct tcf_police *police;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
+ int size;
if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
return NULL;
- if (tb[TCA_POLICE_TBF-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+ if (tb[TCA_POLICE_TBF-1] == NULL)
+ return NULL;
+ size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
+ if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
return NULL;
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 901571a6770..5fe80854ca9 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -71,11 +71,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
{
- d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
+ d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL);
if (unlikely(!d->tcfd_defdata))
return -ENOMEM;
d->tcfd_datalen = datalen;
- memcpy(d->tcfd_defdata, defdata, datalen);
return 0;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37a18402164..edb8fc97ae1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -217,7 +217,7 @@ replay:
/* Create new proto tcf */
err = -ENOBUFS;
- if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
+ if ((tp = kzalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
goto errout;
err = -EINVAL;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
@@ -247,7 +247,6 @@ replay:
kfree(tp);
goto errout;
}
- memset(tp, 0, sizeof(*tp));
tp->ops = tp_ops;
tp->protocol = protocol;
tp->prio = nprio ? : tcf_auto_prio(*back);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e54acc6bccc..c797d6ada7d 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -101,13 +101,10 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct fw_head *head = (struct fw_head*)tp->root;
struct fw_filter *f;
int r;
-#ifdef CONFIG_NETFILTER
- u32 id = skb->nfmark & head->mask;
-#else
- u32 id = 0;
-#endif
+ u32 id = skb->mark;
if (head != NULL) {
+ id &= head->mask;
for (f=head->ht[fw_hash(id)]; f; f=f->next) {
if (f->id == id) {
*res = f->res;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 6e230ecfba0..587b9adab38 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -77,7 +77,7 @@ struct rsvp_head
struct rsvp_session
{
struct rsvp_session *next;
- u32 dst[RSVP_DST_LEN];
+ __be32 dst[RSVP_DST_LEN];
struct tc_rsvp_gpi dpi;
u8 protocol;
u8 tunnelid;
@@ -89,7 +89,7 @@ struct rsvp_session
struct rsvp_filter
{
struct rsvp_filter *next;
- u32 src[RSVP_DST_LEN];
+ __be32 src[RSVP_DST_LEN];
struct tc_rsvp_gpi spi;
u8 tunnelhdr;
@@ -100,17 +100,17 @@ struct rsvp_filter
struct rsvp_session *sess;
};
-static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
+static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
{
- unsigned h = dst[RSVP_DST_LEN-1];
+ unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
h ^= h>>16;
h ^= h>>8;
return (h ^ protocol ^ tunnelid) & 0xFF;
}
-static __inline__ unsigned hash_src(u32 *src)
+static __inline__ unsigned hash_src(__be32 *src)
{
- unsigned h = src[RSVP_DST_LEN-1];
+ unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
h ^= h>>16;
h ^= h>>8;
h ^= h>>4;
@@ -138,7 +138,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned h1, h2;
- u32 *dst, *src;
+ __be32 *dst, *src;
u8 protocol;
u8 tunnelid = 0;
u8 *xprt;
@@ -410,7 +410,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
struct rtattr *tb[TCA_RSVP_MAX];
struct tcf_exts e;
unsigned h1, h2;
- u32 *dst;
+ __be32 *dst;
int err;
if (opt == NULL)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0a6cfa0005b..8b519480199 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -143,7 +143,7 @@ next_knode:
#endif
#ifdef CONFIG_CLS_U32_MARK
- if ((skb->nfmark & n->mark.mask) != n->mark.val) {
+ if ((skb->mark & n->mark.mask) != n->mark.val) {
n = n->next;
goto next_knode;
} else {
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 61e3b740ab1..45d47d37155 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -208,13 +208,9 @@ META_COLLECTOR(int_maclen)
* Netfilter
**************************************************************************/
-META_COLLECTOR(int_nfmark)
+META_COLLECTOR(int_mark)
{
-#ifdef CONFIG_NETFILTER
- dst->value = skb->nfmark;
-#else
- dst->value = 0;
-#endif
+ dst->value = skb->mark;
}
/**************************************************************************
@@ -490,7 +486,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
[META_ID(PKTLEN)] = META_FUNC(int_pktlen),
[META_ID(DATALEN)] = META_FUNC(int_datalen),
[META_ID(MACLEN)] = META_FUNC(int_maclen),
- [META_ID(NFMARK)] = META_FUNC(int_nfmark),
+ [META_ID(NFMARK)] = META_FUNC(int_mark),
[META_ID(TCINDEX)] = META_FUNC(int_tcindex),
[META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
[META_ID(RTIIF)] = META_FUNC(int_rtiif),
@@ -550,10 +546,9 @@ static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
{
int len = RTA_PAYLOAD(rta);
- dst->val = (unsigned long) kmalloc(len, GFP_KERNEL);
+ dst->val = (unsigned long)kmemdup(RTA_DATA(rta), len, GFP_KERNEL);
if (dst->val == 0UL)
return -ENOMEM;
- memcpy((void *) dst->val, RTA_DATA(rta), len);
dst->len = len;
return 0;
}
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index cc80babfd79..005db409be6 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -34,12 +34,10 @@ static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
return -EINVAL;
em->datalen = sizeof(*nbyte) + nbyte->len;
- em->data = (unsigned long) kmalloc(em->datalen, GFP_KERNEL);
+ em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
if (em->data == 0UL)
return -ENOBUFS;
- memcpy((void *) em->data, data, em->datalen);
-
return 0;
}
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 0fd0768a17c..8f8a16da72a 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -251,12 +251,11 @@ static int tcf_em_validate(struct tcf_proto *tp,
goto errout;
em->data = *(u32 *) data;
} else {
- void *v = kmalloc(data_len, GFP_KERNEL);
+ void *v = kmemdup(data, data_len, GFP_KERNEL);
if (v == NULL) {
err = -ENOBUFS;
goto errout;
}
- memcpy(v, data, data_len);
em->data = (unsigned long) v;
}
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0b648929114..65825f4409d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -191,21 +191,27 @@ int unregister_qdisc(struct Qdisc_ops *qops)
(root qdisc, all its children, children of children etc.)
*/
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
- read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
- if (q->handle == handle) {
- read_unlock(&qdisc_tree_lock);
+ if (q->handle == handle)
return q;
- }
}
- read_unlock(&qdisc_tree_lock);
return NULL;
}
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+ struct Qdisc *q;
+
+ read_lock(&qdisc_tree_lock);
+ q = __qdisc_lookup(dev, handle);
+ read_unlock(&qdisc_tree_lock);
+ return q;
+}
+
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -348,6 +354,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
return oqdisc;
}
+void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+{
+ struct Qdisc_class_ops *cops;
+ unsigned long cl;
+ u32 parentid;
+
+ if (n == 0)
+ return;
+ while ((parentid = sch->parent)) {
+ sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ cops = sch->ops->cl_ops;
+ if (cops->qlen_notify) {
+ cl = cops->get(sch, parentid);
+ cops->qlen_notify(sch, cl);
+ cops->put(sch, cl);
+ }
+ sch->q.qlen -= n;
+ }
+}
+EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
to device "dev".
@@ -1112,7 +1138,7 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
{
int err = 0;
- u32 protocol = skb->protocol;
+ __be16 protocol = skb->protocol;
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto *otp = tp;
reclassify:
@@ -1277,7 +1303,6 @@ static int __init pktsched_init(void)
subsys_initcall(pktsched_init);
-EXPORT_SYMBOL(qdisc_lookup);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index dbf44da0912..edc7bb0b9c8 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -316,7 +316,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
}
memset(flow,0,sizeof(*flow));
flow->filter_list = NULL;
- if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+ if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid)))
flow->q = &noop_qdisc;
DPRINTK("atm_tc_change: qdisc %p\n",flow->q);
flow->sock = sock;
@@ -576,7 +576,8 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt)
DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
p->flows = &p->link;
- if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+ if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,
+ sch->handle)))
p->link.q = &noop_qdisc;
DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q);
p->link.filter_list = NULL;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index bac881bfe36..f79a4f3d0a9 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -371,8 +371,6 @@ static void cbq_deactivate_class(struct cbq_class *this)
return;
}
}
-
- cl = cl_prev->next_alive;
return;
}
} while ((cl_prev = cl) != q->active[prio]);
@@ -1258,6 +1256,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
do {
if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
sch->q.qlen--;
+ if (!cl->q->q.qlen)
+ cbq_deactivate_class(cl);
return len;
}
} while ((cl = cl->next_alive) != cl_head);
@@ -1429,7 +1429,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
q->link.sibling = &q->link;
q->link.classid = sch->handle;
q->link.qdisc = sch;
- if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+ if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ sch->handle)))
q->link.q = &noop_qdisc;
q->link.priority = TC_CBQ_MAXPRIO-1;
@@ -1674,7 +1675,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl) {
if (new == NULL) {
- if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
+ if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ cl->classid)) == NULL)
return -ENOBUFS;
} else {
#ifdef CONFIG_NET_CLS_POLICE
@@ -1683,9 +1685,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
#endif
}
sch_tree_lock(sch);
- *old = cl->q;
- cl->q = new;
- sch->q.qlen -= (*old)->q.qlen;
+ *old = xchg(&cl->q, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
@@ -1702,6 +1703,14 @@ cbq_leaf(struct Qdisc *sch, unsigned long arg)
return cl ? cl->q : NULL;
}
+static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class *)arg;
+
+ if (cl->q->q.qlen == 0)
+ cbq_deactivate_class(cl);
+}
+
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
struct cbq_sched_data *q = qdisc_priv(sch);
@@ -1932,7 +1941,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
cl->R_tab = rtab;
rtab = NULL;
cl->refcnt = 1;
- if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+ if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
cl->q = &noop_qdisc;
cl->classid = classid;
cl->tparent = parent;
@@ -1986,12 +1995,17 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class*)arg;
+ unsigned int qlen;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
sch_tree_lock(sch);
+ qlen = cl->q->q.qlen;
+ qdisc_reset(cl->q);
+ qdisc_tree_decrease_qlen(cl->q, qlen);
+
if (cl->next_alive)
cbq_deactivate_class(cl);
@@ -2082,6 +2096,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static struct Qdisc_class_ops cbq_class_ops = {
.graft = cbq_graft,
.leaf = cbq_leaf,
+ .qlen_notify = cbq_qlen_notify,
.get = cbq_get,
.put = cbq_put,
.change = cbq_change_class,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 11c8a2119b9..d5421816f00 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -88,15 +88,16 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
sch, p, new, old);
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ sch->handle);
if (new == NULL)
new = &noop_qdisc;
}
sch_tree_lock(sch);
*old = xchg(&p->q, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
- sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
@@ -307,7 +308,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
if (p->mask[index] != 0xff || p->value[index])
printk(KERN_WARNING "dsmark_dequeue: "
"unsupported protocol %d\n",
- htons(skb->protocol));
+ ntohs(skb->protocol));
break;
};
@@ -387,7 +388,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
p->default_index = default_index;
p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
- p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 88c6a99ce53..bc116bd6937 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -450,13 +450,15 @@ errout:
return ERR_PTR(-err);
}
-struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
+ unsigned int parentid)
{
struct Qdisc *sch;
sch = qdisc_alloc(dev, ops);
if (IS_ERR(sch))
goto errout;
+ sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
@@ -520,7 +522,8 @@ void dev_activate(struct net_device *dev)
if (dev->qdisc_sleeping == &noop_qdisc) {
struct Qdisc *qdisc;
if (dev->tx_queue_len) {
- qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
+ qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
+ TC_H_ROOT);
if (qdisc == NULL) {
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
@@ -606,13 +609,10 @@ void dev_shutdown(struct net_device *dev)
qdisc_unlock_tree(dev);
}
-EXPORT_SYMBOL(__netdev_watchdog_up);
EXPORT_SYMBOL(netif_carrier_on);
EXPORT_SYMBOL(netif_carrier_off);
EXPORT_SYMBOL(noop_qdisc);
-EXPORT_SYMBOL(noop_qdisc_ops);
EXPORT_SYMBOL(qdisc_create_dflt);
-EXPORT_SYMBOL(qdisc_alloc);
EXPORT_SYMBOL(qdisc_destroy);
EXPORT_SYMBOL(qdisc_reset);
EXPORT_SYMBOL(qdisc_lock_tree);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6a6735a2ed3..6eefa699577 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -946,6 +946,7 @@ qdisc_peek_len(struct Qdisc *sch)
if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
if (net_ratelimit())
printk("qdisc_peek_len: failed to requeue\n");
+ qdisc_tree_decrease_qlen(sch, 1);
return 0;
}
return len;
@@ -957,11 +958,7 @@ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
unsigned int len = cl->qdisc->q.qlen;
qdisc_reset(cl->qdisc);
- if (len > 0) {
- update_vf(cl, 0, 0);
- set_passive(cl);
- sch->q.qlen -= len;
- }
+ qdisc_tree_decrease_qlen(cl->qdisc, len);
}
static void
@@ -1138,7 +1135,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->classid = classid;
cl->sched = q;
cl->cl_parent = parent;
- cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
cl->stats_lock = &sch->dev->queue_lock;
@@ -1271,7 +1268,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl->level > 0)
return -EINVAL;
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ cl->classid);
if (new == NULL)
new = &noop_qdisc;
}
@@ -1294,6 +1292,17 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
+static void
+hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl->qdisc->q.qlen == 0) {
+ update_vf(cl, 0, 0);
+ set_passive(cl);
+ }
+}
+
static unsigned long
hfsc_get_class(struct Qdisc *sch, u32 classid)
{
@@ -1514,7 +1523,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
q->root.refcnt = 1;
q->root.classid = sch->handle;
q->root.sched = q;
- q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
q->root.stats_lock = &sch->dev->queue_lock;
@@ -1777,6 +1787,7 @@ static struct Qdisc_class_ops hfsc_class_ops = {
.delete = hfsc_delete_class,
.graft = hfsc_graft_class,
.leaf = hfsc_class_leaf,
+ .qlen_notify = hfsc_qlen_notify,
.get = hfsc_get_class,
.put = hfsc_put_class,
.bind_tcf = hfsc_bind_tcf,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9b9c555c713..15f23c5511a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -147,6 +147,10 @@ struct htb_class {
psched_tdiff_t mbuffer; /* max wait time */
long tokens, ctokens; /* current number of tokens */
psched_time_t t_c; /* checkpoint time */
+
+ int prio; /* Backup copies of un.leaf.prio and un.leaf.quantum, */
+ int quantum; /* used to restore them when a parent becomes a leaf. */
+ /* Eventually these should replace the un.leaf originals. */
};
/* TODO: maybe compute rate when size is too large .. or drop ? */
@@ -1223,17 +1227,14 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct htb_class *cl = (struct htb_class *)arg;
if (cl && !cl->level) {
- if (new == NULL && (new = qdisc_create_dflt(sch->dev,
- &pfifo_qdisc_ops))
+ if (new == NULL &&
+ (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ cl->classid))
== NULL)
return -ENOBUFS;
sch_tree_lock(sch);
if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
- if (cl->prio_activity)
- htb_deactivate(qdisc_priv(sch), cl);
-
- /* TODO: is it correct ? Why CBQ doesn't do it ? */
- sch->q.qlen -= (*old)->q.qlen;
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
}
sch_tree_unlock(sch);
@@ -1248,6 +1249,14 @@ static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}
+static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (cl->un.leaf.q->q.qlen == 0)
+ htb_deactivate(qdisc_priv(sch), cl);
+}
+
static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
struct htb_class *cl = htb_find(classid, sch);
@@ -1266,12 +1275,44 @@ static void htb_destroy_filters(struct tcf_proto **fl)
}
}
+static inline int htb_parent_last_child(struct htb_class *cl)
+{
+ if (!cl->parent)
+ /* the root class */
+ return 0;
+
+ if (!(cl->parent->children.next == &cl->sibling &&
+ cl->parent->children.prev == &cl->sibling))
+ /* not the last child */
+ return 0;
+
+ return 1;
+}
+
+static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
+{
+ struct htb_class *parent = cl->parent;
+
+ BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+
+ parent->level = 0;
+ memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+ INIT_LIST_HEAD(&parent->un.leaf.drop_list);
+ parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
+ parent->un.leaf.quantum = parent->quantum;
+ parent->un.leaf.prio = parent->prio;
+ parent->tokens = parent->buffer;
+ parent->ctokens = parent->cbuffer;
+ PSCHED_GET_TIME(parent->t_c);
+ parent->cmode = HTB_CAN_SEND;
+}
+
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
struct htb_sched *q = qdisc_priv(sch);
+
if (!cl->level) {
BUG_TRAP(cl->un.leaf.q);
- sch->q.qlen -= cl->un.leaf.q->q.qlen;
qdisc_destroy(cl->un.leaf.q);
}
qdisc_put_rtab(cl->rate);
@@ -1284,8 +1325,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
struct htb_class, sibling));
/* note: this delete may happen twice (see htb_delete) */
- if (!hlist_unhashed(&cl->hlist))
- hlist_del(&cl->hlist);
+ hlist_del_init(&cl->hlist);
list_del(&cl->sibling);
if (cl->prio_activity)
@@ -1323,6 +1363,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
+ unsigned int qlen;
+ struct Qdisc *new_q = NULL;
+ int last_child = 0;
// TODO: why don't allow to delete subtree ? references ? does
// tc subsys quarantee us that in htb_destroy it holds no class
@@ -1330,15 +1373,29 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
if (!list_empty(&cl->children) || cl->filter_cnt)
return -EBUSY;
+ if (!cl->level && htb_parent_last_child(cl)) {
+ new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ cl->parent->classid);
+ last_child = 1;
+ }
+
sch_tree_lock(sch);
/* delete from hash and active; remainder in destroy_class */
- if (!hlist_unhashed(&cl->hlist))
- hlist_del(&cl->hlist);
+ hlist_del_init(&cl->hlist);
+
+ if (!cl->level) {
+ qlen = cl->un.leaf.q->q.qlen;
+ qdisc_reset(cl->un.leaf.q);
+ qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ }
if (cl->prio_activity)
htb_deactivate(q, cl);
+ if (last_child)
+ htb_parent_to_leaf(cl, new_q);
+
if (--cl->refcnt == 0)
htb_destroy_class(sch, cl);
@@ -1412,11 +1469,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
so that can't be used inside of sch_tree_lock
-- thanks to Karlis Peisenieks */
- new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
sch_tree_lock(sch);
if (parent && !parent->level) {
+ unsigned int qlen = parent->un.leaf.q->q.qlen;
+
/* turn parent into inner node */
- sch->q.qlen -= parent->un.leaf.q->q.qlen;
+ qdisc_reset(parent->un.leaf.q);
+ qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
qdisc_destroy(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
@@ -1470,6 +1530,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->un.leaf.quantum = hopt->quantum;
if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
+
+ /* backup for htb_parent_to_leaf */
+ cl->quantum = cl->un.leaf.quantum;
+ cl->prio = cl->un.leaf.prio;
}
cl->buffer = hopt->buffer;
@@ -1564,6 +1628,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static struct Qdisc_class_ops htb_class_ops = {
.graft = htb_graft,
.leaf = htb_leaf,
+ .qlen_notify = htb_qlen_notify,
.get = htb_get,
.put = htb_put,
.change = htb_change_class,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 45939bafbdf..79542af9dab 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -4,7 +4,7 @@
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * 2 of the License.
*
* Many of the algorithms and ideas for this came from
* NIST Net which is not copyrighted.
@@ -170,6 +170,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_BYPASS;
}
+ skb_orphan(skb);
+
/*
* If we need to duplicate packet, then re-insert at top of the
* qdisc tree, since parent queuer expects that only one
@@ -285,13 +287,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
sch->qstats.drops++;
-
- /* After this qlen is confused */
printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
q->qdisc->ops->id);
-
- sch->q.qlen--;
}
mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
@@ -572,7 +571,8 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch;
- q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
+ q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 1));
if (!q->qdisc) {
pr_debug("netem: qdisc create failed\n");
return -ENOMEM;
@@ -659,8 +659,8 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
- sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a5fa03c0c19..2567b4c96c1 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -222,21 +222,27 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
- if (child != &noop_qdisc)
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child, child->q.qlen);
qdisc_destroy(child);
+ }
}
sch_tree_unlock(sch);
for (i=0; i<q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
struct Qdisc *child;
- child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, i + 1));
if (child) {
sch_tree_lock(sch);
child = xchg(&q->queues[i], child);
- if (child != &noop_qdisc)
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child,
+ child->q.qlen);
qdisc_destroy(child);
+ }
sch_tree_unlock(sch);
}
}
@@ -294,7 +300,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = q->queues[band];
q->queues[band] = new;
- sch->q.qlen -= (*old)->q.qlen;
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d65cadddea6..acddad08850 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -175,12 +175,14 @@ static void red_destroy(struct Qdisc *sch)
qdisc_destroy(q->qdisc);
}
-static struct Qdisc *red_create_dflt(struct net_device *dev, u32 limit)
+static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
{
- struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
+ struct Qdisc *q;
struct rtattr *rta;
int ret;
+ q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 1));
if (q) {
rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
GFP_KERNEL);
@@ -219,7 +221,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
if (ctl->limit > 0) {
- child = red_create_dflt(sch->dev, ctl->limit);
+ child = red_create_dflt(sch, ctl->limit);
if (child == NULL)
return -ENOMEM;
}
@@ -227,8 +229,10 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
sch_tree_lock(sch);
q->flags = ctl->flags;
q->limit = ctl->limit;
- if (child)
+ if (child) {
+ qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
qdisc_destroy(xchg(&q->qdisc, child));
+ }
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
@@ -306,8 +310,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
- sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d0d6e595a78..459cda258a5 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -393,6 +393,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+ unsigned int qlen;
if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
return -EINVAL;
@@ -403,8 +404,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
if (ctl->limit)
q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+ qlen = sch->q.qlen;
while (sch->q.qlen >= q->limit-1)
sfq_drop(sch);
+ qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index d9a5d298d75..ed9b6d93854 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -250,7 +250,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
/* When requeue fails skb is dropped */
- sch->q.qlen--;
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
sch->qstats.drops++;
}
@@ -273,12 +273,14 @@ static void tbf_reset(struct Qdisc* sch)
del_timer(&q->wd_timer);
}
-static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit)
+static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
{
- struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
+ struct Qdisc *q;
struct rtattr *rta;
int ret;
+ q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 1));
if (q) {
rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (rta) {
@@ -341,13 +343,15 @@ static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
goto done;
if (qopt->limit > 0) {
- if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL)
+ if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL)
goto done;
}
sch_tree_lock(sch);
- if (child)
+ if (child) {
+ qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
qdisc_destroy(xchg(&q->qdisc, child));
+ }
q->limit = qopt->limit;
q->mtu = qopt->mtu;
q->max_size = max_size;
@@ -449,8 +453,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
- sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 27329ce9c31..5db95caed0a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -61,7 +61,7 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal functions. */
-static void sctp_assoc_bh_rcv(struct sctp_association *asoc);
+static void sctp_assoc_bh_rcv(struct work_struct *work);
/* 1st Level Abstractions. */
@@ -269,9 +269,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Create an input queue. */
sctp_inq_init(&asoc->base.inqueue);
- sctp_inq_set_th_handler(&asoc->base.inqueue,
- (void (*)(void *))sctp_assoc_bh_rcv,
- asoc);
+ sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv);
/* Create an output queue. */
sctp_outq_init(asoc, &asoc->outqueue);
@@ -300,6 +298,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
asoc->default_flags = sp->default_flags;
asoc->default_context = sp->default_context;
asoc->default_timetolive = sp->default_timetolive;
+ asoc->default_rcv_context = sp->default_rcv_context;
return asoc;
@@ -346,11 +345,18 @@ void sctp_association_free(struct sctp_association *asoc)
struct list_head *pos, *temp;
int i;
- list_del(&asoc->asocs);
+ /* Only real associations count against the endpoint, so
+ * don't bother for if this is a temporary association.
+ */
+ if (!asoc->temp) {
+ list_del(&asoc->asocs);
- /* Decrement the backlog value for a TCP-style listening socket. */
- if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
- sk->sk_ack_backlog--;
+ /* Decrement the backlog value for a TCP-style listening
+ * socket.
+ */
+ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
+ sk->sk_ack_backlog--;
+ }
/* Mark as dead, so other users can know this structure is
* going away.
@@ -481,7 +487,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
" port: %d\n",
asoc,
(&peer->ipaddr),
- peer->ipaddr.v4.sin_port);
+ ntohs(peer->ipaddr.v4.sin_port));
/* If we are to remove the current retran_path, update it
* to the next peer before removing this peer from the list.
@@ -530,13 +536,13 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
sp = sctp_sk(asoc->base.sk);
/* AF_INET and AF_INET6 share common port field. */
- port = addr->v4.sin_port;
+ port = ntohs(addr->v4.sin_port);
SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ",
" port: %d state:%d\n",
asoc,
addr,
- addr->v4.sin_port,
+ port,
peer_state);
/* Set the port if it has not been set yet. */
@@ -702,6 +708,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
struct sctp_transport *first;
struct sctp_transport *second;
struct sctp_ulpevent *event;
+ struct sockaddr_storage addr;
struct list_head *pos;
int spc_state = 0;
@@ -724,8 +731,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
* user.
*/
- event = sctp_ulpevent_make_peer_addr_change(asoc,
- (struct sockaddr_storage *) &transport->ipaddr,
+ memset(&addr, 0, sizeof(struct sockaddr_storage));
+ memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
+ event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
sctp_ulpq_tail_event(&asoc->ulpq, event);
@@ -861,7 +869,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc,
struct list_head *entry, *pos;
struct sctp_transport *transport;
struct sctp_chunk *chunk;
- __u32 key = htonl(tsn);
+ __be32 key = htonl(tsn);
match = NULL;
@@ -919,8 +927,8 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
sctp_read_lock(&asoc->base.addr_lock);
- if ((asoc->base.bind_addr.port == laddr->v4.sin_port) &&
- (asoc->peer.port == paddr->v4.sin_port)) {
+ if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
+ (htons(asoc->peer.port) == paddr->v4.sin_port)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
if (!transport)
goto out;
@@ -937,8 +945,11 @@ out:
}
/* Do delayed input processing. This is scheduled by sctp_rcv(). */
-static void sctp_assoc_bh_rcv(struct sctp_association *asoc)
+static void sctp_assoc_bh_rcv(struct work_struct *work)
{
+ struct sctp_association *asoc =
+ container_of(work, struct sctp_association,
+ base.inqueue.immediate);
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sock *sk;
@@ -1128,7 +1139,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
" port: %d\n",
asoc,
(&t->ipaddr),
- t->ipaddr.v4.sin_port);
+ ntohs(t->ipaddr.v4.sin_port));
}
/* Choose the transport for sending a INIT packet. */
@@ -1153,7 +1164,7 @@ struct sctp_transport *sctp_assoc_choose_init_transport(
" port: %d\n",
asoc,
(&t->ipaddr),
- t->ipaddr.v4.sin_port);
+ ntohs(t->ipaddr.v4.sin_port));
return t;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 2b9c12a170e..00994158e49 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -161,7 +161,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
* Both v4 and v6 have the port at the same offset.
*/
if (!addr->a.v4.sin_port)
- addr->a.v4.sin_port = bp->port;
+ addr->a.v4.sin_port = htons(bp->port);
addr->use_as_src = use_as_src;
@@ -275,7 +275,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
break;
}
- af->from_addr_param(&addr, rawaddr, port, 0);
+ af->from_addr_param(&addr, rawaddr, htons(port), 0);
retval = sctp_add_bind_addr(bp, &addr, 1, gfp);
if (retval) {
/* Can't finish building the list, clean up. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 35c49ff2d06..129756908da 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -61,7 +61,7 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers. */
-static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
+static void sctp_endpoint_bh_rcv(struct work_struct *work);
/*
* Initialize the base fields of the endpoint structure.
@@ -72,6 +72,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
{
memset(ep, 0, sizeof(struct sctp_endpoint));
+ ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
+ if (!ep->digest)
+ return NULL;
+
/* Initialize the base structure. */
/* What type of endpoint are we? */
ep->base.type = SCTP_EP_TYPE_SOCKET;
@@ -85,8 +89,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
sctp_inq_init(&ep->base.inqueue);
/* Set its top-half handler */
- sctp_inq_set_th_handler(&ep->base.inqueue,
- (void (*)(void *))sctp_endpoint_bh_rcv, ep);
+ sctp_inq_set_th_handler(&ep->base.inqueue, sctp_endpoint_bh_rcv);
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
@@ -144,6 +147,13 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
{
struct sock *sk = ep->base.sk;
+ /* If this is a temporary association, don't bother
+ * since we'll be removing it shortly and don't
+ * want anyone to find it anyway.
+ */
+ if (asoc->temp)
+ return;
+
/* Now just add it to our list of asocs */
list_add_tail(&asoc->asocs, &ep->asocs);
@@ -175,6 +185,9 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
/* Free up the HMAC transform. */
crypto_free_hash(sctp_sk(ep->base.sk)->hmac);
+ /* Free the digest buffer */
+ kfree(ep->digest);
+
/* Cleanup. */
sctp_inq_free(&ep->base.inqueue);
sctp_bind_addr_free(&ep->base.bind_addr);
@@ -216,7 +229,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
struct sctp_endpoint *retval;
sctp_read_lock(&ep->base.addr_lock);
- if (ep->base.bind_addr.port == laddr->v4.sin_port) {
+ if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
sctp_sk(ep->base.sk))) {
retval = ep;
@@ -244,7 +257,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
struct sctp_association *asoc;
struct list_head *pos;
- rport = paddr->v4.sin_port;
+ rport = ntohs(paddr->v4.sin_port);
list_for_each(pos, &ep->asocs) {
asoc = list_entry(pos, struct sctp_association, asocs);
@@ -304,8 +317,11 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
/* Do delayed input processing. This is scheduled by sctp_rcv().
* This may be called on BH or task time.
*/
-static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep)
+static void sctp_endpoint_bh_rcv(struct work_struct *work)
{
+ struct sctp_endpoint *ep =
+ container_of(work, struct sctp_endpoint,
+ base.inqueue.immediate);
struct sctp_association *asoc;
struct sock *sk;
struct sctp_transport *transport;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 64f63010253..33111873a48 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -135,6 +135,9 @@ int sctp_rcv(struct sk_buff *skb)
SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS);
+ if (skb_linearize(skb))
+ goto discard_it;
+
sh = (struct sctphdr *) skb->h.raw;
/* Pull up the IP and SCTP headers. */
@@ -723,7 +726,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
struct sctp_endpoint *ep;
int hash;
- hash = sctp_ep_hashfn(laddr->v4.sin_port);
+ hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port));
head = &sctp_ep_hashtable[hash];
read_lock(&head->lock);
for (epb = head->chain; epb; epb = epb->next) {
@@ -768,6 +771,9 @@ static void __sctp_hash_established(struct sctp_association *asoc)
/* Add an association to the hash. Local BH-safe. */
void sctp_hash_established(struct sctp_association *asoc)
{
+ if (asoc->temp)
+ return;
+
sctp_local_bh_disable();
__sctp_hash_established(asoc);
sctp_local_bh_enable();
@@ -801,6 +807,9 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
/* Remove association from the hash table. Local BH-safe. */
void sctp_unhash_established(struct sctp_association *asoc)
{
+ if (asoc->temp)
+ return;
+
sctp_local_bh_disable();
__sctp_unhash_established(asoc);
sctp_local_bh_enable();
@@ -821,7 +830,7 @@ static struct sctp_association *__sctp_lookup_association(
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- hash = sctp_assoc_hashfn(local->v4.sin_port, peer->v4.sin_port);
+ hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port));
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
for (epb = head->chain; epb; epb = epb->next) {
@@ -948,7 +957,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
if (!af)
continue;
- af->from_addr_param(paddr, params.addr, ntohs(sh->source), 0);
+ af->from_addr_param(paddr, params.addr, sh->source, 0);
asoc = __sctp_lookup_association(laddr, paddr, &transport);
if (asoc)
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf6deed7e84..71b07466e88 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -54,7 +54,7 @@ void sctp_inq_init(struct sctp_inq *queue)
queue->in_progress = NULL;
/* Create a task for delivering data. */
- INIT_WORK(&queue->immediate, NULL, NULL);
+ INIT_WORK(&queue->immediate, NULL);
queue->malloced = 0;
}
@@ -97,7 +97,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
* on the BH related data structures.
*/
list_add_tail(&chunk->list, &q->in_chunk_list);
- q->immediate.func(q->immediate.data);
+ q->immediate.func(&q->immediate);
}
/* Extract a chunk from an SCTP inqueue.
@@ -205,9 +205,8 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* The intent is that this routine will pull stuff out of the
* inqueue and process it.
*/
-void sctp_inq_set_th_handler(struct sctp_inq *q,
- void (*callback)(void *), void *arg)
+void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback)
{
- INIT_WORK(&q->immediate, callback, arg);
+ INIT_WORK(&q->immediate, callback);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 78071c6e6cf..d8d36dee5ab 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -78,13 +78,49 @@
#include <asm/uaccess.h>
+/* Event handler for inet6 address addition/deletion events. */
+int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
+ void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct sctp_sockaddr_entry *addr;
+ struct list_head *pos, *temp;
+
+ switch (ev) {
+ case NETDEV_UP:
+ addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+ if (addr) {
+ addr->a.v6.sin6_family = AF_INET6;
+ addr->a.v6.sin6_port = 0;
+ memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
+ sizeof(struct in6_addr));
+ addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
+ list_add_tail(&addr->list, &sctp_local_addr_list);
+ }
+ break;
+ case NETDEV_DOWN:
+ list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+ addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
+ list_del(pos);
+ kfree(addr);
+ break;
+ }
+ }
+
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
static struct notifier_block sctp_inet6addr_notifier = {
- .notifier_call = sctp_inetaddr_event,
+ .notifier_call = sctp_inet6addr_event,
};
/* ICMP error handler. */
SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct inet6_dev *idev;
struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
@@ -170,8 +206,6 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport,
fl.oif = transport->saddr.v6.sin6_scope_id;
else
fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_sport = inet_sk(sk)->sport;
- fl.fl_ip_dport = transport->ipaddr.v6.sin6_port;
if (np->opt && np->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
@@ -239,7 +273,7 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
int i, j;
for (i = 0; i < 4 ; i++) {
- __u32 a1xora2;
+ __be32 a1xora2;
a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
@@ -350,7 +384,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
int is_saddr)
{
void *from;
- __u16 *port;
+ __be16 *port;
struct sctphdr *sh;
port = &addr->v6.sin6_port;
@@ -360,10 +394,10 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
sh = (struct sctphdr *) skb->h.raw;
if (is_saddr) {
- *port = ntohs(sh->source);
+ *port = sh->source;
from = &skb->nh.ipv6h->saddr;
} else {
- *port = ntohs(sh->dest);
+ *port = sh->dest;
from = &skb->nh.ipv6h->daddr;
}
ipv6_addr_copy(&addr->v6.sin6_addr, from);
@@ -373,7 +407,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
{
addr->v6.sin6_family = AF_INET6;
- addr->v6.sin6_port = inet_sk(sk)->num;
+ addr->v6.sin6_port = 0;
addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
}
@@ -407,7 +441,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
/* Initialize a sctp_addr from an address parameter. */
static void sctp_v6_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
- __u16 port, int iif)
+ __be16 port, int iif)
{
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = port;
@@ -425,7 +459,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
int length = sizeof(sctp_ipv6addr_param_t);
param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
- param->v6.param_hdr.length = ntohs(length);
+ param->v6.param_hdr.length = htons(length);
ipv6_addr_copy(&param->v6.addr, &addr->v6.sin6_addr);
return length;
@@ -433,7 +467,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
/* Initialize a sctp_addr from a dst_entry. */
static void sctp_v6_dst_saddr(union sctp_addr *addr, struct dst_entry *dst,
- unsigned short port)
+ __be16 port)
{
struct rt6_info *rt = (struct rt6_info *)dst;
addr->sa.sa_family = AF_INET6;
@@ -480,7 +514,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
/* Initialize addr struct to INADDR_ANY. */
-static void sctp_v6_inaddr_any(union sctp_addr *addr, unsigned short port)
+static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port)
{
memset(addr, 0x00, sizeof(union sctp_addr));
addr->v6.sin6_family = AF_INET6;
@@ -855,7 +889,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
* Returns number of addresses supported.
*/
static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
- __u16 *types)
+ __be16 *types)
{
types[0] = SCTP_PARAM_IPV4_ADDRESS;
types[1] = SCTP_PARAM_IPV6_ADDRESS;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 739582415bf..fba567a7cb6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1065,7 +1065,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
* A) Initialize the cacc_saw_newack to 0 for all destination
* addresses.
*/
- if (sack->num_gap_ack_blocks > 0 &&
+ if (sack->num_gap_ack_blocks &&
primary->cacc.changeover_active) {
list_for_each(pos, transport_list) {
transport = list_entry(pos, struct sctp_transport,
@@ -1632,7 +1632,7 @@ pass:
}
static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist,
- int nskips, __u16 stream)
+ int nskips, __be16 stream)
{
int i;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 7f49e769080..b3493bdbcac 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -160,7 +160,7 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo
list_for_each(pos, &epb->bind_addr.address_list) {
laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- addr = (union sctp_addr *)&laddr->a;
+ addr = &laddr->a;
af = sctp_get_af_specific(addr->sa.sa_family);
if (primary && af->cmp_addr(addr, primary)) {
seq_printf(seq, "*");
@@ -177,10 +177,10 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
union sctp_addr *addr, *primary;
struct sctp_af *af;
- primary = &(assoc->peer.primary_addr);
+ primary = &assoc->peer.primary_addr;
list_for_each(pos, &assoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport, transports);
- addr = (union sctp_addr *)&transport->ipaddr;
+ addr = &transport->ipaddr;
af = sctp_get_af_specific(addr->sa.sa_family);
if (af->cmp_addr(addr, primary)) {
seq_printf(seq, "*");
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fac7674438a..3a3db56729c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -79,8 +79,8 @@ static struct sctp_pf *sctp_pf_inet_specific;
static struct sctp_af *sctp_af_v4_specific;
static struct sctp_af *sctp_af_v6_specific;
-kmem_cache_t *sctp_chunk_cachep __read_mostly;
-kmem_cache_t *sctp_bucket_cachep __read_mostly;
+struct kmem_cache *sctp_chunk_cachep __read_mostly;
+struct kmem_cache *sctp_bucket_cachep __read_mostly;
/* Return the address of the control sock. */
struct sock *sctp_get_ctl_sock(void)
@@ -163,7 +163,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
/* Extract our IP addresses from the system and stash them in the
* protocol structure.
*/
-static void __sctp_get_local_addr_list(void)
+static void sctp_get_local_addr_list(void)
{
struct net_device *dev;
struct list_head *pos;
@@ -179,17 +179,8 @@ static void __sctp_get_local_addr_list(void)
read_unlock(&dev_base_lock);
}
-static void sctp_get_local_addr_list(void)
-{
- unsigned long flags;
-
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- __sctp_get_local_addr_list();
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
-}
-
/* Free the existing local addresses. */
-static void __sctp_free_local_addr_list(void)
+static void sctp_free_local_addr_list(void)
{
struct sctp_sockaddr_entry *addr;
struct list_head *pos, *temp;
@@ -201,27 +192,15 @@ static void __sctp_free_local_addr_list(void)
}
}
-/* Free the existing local addresses. */
-static void sctp_free_local_addr_list(void)
-{
- unsigned long flags;
-
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- __sctp_free_local_addr_list();
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
-}
-
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
int error = 0;
- struct list_head *pos;
- unsigned long flags;
+ struct list_head *pos, *temp;
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- list_for_each(pos, &sctp_local_addr_list) {
+ list_for_each_safe(pos, temp, &sctp_local_addr_list) {
addr = list_entry(pos, struct sctp_sockaddr_entry, list);
if (sctp_in_scope(&addr->a, scope)) {
/* Now that the address is in scope, check to see if
@@ -242,7 +221,6 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
}
end_copy:
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
return error;
}
@@ -251,7 +229,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
{
void *from;
- __u16 *port;
+ __be16 *port;
struct sctphdr *sh;
port = &addr->v4.sin_port;
@@ -259,10 +237,10 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
sh = (struct sctphdr *) skb->h.raw;
if (is_saddr) {
- *port = ntohs(sh->source);
+ *port = sh->source;
from = &skb->nh.iph->saddr;
} else {
- *port = ntohs(sh->dest);
+ *port = sh->dest;
from = &skb->nh.iph->daddr;
}
memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
@@ -272,7 +250,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
{
addr->v4.sin_family = AF_INET;
- addr->v4.sin_port = inet_sk(sk)->num;
+ addr->v4.sin_port = 0;
addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr;
}
@@ -291,7 +269,7 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
/* Initialize a sctp_addr from an address parameter. */
static void sctp_v4_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
- __u16 port, int iif)
+ __be16 port, int iif)
{
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
@@ -307,7 +285,7 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr,
int length = sizeof(sctp_ipv4addr_param_t);
param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
- param->v4.param_hdr.length = ntohs(length);
+ param->v4.param_hdr.length = htons(length);
param->v4.addr.s_addr = addr->v4.sin_addr.s_addr;
return length;
@@ -315,7 +293,7 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr,
/* Initialize a sctp_addr from a dst_entry. */
static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct dst_entry *dst,
- unsigned short port)
+ __be16 port)
{
struct rtable *rt = (struct rtable *)dst;
saddr->v4.sin_family = AF_INET;
@@ -338,7 +316,7 @@ static int sctp_v4_cmp_addr(const union sctp_addr *addr1,
}
/* Initialize addr struct to INADDR_ANY. */
-static void sctp_v4_inaddr_any(union sctp_addr *addr, unsigned short port)
+static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port)
{
addr->v4.sin_family = AF_INET;
addr->v4.sin_addr.s_addr = INADDR_ANY;
@@ -481,7 +459,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
list);
if (!laddr->use_as_src)
continue;
- sctp_v4_dst_saddr(&dst_saddr, dst, bp->port);
+ sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
@@ -538,7 +516,7 @@ static void sctp_v4_get_saddr(struct sctp_association *asoc,
if (rt) {
saddr->v4.sin_family = AF_INET;
- saddr->v4.sin_port = asoc->base.bind_addr.port;
+ saddr->v4.sin_port = htons(asoc->base.bind_addr.port);
saddr->v4.sin_addr.s_addr = rt->rt_src;
}
}
@@ -591,7 +569,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
newinet->dport = htons(asoc->peer.port);
newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
newinet->pmtudisc = inet->pmtudisc;
- newinet->id = 0;
+ newinet->id = asoc->next_tsn ^ jiffies;
newinet->uc_ttl = -1;
newinet->mc_loop = 1;
@@ -622,18 +600,36 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
}
-/* Event handler for inet address addition/deletion events.
- * Basically, whenever there is an event, we re-build our local address list.
- */
+/* Event handler for inet address addition/deletion events. */
int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
void *ptr)
{
- unsigned long flags;
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct sctp_sockaddr_entry *addr;
+ struct list_head *pos, *temp;
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- __sctp_free_local_addr_list();
- __sctp_get_local_addr_list();
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+ switch (ev) {
+ case NETDEV_UP:
+ addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+ if (addr) {
+ addr->a.v4.sin_family = AF_INET;
+ addr->a.v4.sin_port = 0;
+ addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
+ list_add_tail(&addr->list, &sctp_local_addr_list);
+ }
+ break;
+ case NETDEV_DOWN:
+ list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+ addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
+ list_del(pos);
+ kfree(addr);
+ break;
+ }
+ }
+
+ break;
+ }
return NOTIFY_DONE;
}
@@ -791,7 +787,7 @@ static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
* chunks. Returns number of addresses supported.
*/
static int sctp_inet_supported_addrs(const struct sctp_sock *opt,
- __u16 *types)
+ __be16 *types)
{
types[0] = SCTP_PARAM_IPV4_ADDRESS;
return 1;
@@ -808,7 +804,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
NIPQUAD(((struct rtable *)skb->dst)->rt_dst));
SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
- return ip_queue_xmit(skb, ipfragok);
+ return ip_queue_xmit(skb, skb->sk, ipfragok);
}
static struct sctp_af sctp_ipv4_specific;
@@ -1172,13 +1168,12 @@ SCTP_STATIC __init int sctp_init(void)
/* Initialize the local address list. */
INIT_LIST_HEAD(&sctp_local_addr_list);
- spin_lock_init(&sctp_local_addr_lock);
+
+ sctp_get_local_addr_list();
/* Register notifier for inet address additions/deletions. */
register_inetaddr_notifier(&sctp_inetaddr_notifier);
- sctp_get_local_addr_list();
-
__unsafe(THIS_MODULE);
status = 0;
out:
@@ -1263,6 +1258,7 @@ module_exit(sctp_exit);
* __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly.
*/
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
MODULE_LICENSE("GPL");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 507dff72c58..30927d3a597 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -65,7 +65,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-extern kmem_cache_t *sctp_chunk_cachep;
+extern struct kmem_cache *sctp_chunk_cachep;
SCTP_STATIC
struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
@@ -111,7 +111,7 @@ static const struct sctp_paramhdr prsctp_param = {
* provided chunk, as most cause codes will be embedded inside an
* abort chunk.
*/
-void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code,
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
const void *payload, size_t paylen)
{
sctp_errhdr_t err;
@@ -183,7 +183,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
int num_types, addrs_len = 0;
struct sctp_sock *sp;
sctp_supported_addrs_param_t sat;
- __u16 types[2];
+ __be16 types[2];
sctp_adaption_ind_param_t aiparam;
/* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -775,7 +775,7 @@ struct sctp_chunk *sctp_make_abort_no_data(
const struct sctp_chunk *chunk, __u32 tsn)
{
struct sctp_chunk *retval;
- __u32 payload;
+ __be32 payload;
retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
+ sizeof(tsn));
@@ -951,7 +951,7 @@ nodata:
/* Create an Operation Error chunk. */
struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
- __u16 cause_code, const void *payload,
+ __be16 cause_code, const void *payload,
size_t paylen)
{
struct sctp_chunk *retval;
@@ -979,7 +979,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
{
struct sctp_chunk *retval;
- retval = kmem_cache_alloc(sctp_chunk_cachep, SLAB_ATOMIC);
+ retval = kmem_cache_alloc(sctp_chunk_cachep, GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -1190,15 +1190,14 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
ssn = 0;
} else {
- sid = htons(chunk->subh.data_hdr->stream);
+ sid = ntohs(chunk->subh.data_hdr->stream);
if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG)
ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid);
else
ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid);
- ssn = htons(ssn);
}
- chunk->subh.data_hdr->ssn = ssn;
+ chunk->subh.data_hdr->ssn = htons(ssn);
chunk->has_ssn = 1;
}
@@ -1280,15 +1279,13 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
- (bodysize % SCTP_COOKIE_MULTIPLE);
*cookie_len = headersize + bodysize;
- retval = kmalloc(*cookie_len, GFP_ATOMIC);
-
- if (!retval)
- goto nodata;
-
/* Clear this memory since we are sending this data structure
* out on the network.
*/
- memset(retval, 0x00, *cookie_len);
+ retval = kzalloc(*cookie_len, GFP_ATOMIC);
+ if (!retval)
+ goto nodata;
+
cookie = (struct sctp_signed_cookie *) retval->body;
/* Set up the parameter header. */
@@ -1438,7 +1435,7 @@ no_hmac:
goto fail;
}
- if (ntohs(chunk->sctp_hdr->source) != bear_cookie->peer_addr.v4.sin_port ||
+ if (chunk->sctp_hdr->source != bear_cookie->peer_addr.v4.sin_port ||
ntohs(chunk->sctp_hdr->dest) != bear_cookie->my_port) {
*error = -SCTP_IERROR_BAD_PORTS;
goto fail;
@@ -1473,10 +1470,10 @@ no_hmac:
suseconds_t usecs = (tv.tv_sec -
bear_cookie->expiration.tv_sec) * 1000000L +
tv.tv_usec - bear_cookie->expiration.tv_usec;
+ __be32 n = htonl(usecs);
- usecs = htonl(usecs);
sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
- &usecs, sizeof(usecs));
+ &n, sizeof(n));
*error = -SCTP_IERROR_STALE_COOKIE;
} else
*error = -SCTP_IERROR_NOMEM;
@@ -1539,8 +1536,8 @@ malformed:
********************************************************************/
struct __sctp_missing {
- __u32 num_missing;
- __u16 type;
+ __be32 num_missing;
+ __be16 type;
} __attribute__((packed));
/*
@@ -1852,9 +1849,10 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
* added as the primary transport. The source address seems to
* be a a better choice than any of the embedded addresses.
*/
- if (peer_addr)
+ if (peer_addr) {
if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE))
goto nomem;
+ }
/* Process the initialization parameters. */
@@ -1910,10 +1908,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
/* Copy cookie in case we need to resend COOKIE-ECHO. */
cookie = asoc->peer.cookie;
if (cookie) {
- asoc->peer.cookie = kmalloc(asoc->peer.cookie_len, gfp);
+ asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp);
if (!asoc->peer.cookie)
goto clean_up;
- memcpy(asoc->peer.cookie, cookie, asoc->peer.cookie_len);
}
/* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily
@@ -2027,7 +2024,7 @@ static int sctp_process_param(struct sctp_association *asoc,
/* Fall through. */
case SCTP_PARAM_IPV4_ADDRESS:
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr, asoc->peer.port, 0);
+ af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
scope = sctp_scope(peer_addr);
if (sctp_in_scope(&addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
@@ -2230,7 +2227,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
union sctp_addr *laddr,
struct sockaddr *addrs,
int addrcnt,
- __u16 flags)
+ __be16 flags)
{
sctp_addip_param_t param;
struct sctp_chunk *retval;
@@ -2363,14 +2360,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
}
/* Add response parameters to an ASCONF_ACK chunk. */
-static void sctp_add_asconf_response(struct sctp_chunk *chunk, __u32 crr_id,
- __u16 err_code, sctp_addip_param_t *asconf_param)
+static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
+ __be16 err_code, sctp_addip_param_t *asconf_param)
{
sctp_addip_param_t ack_param;
sctp_errhdr_t err_param;
int asconf_param_len = 0;
int err_param_len = 0;
- __u16 response_type;
+ __be16 response_type;
if (SCTP_ERROR_NO_ERROR == err_code) {
response_type = SCTP_PARAM_SUCCESS_REPORT;
@@ -2404,7 +2401,7 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __u32 crr_id,
}
/* Process a asconf parameter. */
-static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
+static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
struct sctp_chunk *asconf,
sctp_addip_param_t *asconf_param)
{
@@ -2413,7 +2410,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
union sctp_addr addr;
struct list_head *pos;
union sctp_addr_param *addr_param;
-
+
addr_param = (union sctp_addr_param *)
((void *)asconf_param + sizeof(sctp_addip_param_t));
@@ -2421,7 +2418,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
if (unlikely(!af))
return SCTP_ERROR_INV_PARAM;
- af->from_addr_param(&addr, addr_param, asoc->peer.port, 0);
+ af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
/* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
@@ -2487,7 +2484,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
sctp_addip_param_t *asconf_param;
struct sctp_chunk *asconf_ack;
- __u16 err_code;
+ __be16 err_code;
int length = 0;
int chunk_len = asconf->skb->len;
__u32 serial;
@@ -2586,7 +2583,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type));
- af->from_addr_param(&addr, addr_param, bp->port, 0);
+ af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
@@ -2630,7 +2627,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
* All TLVs after the failed response are considered unsuccessful unless a
* specific success indication is present for the parameter.
*/
-static __u16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
+static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
sctp_addip_param_t *asconf_param,
int no_err)
{
@@ -2638,7 +2635,7 @@ static __u16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
sctp_errhdr_t *err_param;
int length;
int asconf_ack_len = asconf_ack->skb->len;
- __u16 err_code;
+ __be16 err_code;
if (no_err)
err_code = SCTP_ERROR_NO_ERROR;
@@ -2694,7 +2691,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
int all_param_pass = 0;
int no_err = 1;
int retval = 0;
- __u16 err_code = SCTP_ERROR_NO_ERROR;
+ __be16 err_code = SCTP_ERROR_NO_ERROR;
/* Skip the chunkhdr and addiphdr from the last asconf sent and store
* a pointer to address parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 9c10bdec1af..7bbc6156e45 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -442,7 +442,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
" transport IP: port:%d failed.\n",
asoc,
(&transport->ipaddr),
- transport->ipaddr.v4.sin_port);
+ ntohs(transport->ipaddr.v4.sin_port));
sctp_assoc_control_transport(asoc, transport,
SCTP_TRANSPORT_DOWN,
SCTP_FAILED_THRESHOLD);
@@ -1360,12 +1360,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_INIT_FAILED:
- sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
+ sctp_cmd_init_failed(commands, asoc, cmd->obj.err);
break;
case SCTP_CMD_ASSOC_FAILED:
sctp_cmd_assoc_failed(commands, asoc, event_type,
- subtype, chunk, cmd->obj.u32);
+ subtype, chunk, cmd->obj.err);
break;
case SCTP_CMD_INIT_COUNTER_INC:
@@ -1420,7 +1420,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_PROCESS_CTSN:
/* Dummy up a SACK for processing. */
- sackh.cum_tsn_ack = cmd->obj.u32;
+ sackh.cum_tsn_ack = cmd->obj.be32;
sackh.a_rwnd = 0;
sackh.num_gap_ack_blocks = 0;
sackh.num_dup_tsns = 0;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 1c42fe983a5..27cc444aaf1 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -93,7 +93,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
- __u16 error, int sk_err,
+ __be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport);
@@ -443,7 +443,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
__u32 init_tag;
struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
- __u16 error;
+ sctp_error_t error;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -886,7 +886,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
SCTP_ERROR(ETIMEDOUT));
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
@@ -2138,7 +2138,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(SCTP_ERROR_STALE_COOKIE));
+ SCTP_PERR(SCTP_ERROR_STALE_COOKIE));
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -2158,7 +2158,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
* to give ample time to retransmit the new cookie and thus
* yield a higher probability of success on the reattempt.
*/
- stale = ntohl(*(suseconds_t *)((u8 *)err + sizeof(sctp_errhdr_t)));
+ stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
stale = (stale * 2) / 1000;
bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
@@ -2250,7 +2250,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
{
struct sctp_chunk *chunk = arg;
unsigned len;
- __u16 error = SCTP_ERROR_NO_ERROR;
+ __be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2275,7 +2275,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
- sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_U32(error));
+ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
@@ -2295,7 +2295,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
{
struct sctp_chunk *chunk = arg;
unsigned len;
- __u16 error = SCTP_ERROR_NO_ERROR;
+ __be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2357,7 +2357,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
* This is common code called by several sctp_sf_*_abort() functions above.
*/
static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
- __u16 error, int sk_err,
+ __be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport)
{
@@ -2370,7 +2370,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err));
/* CMD_INIT_FAILED will DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(error));
+ SCTP_PERR(error));
return SCTP_DISPOSITION_ABORT;
}
@@ -2466,7 +2466,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
* received by the SHUTDOWN sender.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN,
- SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack));
+ SCTP_BE32(chunk->subh.shutdown_hdr->cum_tsn_ack));
out:
return disposition;
@@ -2545,6 +2545,7 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
{
sctp_cwrhdr_t *cwr;
struct sctp_chunk *chunk = arg;
+ u32 lowest_tsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2556,14 +2557,14 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
cwr = (sctp_cwrhdr_t *) chunk->skb->data;
skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
- cwr->lowest_tsn = ntohl(cwr->lowest_tsn);
+ lowest_tsn = ntohl(cwr->lowest_tsn);
/* Does this CWR ack the last sent congestion notification? */
- if (TSN_lte(asoc->last_ecne_tsn, cwr->lowest_tsn)) {
+ if (TSN_lte(asoc->last_ecne_tsn, lowest_tsn)) {
/* Stop sending ECNE. */
sctp_add_cmd_sf(commands,
SCTP_CMD_ECN_CWR,
- SCTP_U32(cwr->lowest_tsn));
+ SCTP_U32(lowest_tsn));
}
return SCTP_DISPOSITION_CONSUME;
}
@@ -3360,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_ASCONF_ACK));
+ SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
@@ -3388,7 +3389,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_ASCONF_ACK));
+ SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
@@ -3743,12 +3744,12 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNREFUSED));
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(SCTP_ERROR_PROTO_VIOLATION));
+ SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
} else {
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_PROTO_VIOLATION));
+ SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
}
@@ -4062,7 +4063,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
SCTP_ERROR(ECONNABORTED));
/* Delete the established association. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_USER_ABORT));
+ SCTP_PERR(SCTP_ERROR_USER_ABORT));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
@@ -4199,7 +4200,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
SCTP_ERROR(ECONNREFUSED));
/* Delete the established association. */
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(SCTP_ERROR_USER_ABORT));
+ SCTP_PERR(SCTP_ERROR_USER_ABORT));
return retval;
}
@@ -4571,7 +4572,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
SCTP_ERROR(ETIMEDOUT));
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
@@ -4693,7 +4694,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -4745,7 +4746,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -4781,7 +4782,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
SCTP_ERROR(ETIMEDOUT));
/* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
@@ -4859,7 +4860,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
@@ -4915,7 +4916,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_ERROR));
+ SCTP_PERR(SCTP_ERROR_NO_ERROR));
return SCTP_DISPOSITION_DELETE_TCB;
nomem:
@@ -5365,7 +5366,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
- SCTP_U32(SCTP_ERROR_NO_DATA));
+ SCTP_PERR(SCTP_ERROR_NO_DATA));
SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
return SCTP_IERROR_NO_DATA;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 8bcca567615..733dd87b3a7 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -104,325 +104,322 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
};
}
+#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
+
#define TYPE_SCTP_DATA { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_data_6_2), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_data_6_2), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_eat_data_fast_4_4, .name = "sctp_sf_eat_data_fast_4_4"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_data_fast_4_4), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_DATA */
#define TYPE_SCTP_INIT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_do_5_1B_init, .name = "sctp_sf_do_5_1B_init"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_1B_init), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_1_siminit), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_1_siminit), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_do_9_2_reshutack, .name = "sctp_sf_do_9_2_reshutack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_reshutack), \
} /* TYPE_SCTP_INIT */
#define TYPE_SCTP_INIT_ACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_5_1C_ack, .name = "sctp_sf_do_5_1C_ack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_1C_ack), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_INIT_ACK */
#define TYPE_SCTP_SACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_SACK */
#define TYPE_SCTP_HEARTBEAT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
/* This should not happen, but we are nice. */ \
- {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \
} /* TYPE_SCTP_HEARTBEAT */
#define TYPE_SCTP_HEARTBEAT_ACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+ TYPE_SCTP_FUNC(sctp_sf_violation), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_HEARTBEAT_ACK */
#define TYPE_SCTP_ABORT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_pdiscard, .name = "sctp_sf_pdiscard"}, \
+ TYPE_SCTP_FUNC(sctp_sf_pdiscard), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_cookie_wait_abort, .name = "sctp_sf_cookie_wait_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_wait_abort), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_cookie_echoed_abort, \
- .name = "sctp_sf_cookie_echoed_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_abort), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_1_abort), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_shutdown_pending_abort, \
- .name = "sctp_sf_shutdown_pending_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_pending_abort), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_shutdown_sent_abort, \
- .name = "sctp_sf_shutdown_sent_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_sent_abort), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_1_abort), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_shutdown_ack_sent_abort, \
- .name = "sctp_sf_shutdown_ack_sent_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_ack_sent_abort), \
} /* TYPE_SCTP_ABORT */
#define TYPE_SCTP_SHUTDOWN { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_9_2_shutdown, .name = "sctp_sf_do_9_2_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_9_2_shutdown_ack, \
- .name = "sctp_sf_do_9_2_shutdown_ack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_SHUTDOWN */
#define TYPE_SCTP_SHUTDOWN_ACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_8_5_1_E_sa), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_8_5_1_E_sa), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+ TYPE_SCTP_FUNC(sctp_sf_violation), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+ TYPE_SCTP_FUNC(sctp_sf_violation), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_final), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+ TYPE_SCTP_FUNC(sctp_sf_violation), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_final), \
} /* TYPE_SCTP_SHUTDOWN_ACK */
#define TYPE_SCTP_ERROR { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_cookie_echoed_err, .name = "sctp_sf_cookie_echoed_err"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_err), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+ TYPE_SCTP_FUNC(sctp_sf_operr_notify), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+ TYPE_SCTP_FUNC(sctp_sf_operr_notify), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+ TYPE_SCTP_FUNC(sctp_sf_operr_notify), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_ERROR */
#define TYPE_SCTP_COOKIE_ECHO { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_do_5_1D_ce, .name = "sctp_sf_do_5_1D_ce"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_1D_ce), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \
} /* TYPE_SCTP_COOKIE_ECHO */
#define TYPE_SCTP_COOKIE_ACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_5_1E_ca, .name = "sctp_sf_do_5_1E_ca"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_5_1E_ca), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_COOKIE_ACK */
#define TYPE_SCTP_ECN_ECNE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecne), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecne), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecne), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecne), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecne), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_ECN_ECNE */
#define TYPE_SCTP_ECN_CWR { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_ECN_CWR */
#define TYPE_SCTP_SHUTDOWN_COMPLETE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_do_4_C, .name = "sctp_sf_do_4_C"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_4_C), \
} /* TYPE_SCTP_SHUTDOWN_COMPLETE */
/* The primary index for this table is the chunk type.
@@ -450,44 +447,44 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
#define TYPE_SCTP_ASCONF { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_asconf, .name = "sctp_sf_do_asconf"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_ASCONF */
#define TYPE_SCTP_ASCONF_ACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_asconf_ack, .name = "sctp_sf_do_asconf_ack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_ASCONF_ACK */
/* The primary index for this table is the chunk type.
@@ -500,23 +497,23 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
#define TYPE_SCTP_FWD_TSN { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ootb), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_eat_fwd_tsn_fast, .name = "sctp_sf_eat_fwd_tsn_fast"}, \
+ TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn_fast), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_FWD_TSN */
/* The primary index for this table is the chunk type.
@@ -529,167 +526,150 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
static const sctp_sm_table_entry_t
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_EMPTY */
- {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"},
+ TYPE_SCTP_FUNC(sctp_sf_ootb),
/* SCTP_STATE_CLOSED */
- {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"},
+ TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8),
/* SCTP_STATE_COOKIE_WAIT */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_COOKIE_ECHOED */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_ESTABLISHED */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_SHUTDOWN_PENDING */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_SHUTDOWN_SENT */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_SHUTDOWN_RECEIVED */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
/* SCTP_STATE_SHUTDOWN_ACK_SENT */
- {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+ TYPE_SCTP_FUNC(sctp_sf_unk_chunk),
}; /* chunk unknown */
#define TYPE_SCTP_PRIMITIVE_ASSOCIATE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_do_prm_asoc, .name = "sctp_sf_do_prm_asoc"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_asoc), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+ TYPE_SCTP_FUNC(sctp_sf_not_impl), \
} /* TYPE_SCTP_PRIMITIVE_ASSOCIATE */
#define TYPE_SCTP_PRIMITIVE_SHUTDOWN { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_cookie_wait_prm_shutdown, \
- .name = "sctp_sf_cookie_wait_prm_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_wait_prm_shutdown), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_cookie_echoed_prm_shutdown, \
- .name = "sctp_sf_cookie_echoed_prm_shutdown"},\
+ TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_prm_shutdown),\
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_9_2_prm_shutdown, \
- .name = "sctp_sf_do_9_2_prm_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_prm_shutdown), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \
} /* TYPE_SCTP_PRIMITIVE_SHUTDOWN */
#define TYPE_SCTP_PRIMITIVE_ABORT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_cookie_wait_prm_abort, \
- .name = "sctp_sf_cookie_wait_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_wait_prm_abort), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_cookie_echoed_prm_abort, \
- .name = "sctp_sf_cookie_echoed_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_prm_abort), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_9_1_prm_abort, \
- .name = "sctp_sf_do_9_1_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_1_prm_abort), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_shutdown_pending_prm_abort, \
- .name = "sctp_sf_shutdown_pending_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_pending_prm_abort), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_shutdown_sent_prm_abort, \
- .name = "sctp_sf_shutdown_sent_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_sent_prm_abort), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_9_1_prm_abort, \
- .name = "sctp_sf_do_9_1_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_1_prm_abort), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_shutdown_ack_sent_prm_abort, \
- .name = "sctp_sf_shutdown_ack_sent_prm_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_shutdown_ack_sent_prm_abort), \
} /* TYPE_SCTP_PRIMITIVE_ABORT */
#define TYPE_SCTP_PRIMITIVE_SEND { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
} /* TYPE_SCTP_PRIMITIVE_SEND */
#define TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_do_prm_requestheartbeat, \
- .name = "sctp_sf_do_prm_requestheartbeat"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \
} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
#define TYPE_SCTP_PRIMITIVE_ASCONF { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_closed), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_prm_asconf, .name = "sctp_sf_do_prm_asconf"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
/* The primary index for this table is the primitive type.
@@ -706,47 +686,44 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
#define TYPE_SCTP_OTHER_NO_PENDING_TSN { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_9_2_start_shutdown, \
- .name = "sctp_do_9_2_start_shutdown"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_start_shutdown), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_9_2_shutdown_ack, \
- .name = "sctp_sf_do_9_2_shutdown_ack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
}
#define TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_cookie_wait_icmp_abort, \
- .name = "sctp_sf_cookie_wait_icmp_abort"}, \
+ TYPE_SCTP_FUNC(sctp_sf_cookie_wait_icmp_abort), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+ TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
}
static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
@@ -756,215 +733,212 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
#define TYPE_SCTP_EVENT_TIMEOUT_NONE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_t1_cookie_timer_expire, \
- .name = "sctp_sf_t1_cookie_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t1_cookie_timer_expire), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T1_INIT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_t1_init_timer_expire, \
- .name = "sctp_sf_t1_init_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t1_init_timer_expire), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t2_timer_expire), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t2_timer_expire), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T3_RTX { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T4_RTO { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_t4_timer_expire, .name = "sctp_sf_t4_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t4_timer_expire), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+ TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_SACK { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+ TYPE_SCTP_FUNC(sctp_sf_bug), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+ TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
#define TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE { \
/* SCTP_STATE_EMPTY */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_CLOSED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_WAIT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_COOKIE_ECHOED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_ESTABLISHED */ \
- {.fn = sctp_sf_autoclose_timer_expire, \
- .name = "sctp_sf_autoclose_timer_expire"}, \
+ TYPE_SCTP_FUNC(sctp_sf_autoclose_timer_expire), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
- {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9f34dec6ff8..bdd8bd428b6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
struct sctp_association *, sctp_socket_type_t);
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
-extern kmem_cache_t *sctp_bucket_cachep;
+extern struct kmem_cache *sctp_bucket_cachep;
/* Get the sndbuf space available at the time on the association. */
static inline int sctp_wspace(struct sctp_association *asoc)
@@ -229,11 +229,9 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
struct sctp_transport *transport;
union sctp_addr *laddr = (union sctp_addr *)addr;
- laddr->v4.sin_port = ntohs(laddr->v4.sin_port);
addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
- (union sctp_addr *)addr,
+ laddr,
&transport);
- laddr->v4.sin_port = htons(laddr->v4.sin_port);
if (!addr_asoc)
return NULL;
@@ -368,9 +366,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
sctp_write_lock(&ep->base.addr_lock);
/* Use GFP_ATOMIC since BHs are disabled. */
- addr->v4.sin_port = ntohs(addr->v4.sin_port);
ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
- addr->v4.sin_port = htons(addr->v4.sin_port);
sctp_write_unlock(&ep->base.addr_lock);
sctp_local_bh_enable();
@@ -572,7 +568,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
memcpy(&saveaddr, addr, af->sockaddr_len);
- saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
retval = sctp_add_bind_addr(bp, &saveaddr, 0,
GFP_ATOMIC);
addr_buf += af->sockaddr_len;
@@ -607,9 +602,8 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
int cnt;
struct sctp_bind_addr *bp = &ep->base.bind_addr;
int retval = 0;
- union sctp_addr saveaddr;
void *addr_buf;
- struct sockaddr *sa_addr;
+ union sctp_addr *sa_addr;
struct sctp_af *af;
SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n",
@@ -627,19 +621,13 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
goto err_bindx_rem;
}
- /* The list may contain either IPv4 or IPv6 address;
- * determine the address length to copy the address to
- * saveaddr.
- */
- sa_addr = (struct sockaddr *)addr_buf;
- af = sctp_get_af_specific(sa_addr->sa_family);
+ sa_addr = (union sctp_addr *)addr_buf;
+ af = sctp_get_af_specific(sa_addr->sa.sa_family);
if (!af) {
retval = -EINVAL;
goto err_bindx_rem;
}
- memcpy(&saveaddr, sa_addr, af->sockaddr_len);
- saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
- if (saveaddr.v4.sin_port != bp->port) {
+ if (sa_addr->v4.sin_port != htons(bp->port)) {
retval = -EINVAL;
goto err_bindx_rem;
}
@@ -654,7 +642,7 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
sctp_local_bh_disable();
sctp_write_lock(&ep->base.addr_lock);
- retval = sctp_del_bind_addr(bp, &saveaddr);
+ retval = sctp_del_bind_addr(bp, sa_addr);
sctp_write_unlock(&ep->base.addr_lock);
sctp_local_bh_enable();
@@ -693,7 +681,6 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sctp_bind_addr *bp;
struct sctp_chunk *chunk;
union sctp_addr *laddr;
- union sctp_addr saveaddr;
void *addr_buf;
struct sctp_af *af;
struct list_head *pos, *pos1;
@@ -773,13 +760,11 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
for (i = 0; i < addrcnt; i++) {
laddr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(laddr->v4.sin_family);
- memcpy(&saveaddr, laddr, af->sockaddr_len);
- saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
list_for_each(pos1, &bp->address_list) {
saddr = list_entry(pos1,
struct sctp_sockaddr_entry,
list);
- if (sctp_cmp_addr_exact(&saddr->a, &saveaddr))
+ if (sctp_cmp_addr_exact(&saddr->a, laddr))
saddr->use_as_src = 0;
}
addr_buf += af->sockaddr_len;
@@ -979,7 +964,7 @@ static int __sctp_connect(struct sock* sk,
int err = 0;
int addrcnt = 0;
int walk_size = 0;
- struct sockaddr *sa_addr;
+ union sctp_addr *sa_addr;
void *addr_buf;
sp = sctp_sk(sk);
@@ -999,8 +984,8 @@ static int __sctp_connect(struct sock* sk,
/* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
- sa_addr = (struct sockaddr *)addr_buf;
- af = sctp_get_af_specific(sa_addr->sa_family);
+ sa_addr = (union sctp_addr *)addr_buf;
+ af = sctp_get_af_specific(sa_addr->sa.sa_family);
/* If the address family is not supported or if this address
* causes the address buffer to overflow return EINVAL.
@@ -1010,18 +995,16 @@ static int __sctp_connect(struct sock* sk,
goto out_free;
}
- err = sctp_verify_addr(sk, (union sctp_addr *)sa_addr,
- af->sockaddr_len);
+ err = sctp_verify_addr(sk, sa_addr, af->sockaddr_len);
if (err)
goto out_free;
memcpy(&to, sa_addr, af->sockaddr_len);
- to.v4.sin_port = ntohs(to.v4.sin_port);
/* Check if there already is a matching association on the
* endpoint (other than the one created here).
*/
- asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+ asoc2 = sctp_endpoint_lookup_assoc(ep, sa_addr, &transport);
if (asoc2 && asoc2 != asoc) {
if (asoc2->state >= SCTP_STATE_ESTABLISHED)
err = -EISCONN;
@@ -1034,7 +1017,7 @@ static int __sctp_connect(struct sock* sk,
* make sure that there is no peeled-off association matching
* the peer address even on another socket.
*/
- if (sctp_endpoint_is_peeled_off(ep, &to)) {
+ if (sctp_endpoint_is_peeled_off(ep, sa_addr)) {
err = -EADDRNOTAVAIL;
goto out_free;
}
@@ -1065,7 +1048,7 @@ static int __sctp_connect(struct sock* sk,
}
}
- scope = sctp_scope(&to);
+ scope = sctp_scope(sa_addr);
asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
if (!asoc) {
err = -ENOMEM;
@@ -1074,7 +1057,7 @@ static int __sctp_connect(struct sock* sk,
}
/* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
+ transport = sctp_assoc_add_peer(asoc, sa_addr, GFP_KERNEL,
SCTP_UNKNOWN);
if (!transport) {
err = -ENOMEM;
@@ -1427,11 +1410,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (msg_namelen > sizeof(to))
msg_namelen = sizeof(to);
memcpy(&to, msg->msg_name, msg_namelen);
- SCTP_DEBUG_PRINTK("Just memcpy'd. msg_name is "
- "0x%x:%u.\n",
- to.v4.sin_addr.s_addr, to.v4.sin_port);
-
- to.v4.sin_port = ntohs(to.v4.sin_port);
msg_name = msg->msg_name;
}
@@ -2768,6 +2746,46 @@ static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval,
return 0;
}
+/*
+ * 7.1.29. Set or Get the default context (SCTP_CONTEXT)
+ *
+ * The context field in the sctp_sndrcvinfo structure is normally only
+ * used when a failed message is retrieved holding the value that was
+ * sent down on the actual send call. This option allows the setting of
+ * a default context on an association basis that will be received on
+ * reading messages from the peer. This is especially helpful in the
+ * one-2-many model for an application to keep some reference to an
+ * internal state machine that is processing messages on the
+ * association. Note that the setting of this value only affects
+ * received messages from the peer and does not affect the value that is
+ * saved with outbound messages.
+ */
+static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
+ int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_sock *sp;
+ struct sctp_association *asoc;
+
+ if (optlen != sizeof(struct sctp_assoc_value))
+ return -EINVAL;
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
+
+ sp = sctp_sk(sk);
+
+ if (params.assoc_id != 0) {
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ return -EINVAL;
+ asoc->default_rcv_context = params.assoc_value;
+ } else {
+ sp->default_rcv_context = params.assoc_value;
+ }
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2879,6 +2897,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ADAPTION_LAYER:
retval = sctp_setsockopt_adaption_layer(sk, optval, optlen);
break;
+ case SCTP_CONTEXT:
+ retval = sctp_setsockopt_context(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
@@ -3038,6 +3059,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_context = 0;
sp->default_timetolive = 0;
+ sp->default_rcv_context = 0;
+
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
* overridden by the SCTP_INIT CMSG.
@@ -3217,8 +3240,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
status.sstat_outstrms = asoc->c.sinit_num_ostreams;
status.sstat_fragmentation_point = asoc->frag_point;
status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
- memcpy(&status.sstat_primary.spinfo_address,
- &(transport->ipaddr), sizeof(union sctp_addr));
+ memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
+ transport->af_specific->sockaddr_len);
/* Map ipv4 address into v4-mapped-on-v6 address. */
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
(union sctp_addr *)&status.sstat_primary.spinfo_address);
@@ -3372,6 +3395,7 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
{
struct sock *sk = asoc->base.sk;
struct socket *sock;
+ struct inet_sock *inetsk;
int err = 0;
/* An association cannot be branched off from an already peeled-off
@@ -3389,6 +3413,14 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
* asoc to the newsk.
*/
sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+
+ /* Make peeled-off sockets more like 1-1 accepted sockets.
+ * Set the daddr and initialize id to something more random
+ */
+ inetsk = inet_sk(sock->sk);
+ inetsk->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
+ inetsk->id = asoc->next_tsn ^ jiffies;
+
*sockp = sock;
return err;
@@ -3761,7 +3793,6 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len,
memcpy(&temp, &from->ipaddr, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
- temp.v4.sin_port = htons(temp.v4.sin_port);
if (copy_to_user(to, &temp, addrlen))
return -EFAULT;
to += addrlen ;
@@ -3812,7 +3843,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
if(space_left < addrlen)
return -ENOMEM;
- temp.v4.sin_port = htons(temp.v4.sin_port);
if (copy_to_user(to, &temp, addrlen))
return -EFAULT;
to += addrlen;
@@ -3836,10 +3866,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
sctp_assoc_t id;
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
+ struct list_head *pos, *temp;
struct sctp_sockaddr_entry *addr;
rwlock_t *addr_lock;
- unsigned long flags;
int cnt = 0;
if (len != sizeof(sctp_assoc_t))
@@ -3874,18 +3903,15 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- list_for_each(pos, &sctp_local_addr_list) {
+ list_for_each_safe(pos, temp, &sctp_local_addr_list) {
addr = list_entry(pos,
struct sctp_sockaddr_entry,
list);
if ((PF_INET == sk->sk_family) &&
- (AF_INET6 == addr->a.sa.sa_family))
+ (AF_INET6 == addr->a.sa.sa_family))
continue;
cnt++;
}
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock,
- flags);
} else {
cnt = 1;
}
@@ -3907,15 +3933,13 @@ done:
static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs,
void __user *to)
{
- struct list_head *pos;
+ struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
- unsigned long flags;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- list_for_each(pos, &sctp_local_addr_list) {
+ list_for_each_safe(pos, next, &sctp_local_addr_list) {
addr = list_entry(pos, struct sctp_sockaddr_entry, list);
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
@@ -3924,17 +3948,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- temp.v4.sin_port = htons(port);
- if (copy_to_user(to, &temp, addrlen)) {
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock,
- flags);
+ if (copy_to_user(to, &temp, addrlen))
return -EFAULT;
- }
+
to += addrlen;
cnt ++;
if (cnt >= max_addrs) break;
}
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
return cnt;
}
@@ -3942,15 +3962,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
void __user **to, size_t space_left)
{
- struct list_head *pos;
+ struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
- unsigned long flags;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
- list_for_each(pos, &sctp_local_addr_list) {
+ list_for_each_safe(pos, next, &sctp_local_addr_list) {
addr = list_entry(pos, struct sctp_sockaddr_entry, list);
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
@@ -3961,17 +3979,13 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if(space_left<addrlen)
return -ENOMEM;
- temp.v4.sin_port = htons(port);
- if (copy_to_user(*to, &temp, addrlen)) {
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock,
- flags);
+ if (copy_to_user(*to, &temp, addrlen))
return -EFAULT;
- }
+
*to += addrlen;
cnt ++;
space_left -= addrlen;
}
- sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
return cnt;
}
@@ -4046,7 +4060,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- temp.v4.sin_port = htons(temp.v4.sin_port);
if (copy_to_user(to, &temp, addrlen)) {
err = -EFAULT;
goto unlock;
@@ -4137,7 +4150,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if(space_left < addrlen)
return -ENOMEM; /*fixme: right error?*/
- temp.v4.sin_port = htons(temp.v4.sin_port);
if (copy_to_user(to, &temp, addrlen)) {
err = -EFAULT;
goto unlock;
@@ -4185,12 +4197,8 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
if (!asoc->peer.primary_path)
return -ENOTCONN;
- asoc->peer.primary_path->ipaddr.v4.sin_port =
- htons(asoc->peer.primary_path->ipaddr.v4.sin_port);
memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
- sizeof(union sctp_addr));
- asoc->peer.primary_path->ipaddr.v4.sin_port =
- ntohs(asoc->peer.primary_path->ipaddr.v4.sin_port);
+ asoc->peer.primary_path->af_specific->sockaddr_len);
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp,
(union sctp_addr *)&prim.ssp_addr);
@@ -4458,6 +4466,42 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len,
}
/*
+ * 7.1.29. Set or Get the default context (SCTP_CONTEXT)
+ * (chapter and verse is quoted at sctp_setsockopt_context())
+ */
+static int sctp_getsockopt_context(struct sock *sk, int len,
+ char __user *optval, int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_sock *sp;
+ struct sctp_association *asoc;
+
+ if (len != sizeof(struct sctp_assoc_value))
+ return -EINVAL;
+
+ if (copy_from_user(&params, optval, len))
+ return -EFAULT;
+
+ sp = sctp_sk(sk);
+
+ if (params.assoc_id != 0) {
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ return -EINVAL;
+ params.assoc_value = asoc->default_rcv_context;
+ } else {
+ params.assoc_value = sp->default_rcv_context;
+ }
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &params, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
* 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG)
*
* This socket option specifies the maximum size to put in any outgoing
@@ -4595,6 +4639,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_adaption_layer(sk, len, optval,
optlen);
break;
+ case SCTP_CONTEXT:
+ retval = sctp_getsockopt_context(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4636,9 +4683,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
unsigned short snum;
int ret;
- /* NOTE: Remember to put this back to net order. */
- addr->v4.sin_port = ntohs(addr->v4.sin_port);
- snum = addr->v4.sin_port;
+ snum = ntohs(addr->v4.sin_port);
SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum);
sctp_local_bh_disable();
@@ -4775,7 +4820,6 @@ fail_unlock:
fail:
sctp_local_bh_enable();
- addr->v4.sin_port = htons(addr->v4.sin_port);
return ret;
}
@@ -5015,7 +5059,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
{
struct sctp_bind_bucket *pp;
- pp = kmem_cache_alloc(sctp_bucket_cachep, SLAB_ATOMIC);
+ pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC);
SCTP_DBG_OBJCNT_INC(bind_bucket);
if (pp) {
pp->port = snum;
@@ -5074,7 +5118,7 @@ static int sctp_autobind(struct sock *sk)
{
union sctp_addr autoaddr;
struct sctp_af *af;
- unsigned short port;
+ __be16 port;
/* Initialize a local sockaddr structure to INADDR_ANY. */
af = sctp_sk(sk)->pf->af;
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index ac4fae161bc..42d9498c64f 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -401,13 +401,14 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map)
/* Refresh the gap ack information. */
if (sctp_tsnmap_has_gap(map)) {
+ __u16 start, end;
sctp_tsnmap_iter_init(map, &iter);
while (sctp_tsnmap_next_gap_ack(map, &iter,
- &map->gabs[gabs].start,
- &map->gabs[gabs].end)) {
+ &start,
+ &end)) {
- map->gabs[gabs].start = htons(map->gabs[gabs].start);
- map->gabs[gabs].end = htons(map->gabs[gabs].end);
+ map->gabs[gabs].start = htons(start);
+ map->gabs[gabs].end = htons(end);
gabs++;
if (gabs >= SCTP_MAX_GABS)
break;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index a015283a908..93ac63b055b 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -351,7 +351,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
struct sctp_remote_error *sre;
struct sk_buff *skb;
sctp_errhdr_t *ch;
- __u16 cause;
+ __be16 cause;
int elen;
ch = (sctp_errhdr_t *)(chunk->skb->data);
@@ -849,8 +849,10 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
*/
sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
+ /* context value that is set via SCTP_CONTEXT socket option. */
+ sinfo.sinfo_context = event->asoc->default_rcv_context;
+
/* These fields are not used while receiving. */
- sinfo.sinfo_context = 0;
sinfo.sinfo_timetolive = 0;
put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
diff --git a/net/socket.c b/net/socket.c
index 6c9b9b326d7..4e396312f8d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -77,7 +77,6 @@
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/highmem.h>
-#include <linux/divert.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -231,13 +230,13 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
#define SOCKFS_MAGIC 0x534F434B
-static kmem_cache_t *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __read_mostly;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
- ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+ ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
init_waitqueue_head(&ei->socket.wait);
@@ -258,7 +257,7 @@ static void sock_destroy_inode(struct inode *inode)
container_of(inode, struct socket_alloc, vfs_inode));
}
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
struct socket_alloc *ei = (struct socket_alloc *)foo;
@@ -306,7 +305,14 @@ static struct file_system_type sock_fs_type = {
static int sockfs_delete_dentry(struct dentry *dentry)
{
- return 1;
+ /*
+ * At creation time, we pretended this dentry was hashed
+ * (by clearing the DCACHE_UNHASHED bit in d_flags).
+ * At delete time, we restore the truth: not hashed
+ * (so that dput() can proceed correctly).
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;
+ return 0;
}
static struct dentry_operations sockfs_dentry_operations = {
.d_delete = sockfs_delete_dentry,
@@ -354,16 +360,22 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
this.name = name;
- this.hash = SOCK_INODE(sock)->i_ino;
+ this.hash = 0;
- file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
- if (unlikely(!file->f_dentry))
+ file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
+ if (unlikely(!file->f_path.dentry))
return -ENOMEM;
- file->f_dentry->d_op = &sockfs_dentry_operations;
- d_add(file->f_dentry, SOCK_INODE(sock));
- file->f_vfsmnt = mntget(sock_mnt);
- file->f_mapping = file->f_dentry->d_inode->i_mapping;
+ file->f_path.dentry->d_op = &sockfs_dentry_operations;
+ /*
+ * We don't want to push this dentry into the global dentry hash table.
+ * We pretend the dentry is already hashed, by unsetting DCACHE_UNHASHED.
+ * This permits a working /proc/$pid/fd/XXX on sockets.
+ */
+ file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
+ d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
+ file->f_path.mnt = mntget(sock_mnt);
+ file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
sock->file = file;
file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
@@ -401,7 +413,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */
- inode = file->f_dentry->d_inode;
+ inode = file->f_path.dentry->d_inode;
if (!S_ISSOCK(inode->i_mode)) {
*err = -ENOTSOCK;
return NULL;
@@ -852,11 +864,6 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
err = vlan_ioctl_hook(argp);
mutex_unlock(&vlan_ioctl_mutex);
break;
- case SIOCGIFDIVERT:
- case SIOCSIFDIVERT:
- /* Convert this to call through a hook */
- err = divert_ioctl(cmd, argp);
- break;
case SIOCADDDLCI:
case SIOCDELDLCI:
err = -ENOPKG;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b36b9463f5a..e1a104abb78 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -68,7 +68,7 @@ static struct rpc_credops gss_credops;
#define GSS_CRED_SLACK 1024 /* XXX: unused */
/* length of a krb5 verifier (48), plus data added before arguments when
* using integrity (two 4-byte integers): */
-#define GSS_VERF_SLACK 56
+#define GSS_VERF_SLACK 100
/* XXX this define must match the gssd define
* as it is passed to gssd to signal the use of
@@ -94,46 +94,6 @@ struct gss_auth {
static void gss_destroy_ctx(struct gss_cl_ctx *);
static struct rpc_pipe_ops gss_upcall_ops;
-void
-print_hexl(u32 *p, u_int length, u_int offset)
-{
- u_int i, j, jm;
- u8 c, *cp;
-
- dprintk("RPC: print_hexl: length %d\n",length);
- dprintk("\n");
- cp = (u8 *) p;
-
- for (i = 0; i < length; i += 0x10) {
- dprintk(" %04x: ", (u_int)(i + offset));
- jm = length - i;
- jm = jm > 16 ? 16 : jm;
-
- for (j = 0; j < jm; j++) {
- if ((j % 2) == 1)
- dprintk("%02x ", (u_int)cp[i+j]);
- else
- dprintk("%02x", (u_int)cp[i+j]);
- }
- for (; j < 16; j++) {
- if ((j % 2) == 1)
- dprintk(" ");
- else
- dprintk(" ");
- }
- dprintk(" ");
-
- for (j = 0; j < jm; j++) {
- c = cp[i+j];
- c = isprint(c) ? c : '.';
- dprintk("%c", c);
- }
- dprintk("\n");
- }
-}
-
-EXPORT_SYMBOL(print_hexl);
-
static inline struct gss_cl_ctx *
gss_get_ctx(struct gss_cl_ctx *ctx)
{
@@ -198,11 +158,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
- dest->data = kmalloc(len, GFP_KERNEL);
+ dest->data = kmemdup(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
dest->len = len;
- memcpy(dest->data, p, len);
return q;
}
@@ -542,7 +501,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!buf)
goto out;
- clnt = RPC_I(filp->f_dentry->d_inode)->private;
+ clnt = RPC_I(filp->f_path.dentry->d_inode)->private;
err = -EFAULT;
if (copy_from_user(buf, src, mlen))
goto err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index e11a40b25cc..d926cda8862 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -43,6 +43,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/gss_krb5.h>
+#include <linux/sunrpc/xdr.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -61,9 +62,6 @@ krb5_encrypt(
u8 local_iv[16] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
- dprintk("RPC: krb5_encrypt: input data:\n");
- print_hexl((u32 *)in, length, 0);
-
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
@@ -80,12 +78,9 @@ krb5_encrypt(
sg_set_buf(sg, out, length);
ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
-
- dprintk("RPC: krb5_encrypt: output data:\n");
- print_hexl((u32 *)out, length, 0);
out:
dprintk("RPC: krb5_encrypt returns %d\n",ret);
- return(ret);
+ return ret;
}
EXPORT_SYMBOL(krb5_encrypt);
@@ -103,9 +98,6 @@ krb5_decrypt(
u8 local_iv[16] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
- dprintk("RPC: krb5_decrypt: input data:\n");
- print_hexl((u32 *)in, length, 0);
-
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
@@ -121,83 +113,14 @@ krb5_decrypt(
sg_set_buf(sg, out, length);
ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
-
- dprintk("RPC: krb5_decrypt: output_data:\n");
- print_hexl((u32 *)out, length, 0);
out:
dprintk("RPC: gss_k5decrypt returns %d\n",ret);
- return(ret);
+ return ret;
}
EXPORT_SYMBOL(krb5_decrypt);
static int
-process_xdr_buf(struct xdr_buf *buf, int offset, int len,
- int (*actor)(struct scatterlist *, void *), void *data)
-{
- int i, page_len, thislen, page_offset, ret = 0;
- struct scatterlist sg[1];
-
- if (offset >= buf->head[0].iov_len) {
- offset -= buf->head[0].iov_len;
- } else {
- thislen = buf->head[0].iov_len - offset;
- if (thislen > len)
- thislen = len;
- sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
- ret = actor(sg, data);
- if (ret)
- goto out;
- offset = 0;
- len -= thislen;
- }
- if (len == 0)
- goto out;
-
- if (offset >= buf->page_len) {
- offset -= buf->page_len;
- } else {
- page_len = buf->page_len - offset;
- if (page_len > len)
- page_len = len;
- len -= page_len;
- page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
- i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
- thislen = PAGE_CACHE_SIZE - page_offset;
- do {
- if (thislen > page_len)
- thislen = page_len;
- sg->page = buf->pages[i];
- sg->offset = page_offset;
- sg->length = thislen;
- ret = actor(sg, data);
- if (ret)
- goto out;
- page_len -= thislen;
- i++;
- page_offset = 0;
- thislen = PAGE_CACHE_SIZE;
- } while (page_len != 0);
- offset = 0;
- }
- if (len == 0)
- goto out;
-
- if (offset < buf->tail[0].iov_len) {
- thislen = buf->tail[0].iov_len - offset;
- if (thislen > len)
- thislen = len;
- sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
- ret = actor(sg, data);
- len -= thislen;
- }
- if (len != 0)
- ret = -EINVAL;
-out:
- return ret;
-}
-
-static int
checksummer(struct scatterlist *sg, void *data)
{
struct hash_desc *desc = data;
@@ -207,23 +130,13 @@ checksummer(struct scatterlist *sg, void *data)
/* checksum the plaintext data and hdrlen bytes of the token header */
s32
-make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
+make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
int body_offset, struct xdr_netobj *cksum)
{
- char *cksumname;
struct hash_desc desc; /* XXX add to ctx? */
struct scatterlist sg[1];
int err;
- switch (cksumtype) {
- case CKSUMTYPE_RSA_MD5:
- cksumname = "md5";
- break;
- default:
- dprintk("RPC: krb5_make_checksum:"
- " unsupported checksum %d", cksumtype);
- return GSS_S_FAILURE;
- }
desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
@@ -237,7 +150,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
- err = process_xdr_buf(body, body_offset, body->len - body_offset,
+ err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
@@ -335,7 +248,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
desc.fragno = 0;
desc.fraglen = 0;
- ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
+ ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
return ret;
}
@@ -401,7 +314,7 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
desc.desc.flags = 0;
desc.fragno = 0;
desc.fraglen = 0;
- return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
EXPORT_SYMBOL(gss_decrypt_xdr_buf);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 325e72e4fd3..05d4bee86fc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -70,10 +70,9 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
- res->data = kmalloc(len, GFP_KERNEL);
+ res->data = kmemdup(p, len, GFP_KERNEL);
if (unlikely(res->data == NULL))
return ERR_PTR(-ENOMEM);
- memcpy(res->data, p, len);
res->len = len;
return q;
}
@@ -130,6 +129,7 @@ gss_import_sec_context_kerberos(const void *p,
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
+ int tmp;
if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
goto out_err;
@@ -137,18 +137,23 @@ gss_import_sec_context_kerberos(const void *p,
p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
if (IS_ERR(p))
goto out_err_free_ctx;
- p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init));
- if (IS_ERR(p))
+ /* The downcall format was designed before we completely understood
+ * the uses of the context fields; so it includes some stuff we
+ * just give some minimal sanity-checking, and some we ignore
+ * completely (like the next twenty bytes): */
+ if (unlikely(p + 20 > end || p + 20 < p))
goto out_err_free_ctx;
- p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed));
+ p += 20;
+ p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err_free_ctx;
- p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg));
- if (IS_ERR(p))
+ if (tmp != SGN_ALG_DES_MAC_MD5)
goto out_err_free_ctx;
- p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg));
+ p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err_free_ctx;
+ if (tmp != SEAL_ALG_DES)
+ goto out_err_free_ctx;
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err_free_ctx;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 08601ee4cd7..d0bb5064f8c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -77,7 +77,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
- s32 checksum_type;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr, *krb5_hdr, *msg_start;
@@ -88,21 +87,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
now = get_seconds();
- switch (ctx->signalg) {
- case SGN_ALG_DES_MAC_MD5:
- checksum_type = CKSUMTYPE_RSA_MD5;
- break;
- default:
- dprintk("RPC: gss_krb5_seal: ctx->signalg %d not"
- " supported\n", ctx->signalg);
- goto out_err;
- }
- if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
- dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n",
- ctx->sealalg);
- goto out_err;
- }
-
token->len = g_token_size(&ctx->mech_used, 22);
ptr = token->data;
@@ -115,37 +99,26 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
- *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg);
+ *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
- if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
- goto out_err;
-
- switch (ctx->signalg) {
- case SGN_ALG_DES_MAC_MD5:
- if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
- md5cksum.data, md5cksum.len))
- goto out_err;
- memcpy(krb5_hdr + 16,
- md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
- KRB5_CKSUM_LENGTH);
-
- dprintk("RPC: make_seal_token: cksum data: \n");
- print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
- break;
- default:
- BUG();
- }
+ if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
+ return GSS_S_FAILURE;
+
+ if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+ md5cksum.data, md5cksum.len))
+ return GSS_S_FAILURE;
+
+ memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+ KRB5_CKSUM_LENGTH);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
- if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, krb5_hdr + 16, krb5_hdr + 8)))
- goto out_err;
+ if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+ ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))
+ return GSS_S_FAILURE;
- return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
-out_err:
- return GSS_S_FAILURE;
+ return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 0828cf64100..87f8977ccec 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -78,7 +78,6 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
int signalg;
int sealalg;
- s32 checksum_type;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
@@ -86,96 +85,54 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
s32 seqnum;
unsigned char *ptr = (unsigned char *)read_token->data;
int bodysize;
- u32 ret = GSS_S_DEFECTIVE_TOKEN;
dprintk("RPC: krb5_read_token\n");
if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
read_token->len))
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
(*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
- /* get the sign and seal algorithms */
-
signalg = ptr[0] + (ptr[1] << 8);
- sealalg = ptr[2] + (ptr[3] << 8);
+ if (signalg != SGN_ALG_DES_MAC_MD5)
+ return GSS_S_DEFECTIVE_TOKEN;
- /* Sanity checks */
+ sealalg = ptr[2] + (ptr[3] << 8);
+ if (sealalg != SEAL_ALG_NONE)
+ return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
- goto out;
-
- if (sealalg != 0xffff)
- goto out;
-
- /* there are several mappings of seal algorithms to sign algorithms,
- but few enough that we can try them all. */
-
- if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
- (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
- (ctx->sealalg == SEAL_ALG_DES3KD &&
- signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
- goto out;
-
- /* compute the checksum of the message */
-
- /* initialize the the cksum */
- switch (signalg) {
- case SGN_ALG_DES_MAC_MD5:
- checksum_type = CKSUMTYPE_RSA_MD5;
- break;
- default:
- ret = GSS_S_DEFECTIVE_TOKEN;
- goto out;
- }
-
- switch (signalg) {
- case SGN_ALG_DES_MAC_MD5:
- ret = make_checksum(checksum_type, ptr - 2, 8,
- message_buffer, 0, &md5cksum);
- if (ret)
- goto out;
-
- ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data,
- md5cksum.data, 16);
- if (ret)
- goto out;
-
- if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
- ret = GSS_S_BAD_SIG;
- goto out;
- }
- break;
- default:
- ret = GSS_S_DEFECTIVE_TOKEN;
- goto out;
- }
+ return GSS_S_DEFECTIVE_TOKEN;
+
+ if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
+ return GSS_S_FAILURE;
+
+ if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
+ return GSS_S_FAILURE;
+
+ if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+ return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
- ret = GSS_S_CONTEXT_EXPIRED;
if (now > ctx->endtime)
- goto out;
+ return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
- ret = GSS_S_BAD_SIG;
- if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction,
- &seqnum)))
- goto out;
+ if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
+ return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
(!ctx->initiate && direction != 0))
- goto out;
+ return GSS_S_BAD_SIG;
- ret = GSS_S_COMPLETE;
-out:
- return ret;
+ return GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index cc45c1605f8..fe25b3d898d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -57,9 +57,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
>>PAGE_CACHE_SHIFT;
int offset = (buf->page_base + len - 1)
& (PAGE_CACHE_SIZE - 1);
- ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
+ ptr = kmap_atomic(buf->pages[last], KM_USER0);
pad = *(ptr + offset);
- kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
+ kunmap_atomic(ptr, KM_USER0);
goto out;
} else
len -= buf->page_len;
@@ -120,7 +120,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
struct xdr_buf *buf, struct page **pages)
{
struct krb5_ctx *kctx = ctx->internal_ctx_id;
- s32 checksum_type;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
@@ -134,21 +133,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
now = get_seconds();
- switch (kctx->signalg) {
- case SGN_ALG_DES_MAC_MD5:
- checksum_type = CKSUMTYPE_RSA_MD5;
- break;
- default:
- dprintk("RPC: gss_krb5_seal: kctx->signalg %d not"
- " supported\n", kctx->signalg);
- goto out_err;
- }
- if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
- dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n",
- kctx->sealalg);
- goto out_err;
- }
-
blocksize = crypto_blkcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
@@ -175,37 +159,27 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr - 2;
msg_start = krb5_hdr + 24;
- /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
- *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+ *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
memset(krb5_hdr + 4, 0xff, 4);
- *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+ *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
make_confounder(msg_start, blocksize);
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
- if (make_checksum(checksum_type, krb5_hdr, 8, buf,
+ if (make_checksum("md5", krb5_hdr, 8, buf,
offset + headlen - blocksize, &md5cksum))
- goto out_err;
+ return GSS_S_FAILURE;
buf->pages = tmp_pages;
- switch (kctx->signalg) {
- case SGN_ALG_DES_MAC_MD5:
- if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
- md5cksum.data, md5cksum.len))
- goto out_err;
- memcpy(krb5_hdr + 16,
- md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
- KRB5_CKSUM_LENGTH);
-
- dprintk("RPC: make_seal_token: cksum data: \n");
- print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
- break;
- default:
- BUG();
- }
+ if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+ md5cksum.data, md5cksum.len))
+ return GSS_S_FAILURE;
+ memcpy(krb5_hdr + 16,
+ md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+ KRB5_CKSUM_LENGTH);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
@@ -215,15 +189,13 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
seq_send, krb5_hdr + 16, krb5_hdr + 8)))
- goto out_err;
+ return GSS_S_FAILURE;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
pages))
- goto out_err;
+ return GSS_S_FAILURE;
- return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
-out_err:
- return GSS_S_FAILURE;
+ return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
u32
@@ -232,7 +204,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
struct krb5_ctx *kctx = ctx->internal_ctx_id;
int signalg;
int sealalg;
- s32 checksum_type;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
s32 now;
@@ -240,7 +211,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
s32 seqnum;
unsigned char *ptr;
int bodysize;
- u32 ret = GSS_S_DEFECTIVE_TOKEN;
void *data_start, *orig_start;
int data_len;
int blocksize;
@@ -250,98 +220,58 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
ptr = (u8 *)buf->head[0].iov_base + offset;
if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
buf->len - offset))
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
(*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
signalg = ptr[0] + (ptr[1] << 8);
- sealalg = ptr[2] + (ptr[3] << 8);
+ if (signalg != SGN_ALG_DES_MAC_MD5)
+ return GSS_S_DEFECTIVE_TOKEN;
- /* Sanity checks */
+ sealalg = ptr[2] + (ptr[3] << 8);
+ if (sealalg != SEAL_ALG_DES)
+ return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
- goto out;
-
- if (sealalg == 0xffff)
- goto out;
-
- /* in the current spec, there is only one valid seal algorithm per
- key type, so a simple comparison is ok */
-
- if (sealalg != kctx->sealalg)
- goto out;
-
- /* there are several mappings of seal algorithms to sign algorithms,
- but few enough that we can try them all. */
-
- if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
- (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
- (kctx->sealalg == SEAL_ALG_DES3KD &&
- signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
if (gss_decrypt_xdr_buf(kctx->enc, buf,
ptr + 22 - (unsigned char *)buf->head[0].iov_base))
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
- /* compute the checksum of the message */
+ if (make_checksum("md5", ptr - 2, 8, buf,
+ ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+ return GSS_S_FAILURE;
- /* initialize the the cksum */
- switch (signalg) {
- case SGN_ALG_DES_MAC_MD5:
- checksum_type = CKSUMTYPE_RSA_MD5;
- break;
- default:
- ret = GSS_S_DEFECTIVE_TOKEN;
- goto out;
- }
-
- switch (signalg) {
- case SGN_ALG_DES_MAC_MD5:
- ret = make_checksum(checksum_type, ptr - 2, 8, buf,
- ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
- if (ret)
- goto out;
-
- ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
- md5cksum.data, md5cksum.len);
- if (ret)
- goto out;
-
- if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
- ret = GSS_S_BAD_SIG;
- goto out;
- }
- break;
- default:
- ret = GSS_S_DEFECTIVE_TOKEN;
- goto out;
- }
+ if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+ md5cksum.data, md5cksum.len))
+ return GSS_S_FAILURE;
+
+ if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+ return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
- ret = GSS_S_CONTEXT_EXPIRED;
if (now > kctx->endtime)
- goto out;
+ return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
- ret = GSS_S_BAD_SIG;
- if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
- &seqnum)))
- goto out;
+ if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
+ &seqnum))
+ return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
(!kctx->initiate && direction != 0))
- goto out;
+ return GSS_S_BAD_SIG;
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
@@ -354,11 +284,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
buf->head[0].iov_len -= (data_start - orig_start);
buf->len -= (data_start - orig_start);
- ret = GSS_S_DEFECTIVE_TOKEN;
if (gss_krb5_remove_padding(buf, blocksize))
- goto out;
+ return GSS_S_DEFECTIVE_TOKEN;
- ret = GSS_S_COMPLETE;
-out:
- return ret;
+ return GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index bdedf456bc1..41465072d0b 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -76,140 +76,79 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
- res->data = kmalloc(len, GFP_KERNEL);
+ res->data = kmemdup(p, len, GFP_KERNEL);
if (unlikely(res->data == NULL))
return ERR_PTR(-ENOMEM);
- memcpy(res->data, p, len);
return q;
}
-static inline const void *
-get_key(const void *p, const void *end, struct crypto_blkcipher **res,
- int *resalg)
-{
- struct xdr_netobj key = { 0 };
- int setkey = 0;
- char *alg_name;
-
- p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
- if (IS_ERR(p))
- goto out_err;
- p = simple_get_netobj(p, end, &key);
- if (IS_ERR(p))
- goto out_err;
-
- switch (*resalg) {
- case NID_des_cbc:
- alg_name = "cbc(des)";
- setkey = 1;
- break;
- case NID_cast5_cbc:
- /* XXXX here in name only, not used */
- alg_name = "cbc(cast5)";
- setkey = 0; /* XXX will need to set to 1 */
- break;
- case NID_md5:
- if (key.len == 0) {
- dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
- }
- alg_name = "md5";
- setkey = 0;
- break;
- default:
- dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
- goto out_err_free_key;
- }
- *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(*res)) {
- printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
- *res = NULL;
- goto out_err_free_key;
- }
- if (setkey) {
- if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
- printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
- goto out_err_free_tfm;
- }
- }
-
- if(key.len > 0)
- kfree(key.data);
- return p;
-
-out_err_free_tfm:
- crypto_free_blkcipher(*res);
-out_err_free_key:
- if(key.len > 0)
- kfree(key.data);
- p = ERR_PTR(-EINVAL);
-out_err:
- return p;
-}
-
static int
gss_import_sec_context_spkm3(const void *p, size_t len,
struct gss_ctx *ctx_id)
{
const void *end = (const void *)((const char *)p + len);
struct spkm3_ctx *ctx;
+ int version;
if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
goto out_err;
+ p = simple_get_bytes(p, end, &version, sizeof(version));
+ if (IS_ERR(p))
+ goto out_err_free_ctx;
+ if (version != 1) {
+ dprintk("RPC: unknown spkm3 token format: obsolete nfs-utils?\n");
+ goto out_err_free_ctx;
+ }
+
p = simple_get_netobj(p, end, &ctx->ctx_id);
if (IS_ERR(p))
goto out_err_free_ctx;
- p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop));
+ p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err_free_ctx_id;
p = simple_get_netobj(p, end, &ctx->mech_used);
if (IS_ERR(p))
- goto out_err_free_mech;
+ goto out_err_free_ctx_id;
p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
if (IS_ERR(p))
goto out_err_free_mech;
- p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags));
+ p = simple_get_netobj(p, end, &ctx->conf_alg);
if (IS_ERR(p))
goto out_err_free_mech;
- p = simple_get_netobj(p, end, &ctx->share_key);
- if (IS_ERR(p))
- goto out_err_free_s_key;
-
- p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg);
+ p = simple_get_netobj(p, end, &ctx->derived_conf_key);
if (IS_ERR(p))
- goto out_err_free_s_key;
+ goto out_err_free_conf_alg;
- p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg);
+ p = simple_get_netobj(p, end, &ctx->intg_alg);
if (IS_ERR(p))
- goto out_err_free_key1;
+ goto out_err_free_conf_key;
- p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg));
+ p = simple_get_netobj(p, end, &ctx->derived_integ_key);
if (IS_ERR(p))
- goto out_err_free_key2;
-
- p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg));
- if (IS_ERR(p))
- goto out_err_free_key2;
+ goto out_err_free_intg_alg;
if (p != end)
- goto out_err_free_key2;
+ goto out_err_free_intg_key;
ctx_id->internal_ctx_id = ctx;
dprintk("Successfully imported new spkm context.\n");
return 0;
-out_err_free_key2:
- crypto_free_blkcipher(ctx->derived_integ_key);
-out_err_free_key1:
- crypto_free_blkcipher(ctx->derived_conf_key);
-out_err_free_s_key:
- kfree(ctx->share_key.data);
+out_err_free_intg_key:
+ kfree(ctx->derived_integ_key.data);
+out_err_free_intg_alg:
+ kfree(ctx->intg_alg.data);
+out_err_free_conf_key:
+ kfree(ctx->derived_conf_key.data);
+out_err_free_conf_alg:
+ kfree(ctx->conf_alg.data);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err_free_ctx_id:
@@ -221,13 +160,16 @@ out_err:
}
static void
-gss_delete_sec_context_spkm3(void *internal_ctx) {
+gss_delete_sec_context_spkm3(void *internal_ctx)
+{
struct spkm3_ctx *sctx = internal_ctx;
- crypto_free_blkcipher(sctx->derived_integ_key);
- crypto_free_blkcipher(sctx->derived_conf_key);
- kfree(sctx->share_key.data);
+ kfree(sctx->derived_integ_key.data);
+ kfree(sctx->intg_alg.data);
+ kfree(sctx->derived_conf_key.data);
+ kfree(sctx->conf_alg.data);
kfree(sctx->mech_used.data);
+ kfree(sctx->ctx_id.data);
kfree(sctx);
}
@@ -239,7 +181,6 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx,
u32 maj_stat = 0;
struct spkm3_ctx *sctx = ctx->internal_ctx_id;
- dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
@@ -254,10 +195,9 @@ gss_get_mic_spkm3(struct gss_ctx *ctx,
u32 err = 0;
struct spkm3_ctx *sctx = ctx->internal_ctx_id;
- dprintk("RPC: gss_get_mic_spkm3\n");
-
err = spkm3_make_token(sctx, message_buffer,
- message_token, SPKM_MIC_TOK);
+ message_token, SPKM_MIC_TOK);
+ dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
return err;
}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 18c7862bc23..b179d58c624 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -39,11 +39,17 @@
#include <linux/sunrpc/gss_spkm3.h>
#include <linux/random.h>
#include <linux/crypto.h>
+#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
+#include <linux/sunrpc/xdr.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
+const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
+
/*
* spkm3_make_token()
*
@@ -66,29 +72,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
int ctxelen = 0, ctxzbit = 0;
int md5elen = 0, md5zbit = 0;
- dprintk("RPC: spkm3_make_token\n");
-
now = jiffies;
if (ctx->ctx_id.len != 16) {
dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
- ctx->ctx_id.len);
+ ctx->ctx_id.len);
goto out_err;
}
-
- switch (ctx->intg_alg) {
- case NID_md5:
- checksum_type = CKSUMTYPE_RSA_MD5;
- break;
- default:
- dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not"
- " supported\n", ctx->intg_alg);
- goto out_err;
- }
- /* XXX since we don't support WRAP, perhaps we don't care... */
- if (ctx->conf_alg != NID_cast5_cbc) {
- dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n",
- ctx->conf_alg);
+
+ if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
+ dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm."
+ "only support hmac-md5 I-ALG.\n");
+ goto out_err;
+ } else
+ checksum_type = CKSUMTYPE_HMAC_MD5;
+
+ if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
+ dprintk("RPC: gss_spkm3_seal: unsupported C-ALG algorithm\n");
goto out_err;
}
@@ -96,10 +96,10 @@ spkm3_make_token(struct spkm3_ctx *ctx,
/* Calculate checksum over the mic-header */
asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
- ctxelen, ctxzbit);
-
- if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len,
- text, 0, &md5cksum))
+ ctxelen, ctxzbit);
+ if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
+ (char *)mic_hdr.data, mic_hdr.len,
+ text, 0, &md5cksum))
goto out_err;
asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
@@ -121,7 +121,66 @@ spkm3_make_token(struct spkm3_ctx *ctx,
return GSS_S_COMPLETE;
out_err:
+ if (md5cksum.data)
+ kfree(md5cksum.data);
+
token->data = NULL;
token->len = 0;
return GSS_S_FAILURE;
}
+
+static int
+spkm3_checksummer(struct scatterlist *sg, void *data)
+{
+ struct hash_desc *desc = data;
+
+ return crypto_hash_update(desc, sg, sg->length);
+}
+
+/* checksum the plaintext data and hdrlen bytes of the token header */
+s32
+make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
+ unsigned int hdrlen, struct xdr_buf *body,
+ unsigned int body_offset, struct xdr_netobj *cksum)
+{
+ char *cksumname;
+ struct hash_desc desc; /* XXX add to ctx? */
+ struct scatterlist sg[1];
+ int err;
+
+ switch (cksumtype) {
+ case CKSUMTYPE_HMAC_MD5:
+ cksumname = "md5";
+ break;
+ default:
+ dprintk("RPC: spkm3_make_checksum:"
+ " unsupported checksum %d", cksumtype);
+ return GSS_S_FAILURE;
+ }
+
+ if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
+
+ desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(desc.tfm))
+ return GSS_S_FAILURE;
+ cksum->len = crypto_hash_digestsize(desc.tfm);
+ desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ err = crypto_hash_setkey(desc.tfm, key->data, key->len);
+ if (err)
+ goto out;
+
+ sg_set_buf(sg, header, hdrlen);
+ crypto_hash_update(&desc, sg, 1);
+
+ xdr_process_buf(body, body_offset, body->len - body_offset,
+ spkm3_checksummer, &desc);
+ crypto_hash_final(&desc, cksum->data);
+
+out:
+ crypto_free_hash(desc.tfm);
+
+ return err ? GSS_S_FAILURE : 0;
+}
+
+EXPORT_SYMBOL(make_spkm3_checksum);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 854a983ccf2..35188b6ea8f 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -172,10 +172,10 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
*(u8 *)hptr++ = zbit;
memcpy(hptr, ctxdata, elen);
hptr += elen;
- *hdrlen = hptr - top;
+ *hdrlen = hptr - top;
}
-
-/*
+
+/*
* spkm3_mic_innercontext_token()
*
* *tokp points to the beginning of the SPKM_MIC token described
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 8537f581ef9..e54581ca757 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -54,70 +54,70 @@ spkm3_read_token(struct spkm3_ctx *ctx,
struct xdr_buf *message_buffer, /* signbuf */
int toktype)
{
+ s32 checksum_type;
s32 code;
struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
unsigned char *ptr = (unsigned char *)read_token->data;
- unsigned char *cksum;
+ unsigned char *cksum;
int bodysize, md5elen;
int mic_hdrlen;
u32 ret = GSS_S_DEFECTIVE_TOKEN;
- dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len);
-
if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
&bodysize, &ptr, read_token->len))
goto out;
/* decode the token */
- if (toktype == SPKM_MIC_TOK) {
-
- if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
- goto out;
-
- if (*cksum++ != 0x03) {
- dprintk("RPC: spkm3_read_token BAD checksum type\n");
- goto out;
- }
- md5elen = *cksum++;
- cksum++; /* move past the zbit */
-
- if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
- goto out;
-
- /* HARD CODED FOR MD5 */
-
- /* compute the checksum of the message.
- * ptr + 2 = start of header piece of checksum
- * mic_hdrlen + 2 = length of header piece of checksum
- */
- ret = GSS_S_DEFECTIVE_TOKEN;
- code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2,
- mic_hdrlen + 2,
- message_buffer, 0, &md5cksum);
-
- if (code)
- goto out;
-
- dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n",
- wire_cksum.len);
- dprintk(" md5cksum.data\n");
- print_hexl((u32 *) md5cksum.data, 16, 0);
- dprintk(" cksum.data:\n");
- print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0);
-
- ret = GSS_S_BAD_SIG;
- code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
- if (code)
- goto out;
-
- } else {
- dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype);
+ if (toktype != SPKM_MIC_TOK) {
+ dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
+ goto out;
+ }
+
+ if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
+ goto out;
+
+ if (*cksum++ != 0x03) {
+ dprintk("RPC: spkm3_read_token BAD checksum type\n");
+ goto out;
+ }
+ md5elen = *cksum++;
+ cksum++; /* move past the zbit */
+
+ if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
+ goto out;
+
+ /* HARD CODED FOR MD5 */
+
+ /* compute the checksum of the message.
+ * ptr + 2 = start of header piece of checksum
+ * mic_hdrlen + 2 = length of header piece of checksum
+ */
+ ret = GSS_S_DEFECTIVE_TOKEN;
+ if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
+ dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm\n");
+ goto out;
+ }
+
+ checksum_type = CKSUMTYPE_HMAC_MD5;
+
+ code = make_spkm3_checksum(checksum_type,
+ &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
+ message_buffer, 0, &md5cksum);
+
+ if (code)
+ goto out;
+
+ ret = GSS_S_BAD_SIG;
+ code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
+ if (code) {
+ dprintk("RPC: bad MIC checksum\n");
goto out;
}
+
/* XXX: need to add expiration and sequencing */
ret = GSS_S_COMPLETE;
out:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1f0f079ffa6..066c64a97fd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -113,9 +113,7 @@ static int rsi_match(struct cache_head *a, struct cache_head *b)
static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len)
{
dst->len = len;
- dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL);
- if (dst->data)
- memcpy(dst->data, src, len);
+ dst->data = (len ? kmemdup(src, len, GFP_KERNEL) : NULL);
if (len && !dst->data)
return -ENOMEM;
return 0;
@@ -756,10 +754,9 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
if (!new)
goto out;
kref_init(&new->h.ref);
- new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+ new->h.name = kstrdup(name, GFP_KERNEL);
if (!new->h.name)
goto out_free_dom;
- strcpy(new->h.name, name);
new->h.flavour = &svcauthops_gss;
new->pseudoflavor = pseudoflavor;
@@ -807,19 +804,19 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
integ_len = svc_getnl(&buf->head[0]);
if (integ_len & 3)
- goto out;
+ return stat;
if (integ_len > buf->len)
- goto out;
+ return stat;
if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
BUG();
/* copy out mic... */
if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
BUG();
if (mic.len > RPC_MAX_AUTH_SIZE)
- goto out;
+ return stat;
mic.data = kmalloc(mic.len, GFP_KERNEL);
if (!mic.data)
- goto out;
+ return stat;
if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
goto out;
maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
@@ -829,6 +826,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
goto out;
stat = 0;
out:
+ kfree(mic.data);
return stat;
}
@@ -1068,7 +1066,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
}
switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
case -EAGAIN:
- goto drop;
+ case -ETIMEDOUT:
case -ENOENT:
goto drop;
case 0:
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 00cb388ece0..14274490f92 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -34,7 +34,7 @@
#define RPCDBG_FACILITY RPCDBG_CACHE
-static void cache_defer_req(struct cache_req *req, struct cache_head *item);
+static int cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);
static void cache_init(struct cache_head *h)
@@ -185,6 +185,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
*
* Returns 0 if the cache_head can be used, or cache_puts it and returns
* -EAGAIN if upcall is pending,
+ * -ETIMEDOUT if upcall failed and should be retried,
* -ENOENT if cache entry was negative
*/
int cache_check(struct cache_detail *detail,
@@ -236,7 +237,8 @@ int cache_check(struct cache_detail *detail,
}
if (rv == -EAGAIN)
- cache_defer_req(rqstp, h);
+ if (cache_defer_req(rqstp, h) != 0)
+ rv = -ETIMEDOUT;
if (rv)
cache_put(h, detail);
@@ -284,8 +286,8 @@ static struct file_operations cache_file_operations;
static struct file_operations content_file_operations;
static struct file_operations cache_flush_operations;
-static void do_cache_clean(void *data);
-static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
+static void do_cache_clean(struct work_struct *work);
+static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
void cache_register(struct cache_detail *cd)
{
@@ -337,7 +339,7 @@ void cache_register(struct cache_detail *cd)
spin_unlock(&cache_list_lock);
/* start the cleaning process */
- schedule_work(&cache_cleaner);
+ schedule_delayed_work(&cache_cleaner, 0);
}
int cache_unregister(struct cache_detail *cd)
@@ -461,7 +463,7 @@ static int cache_clean(void)
/*
* We want to regularly clean the cache, so we need to schedule some work ...
*/
-static void do_cache_clean(void *data)
+static void do_cache_clean(struct work_struct *work)
{
int delay = 5;
if (cache_clean() == -1)
@@ -523,14 +525,21 @@ static LIST_HEAD(cache_defer_list);
static struct list_head cache_defer_hash[DFR_HASHSIZE];
static int cache_defer_cnt;
-static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+static int cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq;
int hash = DFR_HASH(item);
+ if (cache_defer_cnt >= DFR_MAX) {
+ /* too much in the cache, randomly drop this one,
+ * or continue and drop the oldest below
+ */
+ if (net_random()&1)
+ return -ETIMEDOUT;
+ }
dreq = req->defer(req);
if (dreq == NULL)
- return;
+ return -ETIMEDOUT;
dreq->item = item;
dreq->recv_time = get_seconds();
@@ -546,17 +555,8 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item)
/* it is in, now maybe clean up */
dreq = NULL;
if (++cache_defer_cnt > DFR_MAX) {
- /* too much in the cache, randomly drop
- * first or last
- */
- if (net_random()&1)
- dreq = list_entry(cache_defer_list.next,
- struct cache_deferred_req,
- recent);
- else
- dreq = list_entry(cache_defer_list.prev,
- struct cache_deferred_req,
- recent);
+ dreq = list_entry(cache_defer_list.prev,
+ struct cache_deferred_req, recent);
list_del(&dreq->recent);
list_del(&dreq->hash);
cache_defer_cnt--;
@@ -571,6 +571,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item)
/* must have just been validated... */
cache_revisit_request(item);
}
+ return 0;
}
static void cache_revisit_request(struct cache_head *item)
@@ -670,7 +671,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
struct cache_reader *rp = filp->private_data;
struct cache_request *rq;
- struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+ struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
int err;
if (count == 0)
@@ -747,7 +748,7 @@ cache_write(struct file *filp, const char __user *buf, size_t count,
loff_t *ppos)
{
int err;
- struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+ struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
if (count == 0)
return 0;
@@ -778,7 +779,7 @@ cache_poll(struct file *filp, poll_table *wait)
unsigned int mask;
struct cache_reader *rp = filp->private_data;
struct cache_queue *cq;
- struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+ struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
poll_wait(filp, &queue_wait, wait);
@@ -1254,7 +1255,7 @@ static struct file_operations content_file_operations = {
static ssize_t read_flush(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
+ struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
char tbuf[20];
unsigned long p = *ppos;
int len;
@@ -1275,7 +1276,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
static ssize_t write_flush(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
+ struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
char tbuf[20];
char *ep;
long flushtime;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 78696f2dc7d..aba528b9ae7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -27,6 +27,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/smp_lock.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
@@ -141,6 +142,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
+ err = -ENOMEM;
+ if (clnt->cl_metrics == NULL)
+ goto out_no_stats;
+ clnt->cl_program = program;
if (!xprt_bound(clnt->cl_xprt))
clnt->cl_autobind = 1;
@@ -173,6 +178,8 @@ out_no_auth:
rpc_put_mount();
}
out_no_path:
+ rpc_free_iostats(clnt->cl_metrics);
+out_no_stats:
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
kfree(clnt);
@@ -252,13 +259,19 @@ struct rpc_clnt *
rpc_clone_client(struct rpc_clnt *clnt)
{
struct rpc_clnt *new;
+ int err = -ENOMEM;
- new = kmalloc(sizeof(*new), GFP_KERNEL);
+ new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
if (!new)
goto out_no_clnt;
- memcpy(new, clnt, sizeof(*new));
atomic_set(&new->cl_count, 1);
atomic_set(&new->cl_users, 0);
+ new->cl_metrics = rpc_alloc_iostats(clnt);
+ if (new->cl_metrics == NULL)
+ goto out_no_stats;
+ err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
+ if (err != 0)
+ goto out_no_path;
new->cl_parent = clnt;
atomic_inc(&clnt->cl_count);
new->cl_xprt = xprt_get(clnt->cl_xprt);
@@ -266,16 +279,17 @@ rpc_clone_client(struct rpc_clnt *clnt)
new->cl_autobind = 0;
new->cl_oneshot = 0;
new->cl_dead = 0;
- if (!IS_ERR(new->cl_dentry))
- dget(new->cl_dentry);
rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
- new->cl_metrics = rpc_alloc_iostats(clnt);
return new;
+out_no_path:
+ rpc_free_iostats(new->cl_metrics);
+out_no_stats:
+ kfree(new);
out_no_clnt:
- printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
- return ERR_PTR(-ENOMEM);
+ dprintk("RPC: %s returned error %d\n", __FUNCTION__, err);
+ return ERR_PTR(err);
}
/*
@@ -328,16 +342,14 @@ rpc_destroy_client(struct rpc_clnt *clnt)
rpcauth_destroy(clnt->cl_auth);
clnt->cl_auth = NULL;
}
- if (clnt->cl_parent != clnt) {
- if (!IS_ERR(clnt->cl_dentry))
- dput(clnt->cl_dentry);
- rpc_destroy_client(clnt->cl_parent);
- goto out_free;
- }
if (!IS_ERR(clnt->cl_dentry)) {
rpc_rmdir(clnt->cl_dentry);
rpc_put_mount();
}
+ if (clnt->cl_parent != clnt) {
+ rpc_destroy_client(clnt->cl_parent);
+ goto out_free;
+ }
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
@@ -467,10 +479,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
BUG_ON(flags & RPC_TASK_ASYNC);
- status = -ENOMEM;
task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
if (task == NULL)
- goto out;
+ return -ENOMEM;
/* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
rpc_task_sigmask(task, &oldset);
@@ -479,15 +490,17 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
/* Set up the call info struct and execute the task */
status = task->tk_status;
- if (status == 0) {
- atomic_inc(&task->tk_count);
- status = rpc_execute(task);
- if (status == 0)
- status = task->tk_status;
+ if (status != 0) {
+ rpc_release_task(task);
+ goto out;
}
- rpc_restore_sigmask(&oldset);
- rpc_release_task(task);
+ atomic_inc(&task->tk_count);
+ status = rpc_execute(task);
+ if (status == 0)
+ status = task->tk_status;
+ rpc_put_task(task);
out:
+ rpc_restore_sigmask(&oldset);
return status;
}
@@ -529,8 +542,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
rpc_restore_sigmask(&oldset);
return status;
out_release:
- if (tk_ops->rpc_release != NULL)
- tk_ops->rpc_release(data);
+ rpc_release_calldata(tk_ops, data);
return status;
}
@@ -582,7 +594,11 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
{
struct rpc_xprt *xprt = clnt->cl_xprt;
- return xprt->ops->print_addr(xprt, format);
+
+ if (xprt->address_strings[format] != NULL)
+ return xprt->address_strings[format];
+ else
+ return "unprintable";
}
EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
@@ -812,8 +828,10 @@ call_encode(struct rpc_task *task)
if (encode == NULL)
return;
+ lock_kernel();
task->tk_status = rpcauth_wrap_req(task, encode, req, p,
task->tk_msg.rpc_argp);
+ unlock_kernel();
if (task->tk_status == -ENOMEM) {
/* XXX: Is this sane? */
rpc_delay(task, 3*HZ);
@@ -1144,9 +1162,12 @@ call_decode(struct rpc_task *task)
task->tk_action = rpc_exit_task;
- if (decode)
+ if (decode) {
+ lock_kernel();
task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
task->tk_msg.rpc_resp);
+ unlock_kernel();
+ }
dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index e52afab413d..3946ec3eb51 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -101,14 +101,14 @@ void rpc_getport(struct rpc_task *task)
/* Autobind on cloned rpc clients is discouraged */
BUG_ON(clnt->cl_parent != clnt);
+ status = -EACCES; /* tell caller to check again */
+ if (xprt_test_and_set_binding(xprt))
+ goto bailout_nowake;
+
/* Put self on queue before sending rpcbind request, in case
* pmap_getport_done completes before we return from rpc_run_task */
rpc_sleep_on(&xprt->binding, task, NULL, NULL);
- status = -EACCES; /* tell caller to check again */
- if (xprt_test_and_set_binding(xprt))
- goto bailout_nofree;
-
/* Someone else may have bound if we slept */
status = 0;
if (xprt_bound(xprt))
@@ -134,7 +134,7 @@ void rpc_getport(struct rpc_task *task)
child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
if (IS_ERR(child))
goto bailout;
- rpc_release_task(child);
+ rpc_put_task(child);
task->tk_xprt->stat.bind_count++;
return;
@@ -143,8 +143,9 @@ bailout:
pmap_map_free(map);
xprt_put(xprt);
bailout_nofree:
- task->tk_status = status;
pmap_wake_portmap_waiters(xprt, status);
+bailout_nowake:
+ task->tk_status = status;
}
#ifdef CONFIG_ROOT_NFS
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9a0b41a97f9..89273d35e0c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -33,7 +33,7 @@ static int rpc_mount_count;
static struct file_system_type rpc_pipe_fs_type;
-static kmem_cache_t *rpc_inode_cachep __read_mostly;
+static struct kmem_cache *rpc_inode_cachep __read_mostly;
#define RPC_UPCALL_TIMEOUT (30*HZ)
@@ -54,10 +54,11 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
}
static void
-rpc_timeout_upcall_queue(void *data)
+rpc_timeout_upcall_queue(struct work_struct *work)
{
LIST_HEAD(free_list);
- struct rpc_inode *rpci = (struct rpc_inode *)data;
+ struct rpc_inode *rpci =
+ container_of(work, struct rpc_inode, queue_timeout.work);
struct inode *inode = &rpci->vfs_inode;
void (*destroy_msg)(struct rpc_pipe_msg *);
@@ -142,7 +143,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
- rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL);
+ rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
@@ -213,7 +214,7 @@ out:
static ssize_t
rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = filp->f_path.dentry->d_inode;
struct rpc_inode *rpci = RPC_I(inode);
struct rpc_pipe_msg *msg;
int res = 0;
@@ -256,7 +257,7 @@ out_unlock:
static ssize_t
rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = filp->f_path.dentry->d_inode;
struct rpc_inode *rpci = RPC_I(inode);
int res;
@@ -274,7 +275,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
struct rpc_inode *rpci;
unsigned int mask = 0;
- rpci = RPC_I(filp->f_dentry->d_inode);
+ rpci = RPC_I(filp->f_path.dentry->d_inode);
poll_wait(filp, &rpci->waitq, wait);
mask = POLLOUT | POLLWRNORM;
@@ -289,7 +290,7 @@ static int
rpc_pipe_ioctl(struct inode *ino, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+ struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
int len;
switch (cmd) {
@@ -823,7 +824,7 @@ static struct file_system_type rpc_pipe_fs_type = {
};
static void
-init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
{
struct rpc_inode *rpci = (struct rpc_inode *) foo;
@@ -837,7 +838,8 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
INIT_LIST_HEAD(&rpci->pipe);
rpci->pipelen = 0;
init_waitqueue_head(&rpci->waitq);
- INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci);
+ INIT_DELAYED_WORK(&rpci->queue_timeout,
+ rpc_timeout_upcall_queue);
rpci->ops = NULL;
}
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a1ab4eed41f..79bc4cdf5d4 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -34,14 +34,14 @@ static int rpc_task_id;
#define RPC_BUFFER_MAXSIZE (2048)
#define RPC_BUFFER_POOLSIZE (8)
#define RPC_TASK_POOLSIZE (8)
-static kmem_cache_t *rpc_task_slabp __read_mostly;
-static kmem_cache_t *rpc_buffer_slabp __read_mostly;
+static struct kmem_cache *rpc_task_slabp __read_mostly;
+static struct kmem_cache *rpc_buffer_slabp __read_mostly;
static mempool_t *rpc_task_mempool __read_mostly;
static mempool_t *rpc_buffer_mempool __read_mostly;
static void __rpc_default_timer(struct rpc_task *task);
static void rpciod_killall(void);
-static void rpc_async_schedule(void *);
+static void rpc_async_schedule(struct work_struct *);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -266,12 +266,28 @@ static int rpc_wait_bit_interruptible(void *word)
return 0;
}
+static void rpc_set_active(struct rpc_task *task)
+{
+ if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
+ return;
+ spin_lock(&rpc_sched_lock);
+#ifdef RPC_DEBUG
+ task->tk_magic = RPC_TASK_MAGIC_ID;
+ task->tk_pid = rpc_task_id++;
+#endif
+ /* Add to global list of all tasks */
+ list_add_tail(&task->tk_task, &all_tasks);
+ spin_unlock(&rpc_sched_lock);
+}
+
/*
* Mark an RPC call as having completed by clearing the 'active' bit
*/
-static inline void rpc_mark_complete_task(struct rpc_task *task)
+static void rpc_mark_complete_task(struct rpc_task *task)
{
- rpc_clear_active(task);
+ smp_mb__before_clear_bit();
+ clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ smp_mb__after_clear_bit();
wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
}
@@ -295,17 +311,19 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task);
*/
static void rpc_make_runnable(struct rpc_task *task)
{
- int do_ret;
-
BUG_ON(task->tk_timeout_fn);
- do_ret = rpc_test_and_set_running(task);
rpc_clear_queued(task);
- if (do_ret)
+ if (rpc_test_and_set_running(task))
return;
+ /* We might have raced */
+ if (RPC_IS_QUEUED(task)) {
+ rpc_clear_running(task);
+ return;
+ }
if (RPC_IS_ASYNC(task)) {
int status;
- INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
+ INIT_WORK(&task->u.tk_work, rpc_async_schedule);
status = queue_work(task->tk_workqueue, &task->u.tk_work);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
@@ -333,9 +351,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
return;
}
- /* Mark the task as being activated if so needed */
- rpc_set_active(task);
-
__rpc_add_wait_queue(q, task);
BUG_ON(task->tk_callback != NULL);
@@ -346,6 +361,9 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
+ /* Mark the task as being activated if so needed */
+ rpc_set_active(task);
+
/*
* Protect the queue operations.
*/
@@ -409,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task)
*/
void rpc_wake_up_task(struct rpc_task *task)
{
+ rcu_read_lock_bh();
if (rpc_start_wakeup(task)) {
if (RPC_IS_QUEUED(task)) {
struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
- spin_lock_bh(&queue->lock);
+ /* Note: we're already in a bh-safe context */
+ spin_lock(&queue->lock);
__rpc_do_wake_up_task(task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
rpc_finish_wakeup(task);
}
+ rcu_read_unlock_bh();
}
/*
@@ -481,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
struct rpc_task *task = NULL;
dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
- spin_lock_bh(&queue->lock);
+ rcu_read_lock_bh();
+ spin_lock(&queue->lock);
if (RPC_IS_PRIORITY(queue))
task = __rpc_wake_up_next_priority(queue);
else {
task_for_first(task, &queue->tasks[0])
__rpc_wake_up_task(task);
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
+ rcu_read_unlock_bh();
return task;
}
@@ -504,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
struct rpc_task *task, *next;
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ rcu_read_lock_bh();
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
list_for_each_entry_safe(task, next, head, u.tk_wait.list)
@@ -513,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
+ rcu_read_unlock_bh();
}
/**
@@ -528,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
struct rpc_task *task, *next;
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ rcu_read_lock_bh();
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
@@ -539,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
+ rcu_read_unlock_bh();
}
static void __rpc_atrun(struct rpc_task *task)
@@ -561,7 +588,9 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
*/
static void rpc_prepare_task(struct rpc_task *task)
{
+ lock_kernel();
task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
+ unlock_kernel();
}
/*
@@ -571,7 +600,9 @@ void rpc_exit_task(struct rpc_task *task)
{
task->tk_action = NULL;
if (task->tk_ops->rpc_call_done != NULL) {
+ lock_kernel();
task->tk_ops->rpc_call_done(task, task->tk_calldata);
+ unlock_kernel();
if (task->tk_action != NULL) {
WARN_ON(RPC_ASSASSINATED(task));
/* Always release the RPC slot and buffer memory */
@@ -581,6 +612,15 @@ void rpc_exit_task(struct rpc_task *task)
}
EXPORT_SYMBOL(rpc_exit_task);
+void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
+{
+ if (ops->rpc_release != NULL) {
+ lock_kernel();
+ ops->rpc_release(calldata);
+ unlock_kernel();
+ }
+}
+
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
@@ -615,9 +655,7 @@ static int __rpc_execute(struct rpc_task *task)
*/
save_callback=task->tk_callback;
task->tk_callback=NULL;
- lock_kernel();
save_callback(task);
- unlock_kernel();
}
/*
@@ -628,9 +666,7 @@ static int __rpc_execute(struct rpc_task *task)
if (!RPC_IS_QUEUED(task)) {
if (task->tk_action == NULL)
break;
- lock_kernel();
task->tk_action(task);
- unlock_kernel();
}
/*
@@ -671,8 +707,6 @@ static int __rpc_execute(struct rpc_task *task)
}
dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
- /* Wake up anyone who is waiting for task completion */
- rpc_mark_complete_task(task);
/* Release all resources associated with the task */
rpc_release_task(task);
return status;
@@ -695,9 +729,9 @@ rpc_execute(struct rpc_task *task)
return __rpc_execute(task);
}
-static void rpc_async_schedule(void *arg)
+static void rpc_async_schedule(struct work_struct *work)
{
- __rpc_execute((struct rpc_task *)arg);
+ __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
}
/**
@@ -786,15 +820,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
task->tk_flags |= RPC_TASK_NOINTR;
}
-#ifdef RPC_DEBUG
- task->tk_magic = RPC_TASK_MAGIC_ID;
- task->tk_pid = rpc_task_id++;
-#endif
- /* Add to global list of all tasks */
- spin_lock(&rpc_sched_lock);
- list_add_tail(&task->tk_task, &all_tasks);
- spin_unlock(&rpc_sched_lock);
-
BUG_ON(task->tk_ops == NULL);
/* starting timestamp */
@@ -810,8 +835,9 @@ rpc_alloc_task(void)
return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}
-static void rpc_free_task(struct rpc_task *task)
+static void rpc_free_task(struct rcu_head *rcu)
{
+ struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
dprintk("RPC: %4d freeing task\n", task->tk_pid);
mempool_free(task, rpc_task_mempool);
}
@@ -847,16 +873,34 @@ cleanup:
goto out;
}
-void rpc_release_task(struct rpc_task *task)
+
+void rpc_put_task(struct rpc_task *task)
{
const struct rpc_call_ops *tk_ops = task->tk_ops;
void *calldata = task->tk_calldata;
+ if (!atomic_dec_and_test(&task->tk_count))
+ return;
+ /* Release resources */
+ if (task->tk_rqstp)
+ xprt_release(task);
+ if (task->tk_msg.rpc_cred)
+ rpcauth_unbindcred(task);
+ if (task->tk_client) {
+ rpc_release_client(task->tk_client);
+ task->tk_client = NULL;
+ }
+ if (task->tk_flags & RPC_TASK_DYNAMIC)
+ call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
+ rpc_release_calldata(tk_ops, calldata);
+}
+EXPORT_SYMBOL(rpc_put_task);
+
+void rpc_release_task(struct rpc_task *task)
+{
#ifdef RPC_DEBUG
BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
- if (!atomic_dec_and_test(&task->tk_count))
- return;
dprintk("RPC: %4d release task\n", task->tk_pid);
/* Remove from global task list */
@@ -869,23 +913,13 @@ void rpc_release_task(struct rpc_task *task)
/* Synchronously delete any running timer */
rpc_delete_timer(task);
- /* Release resources */
- if (task->tk_rqstp)
- xprt_release(task);
- if (task->tk_msg.rpc_cred)
- rpcauth_unbindcred(task);
- if (task->tk_client) {
- rpc_release_client(task->tk_client);
- task->tk_client = NULL;
- }
-
#ifdef RPC_DEBUG
task->tk_magic = 0;
#endif
- if (task->tk_flags & RPC_TASK_DYNAMIC)
- rpc_free_task(task);
- if (tk_ops->rpc_release)
- tk_ops->rpc_release(calldata);
+ /* Wake up anyone who is waiting for task completion */
+ rpc_mark_complete_task(task);
+
+ rpc_put_task(task);
}
/**
@@ -902,8 +936,7 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
struct rpc_task *task;
task = rpc_new_task(clnt, flags, ops, data);
if (task == NULL) {
- if (ops->rpc_release != NULL)
- ops->rpc_release(data);
+ rpc_release_calldata(ops, data);
return ERR_PTR(-ENOMEM);
}
atomic_inc(&task->tk_count);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 6f17527b9e6..634885b0c04 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -16,7 +16,7 @@
/**
- * skb_read_bits - copy some data bits from skb to internal buffer
+ * xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
@@ -24,11 +24,11 @@
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
-static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
+size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
- if (skb_copy_bits(desc->skb, desc->offset, to, len))
+ if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
return 0;
desc->count -= len;
desc->offset += len;
@@ -36,16 +36,17 @@ static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
}
/**
- * skb_read_and_csum_bits - copy and checksum from skb to buffer
+ * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Same as skb_read_bits, but calculate a checksum at the same time.
*/
-static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
+static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
- unsigned int csum2, pos;
+ unsigned int pos;
+ __wsum csum2;
if (len > desc->count)
len = desc->count;
@@ -65,7 +66,7 @@ static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
* @copy_actor: virtual method for copying data
*
*/
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor)
+ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
@@ -147,7 +148,7 @@ out:
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
- skb_reader_t desc;
+ struct xdr_skb_reader desc;
desc.skb = skb;
desc.offset = sizeof(struct udphdr);
@@ -157,22 +158,22 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
goto no_checksum;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
+ if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
return -1;
if (desc.offset != skb->len) {
- unsigned int csum2;
+ __wsum csum2;
csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
}
if (desc.count)
return -1;
- if ((unsigned short)csum_fold(desc.csum))
+ if (csum_fold(desc.csum))
return -1;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
+ if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
return -1;
if (desc.count)
return -1;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 192dff5dabc..d85fddeb638 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -33,7 +33,6 @@ EXPORT_SYMBOL(rpciod_down);
EXPORT_SYMBOL(rpciod_up);
EXPORT_SYMBOL(rpc_new_task);
EXPORT_SYMBOL(rpc_wake_up_status);
-EXPORT_SYMBOL(rpc_release_task);
/* RPC client functions */
EXPORT_SYMBOL(rpc_clone_client);
@@ -139,6 +138,8 @@ EXPORT_SYMBOL(nlm_debug);
extern int register_rpc_pipefs(void);
extern void unregister_rpc_pipefs(void);
extern struct cache_detail ip_map_cache;
+extern int init_socket_xprt(void);
+extern void cleanup_socket_xprt(void);
static int __init
init_sunrpc(void)
@@ -156,6 +157,7 @@ init_sunrpc(void)
rpc_proc_init();
#endif
cache_register(&ip_map_cache);
+ init_socket_xprt();
out:
return err;
}
@@ -163,6 +165,7 @@ out:
static void __exit
cleanup_sunrpc(void)
{
+ cleanup_socket_xprt();
unregister_rpc_pipefs();
rpc_destroy_mempool();
if (cache_unregister(&ip_map_cache))
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index eb44ec929ca..f3001f3626f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -308,7 +308,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
serv->sv_nrpools = npools;
serv->sv_pools =
- kcalloc(sizeof(struct svc_pool), serv->sv_nrpools,
+ kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
GFP_KERNEL);
if (!serv->sv_pools) {
kfree(serv);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 8f2320aded5..c7bb5f7f21a 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -119,13 +119,15 @@ EXPORT_SYMBOL(svc_auth_unregister);
#define DN_HASHMASK (DN_HASHMAX-1)
static struct hlist_head auth_domain_table[DN_HASHMAX];
-static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t auth_domain_lock =
+ __SPIN_LOCK_UNLOCKED(auth_domain_lock);
void auth_domain_put(struct auth_domain *dom)
{
if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) {
hlist_del(&dom->hash);
dom->flavour->domain_release(dom);
+ spin_unlock(&auth_domain_lock);
}
}
@@ -147,10 +149,8 @@ auth_domain_lookup(char *name, struct auth_domain *new)
return hp;
}
}
- if (new) {
+ if (new)
hlist_add_head(&new->hash, head);
- kref_get(&new->ref);
- }
spin_unlock(&auth_domain_lock);
return new;
}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index e1bd933629f..0d1e8fb83b9 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -53,6 +53,10 @@ struct auth_domain *unix_domain_find(char *name)
return NULL;
kref_init(&new->h.ref);
new->h.name = kstrdup(name, GFP_KERNEL);
+ if (new->h.name == NULL) {
+ kfree(new);
+ return NULL;
+ }
new->h.flavour = &svcauth_unix;
new->addr_changes = 0;
rv = auth_domain_lookup(name, &new->h);
@@ -101,9 +105,9 @@ static void ip_map_put(struct kref *kref)
* IP addresses in reverse-endian (i.e. on a little-endian machine).
* So use a trivial but reliable hash instead
*/
-static inline int hash_ip(unsigned long ip)
+static inline int hash_ip(__be32 ip)
{
- int hash = ip ^ (ip>>16);
+ int hash = (__force u32)ip ^ ((__force u32)ip>>16);
return (hash ^ (hash>>8)) & 0xff;
}
#endif
@@ -284,7 +288,7 @@ static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
ip.m_addr = addr;
ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
hash_str(class, IP_HASHBITS) ^
- hash_ip((unsigned long)addr.s_addr));
+ hash_ip(addr.s_addr));
if (ch)
return container_of(ch, struct ip_map, h);
@@ -313,7 +317,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
ch = sunrpc_cache_update(&ip_map_cache,
&ip.h, &ipm->h,
hash_str(ipm->m_class, IP_HASHBITS) ^
- hash_ip((unsigned long)ipm->m_addr.s_addr));
+ hash_ip(ipm->m_addr.s_addr));
if (!ch)
return -ENOMEM;
cache_put(ch, &ip_map_cache);
@@ -435,6 +439,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
default:
BUG();
case -EAGAIN:
+ case -ETIMEDOUT:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 96521f16342..99f54fb6d66 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -32,6 +32,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/file.h>
+#include <linux/freezer.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -84,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req);
*/
static int svc_conn_age_period = 6*60;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key svc_key[2];
+static struct lock_class_key svc_slock_key[2];
+
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ BUG_ON(sk->sk_lock.owner != NULL);
+ switch (sk->sk_family) {
+ case AF_INET:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
+ &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]);
+ break;
+
+ case AF_INET6:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
+ &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]);
+ break;
+
+ default:
+ BUG();
+ }
+}
+#else
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+}
+#endif
+
/*
* Queue up an idle server thread. Must have pool->sp_lock held.
* Note: this is really a stack rather than a queue, so that we only
@@ -299,9 +329,15 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
static inline void
svc_sock_put(struct svc_sock *svsk)
{
- if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
+ if (atomic_dec_and_test(&svsk->sk_inuse) &&
+ test_bit(SK_DEAD, &svsk->sk_flags)) {
dprintk("svc: releasing dead socket\n");
- sock_release(svsk->sk_sock);
+ if (svsk->sk_sock->file)
+ sockfd_put(svsk->sk_sock);
+ else
+ sock_release(svsk->sk_sock);
+ if (svsk->sk_info_authunix != NULL)
+ svcauth_unix_info_release(svsk->sk_info_authunix);
kfree(svsk);
}
}
@@ -1550,6 +1586,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
return error;
+ svc_reclassify_socket(sock);
+
if (type == SOCK_STREAM)
sock->sk->sk_reuse = 1; /* allow address reuse */
error = kernel_bind(sock, (struct sockaddr *) sin,
@@ -1604,20 +1642,13 @@ svc_delete_socket(struct svc_sock *svsk)
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
- if (!atomic_read(&svsk->sk_inuse)) {
- spin_unlock_bh(&serv->sv_lock);
- if (svsk->sk_sock->file)
- sockfd_put(svsk->sk_sock);
- else
- sock_release(svsk->sk_sock);
- if (svsk->sk_info_authunix != NULL)
- svcauth_unix_info_release(svsk->sk_info_authunix);
- kfree(svsk);
- } else {
- spin_unlock_bh(&serv->sv_lock);
- dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
- /* svsk->sk_server = NULL; */
- }
+ /* This atomic_inc should not be needed - svc_delete_socket
+ * should have the semantic of dropping a reference.
+ * But it doesn't yet....
+ */
+ atomic_inc(&svsk->sk_inuse);
+ spin_unlock_bh(&serv->sv_lock);
+ svc_sock_put(svsk);
}
/*
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index d89b048ad6b..82b27528d0c 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,7 +18,6 @@
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
-#include <linux/sunrpc/xprt.h>
/*
* Declare the debug flags here
@@ -119,11 +118,6 @@ done:
}
-static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
-static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
-static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
-static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
-
static ctl_table debug_table[] = {
{
.ctl_name = CTL_RPCDEBUG,
@@ -157,50 +151,6 @@ static ctl_table debug_table[] = {
.mode = 0644,
.proc_handler = &proc_dodebug
},
- {
- .ctl_name = CTL_SLOTTABLE_UDP,
- .procname = "udp_slot_table_entries",
- .data = &xprt_udp_slot_table_entries,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &min_slot_table_size,
- .extra2 = &max_slot_table_size
- },
- {
- .ctl_name = CTL_SLOTTABLE_TCP,
- .procname = "tcp_slot_table_entries",
- .data = &xprt_tcp_slot_table_entries,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &min_slot_table_size,
- .extra2 = &max_slot_table_size
- },
- {
- .ctl_name = CTL_MIN_RESVPORT,
- .procname = "min_resvport",
- .data = &xprt_min_resvport,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &xprt_min_resvport_limit,
- .extra2 = &xprt_max_resvport_limit
- },
- {
- .ctl_name = CTL_MAX_RESVPORT,
- .procname = "max_resvport",
- .data = &xprt_max_resvport,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &xprt_min_resvport_limit,
- .extra2 = &xprt_max_resvport_limit
- },
{ .ctl_name = 0 }
};
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 9022eb8b37e..a0af250ca31 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -640,41 +640,30 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
buf->buflen = buf->len = iov->iov_len;
}
-/* Sets subiov to the intersection of iov with the buffer of length len
- * starting base bytes after iov. Indicates empty intersection by setting
- * length of subiov to zero. Decrements len by length of subiov, sets base
- * to zero (or decrements it by length of iov if subiov is empty). */
-static void
-iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len)
-{
- if (*base > iov->iov_len) {
- subiov->iov_base = NULL;
- subiov->iov_len = 0;
- *base -= iov->iov_len;
- } else {
- subiov->iov_base = iov->iov_base + *base;
- subiov->iov_len = min(*len, (int)iov->iov_len - *base);
- *base = 0;
- }
- *len -= subiov->iov_len;
-}
-
/* Sets subbuf to the portion of buf of length len beginning base bytes
* from the start of buf. Returns -1 if base or length are out of bounds. */
int
xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
- int base, int len)
+ unsigned int base, unsigned int len)
{
- int i;
-
subbuf->buflen = subbuf->len = len;
- iov_subsegment(buf->head, subbuf->head, &base, &len);
+ if (base < buf->head[0].iov_len) {
+ subbuf->head[0].iov_base = buf->head[0].iov_base + base;
+ subbuf->head[0].iov_len = min_t(unsigned int, len,
+ buf->head[0].iov_len - base);
+ len -= subbuf->head[0].iov_len;
+ base = 0;
+ } else {
+ subbuf->head[0].iov_base = NULL;
+ subbuf->head[0].iov_len = 0;
+ base -= buf->head[0].iov_len;
+ }
if (base < buf->page_len) {
- i = (base + buf->page_base) >> PAGE_CACHE_SHIFT;
- subbuf->pages = &buf->pages[i];
- subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK;
- subbuf->page_len = min((int)buf->page_len - base, len);
+ subbuf->page_len = min(buf->page_len - base, len);
+ base += buf->page_base;
+ subbuf->page_base = base & ~PAGE_CACHE_MASK;
+ subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT];
len -= subbuf->page_len;
base = 0;
} else {
@@ -682,66 +671,85 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
subbuf->page_len = 0;
}
- iov_subsegment(buf->tail, subbuf->tail, &base, &len);
+ if (base < buf->tail[0].iov_len) {
+ subbuf->tail[0].iov_base = buf->tail[0].iov_base + base;
+ subbuf->tail[0].iov_len = min_t(unsigned int, len,
+ buf->tail[0].iov_len - base);
+ len -= subbuf->tail[0].iov_len;
+ base = 0;
+ } else {
+ subbuf->tail[0].iov_base = NULL;
+ subbuf->tail[0].iov_len = 0;
+ base -= buf->tail[0].iov_len;
+ }
+
if (base || len)
return -1;
return 0;
}
-/* obj is assumed to point to allocated memory of size at least len: */
-int
-read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
- struct xdr_buf subbuf;
- int this_len;
- int status;
+ unsigned int this_len;
- status = xdr_buf_subsegment(buf, &subbuf, base, len);
- if (status)
- goto out;
- this_len = min(len, (int)subbuf.head[0].iov_len);
- memcpy(obj, subbuf.head[0].iov_base, this_len);
+ this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
+ memcpy(obj, subbuf->head[0].iov_base, this_len);
len -= this_len;
obj += this_len;
- this_len = min(len, (int)subbuf.page_len);
+ this_len = min_t(unsigned int, len, subbuf->page_len);
if (this_len)
- _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len);
+ _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len);
len -= this_len;
obj += this_len;
- this_len = min(len, (int)subbuf.tail[0].iov_len);
- memcpy(obj, subbuf.tail[0].iov_base, this_len);
-out:
- return status;
+ this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
+ memcpy(obj, subbuf->tail[0].iov_base, this_len);
}
/* obj is assumed to point to allocated memory of size at least len: */
-int
-write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
{
struct xdr_buf subbuf;
- int this_len;
int status;
status = xdr_buf_subsegment(buf, &subbuf, base, len);
- if (status)
- goto out;
- this_len = min(len, (int)subbuf.head[0].iov_len);
- memcpy(subbuf.head[0].iov_base, obj, this_len);
+ if (status != 0)
+ return status;
+ __read_bytes_from_xdr_buf(&subbuf, obj, len);
+ return 0;
+}
+
+static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
+{
+ unsigned int this_len;
+
+ this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
+ memcpy(subbuf->head[0].iov_base, obj, this_len);
len -= this_len;
obj += this_len;
- this_len = min(len, (int)subbuf.page_len);
+ this_len = min_t(unsigned int, len, subbuf->page_len);
if (this_len)
- _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len);
+ _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len);
len -= this_len;
obj += this_len;
- this_len = min(len, (int)subbuf.tail[0].iov_len);
- memcpy(subbuf.tail[0].iov_base, obj, this_len);
-out:
- return status;
+ this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
+ memcpy(subbuf->tail[0].iov_base, obj, this_len);
+}
+
+/* obj is assumed to point to allocated memory of size at least len: */
+int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
+{
+ struct xdr_buf subbuf;
+ int status;
+
+ status = xdr_buf_subsegment(buf, &subbuf, base, len);
+ if (status != 0)
+ return status;
+ __write_bytes_to_xdr_buf(&subbuf, obj, len);
+ return 0;
}
int
-xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
+xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
{
__be32 raw;
int status;
@@ -754,7 +762,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
}
int
-xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
+xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
{
__be32 raw = htonl(obj);
@@ -765,44 +773,37 @@ xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
* entirely in the head or the tail, set object to point to it; otherwise
* try to find space for it at the end of the tail, copy it there, and
* set obj to point to it. */
-int
-xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
+int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
{
- u32 tail_offset = buf->head[0].iov_len + buf->page_len;
- u32 obj_end_offset;
+ struct xdr_buf subbuf;
if (xdr_decode_word(buf, offset, &obj->len))
- goto out;
- obj_end_offset = offset + 4 + obj->len;
-
- if (obj_end_offset <= buf->head[0].iov_len) {
- /* The obj is contained entirely in the head: */
- obj->data = buf->head[0].iov_base + offset + 4;
- } else if (offset + 4 >= tail_offset) {
- if (obj_end_offset - tail_offset
- > buf->tail[0].iov_len)
- goto out;
- /* The obj is contained entirely in the tail: */
- obj->data = buf->tail[0].iov_base
- + offset - tail_offset + 4;
- } else {
- /* use end of tail as storage for obj:
- * (We don't copy to the beginning because then we'd have
- * to worry about doing a potentially overlapping copy.
- * This assumes the object is at most half the length of the
- * tail.) */
- if (obj->len > buf->tail[0].iov_len)
- goto out;
- obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len -
- obj->len;
- if (read_bytes_from_xdr_buf(buf, offset + 4,
- obj->data, obj->len))
- goto out;
+ return -EFAULT;
+ if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+ return -EFAULT;
- }
+ /* Is the obj contained entirely in the head? */
+ obj->data = subbuf.head[0].iov_base;
+ if (subbuf.head[0].iov_len == obj->len)
+ return 0;
+ /* ..or is the obj contained entirely in the tail? */
+ obj->data = subbuf.tail[0].iov_base;
+ if (subbuf.tail[0].iov_len == obj->len)
+ return 0;
+
+ /* use end of tail as storage for obj:
+ * (We don't copy to the beginning because then we'd have
+ * to worry about doing a potentially overlapping copy.
+ * This assumes the object is at most half the length of the
+ * tail.) */
+ if (obj->len > buf->buflen - buf->len)
+ return -ENOMEM;
+ if (buf->tail[0].iov_len != 0)
+ obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+ else
+ obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
+ __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
return 0;
-out:
- return -1;
}
/* Returns 0 on success, or else a negative error code. */
@@ -1020,3 +1021,71 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
return xdr_xcode_array2(buf, base, desc, 1);
}
+
+int
+xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
+ int (*actor)(struct scatterlist *, void *), void *data)
+{
+ int i, ret = 0;
+ unsigned page_len, thislen, page_offset;
+ struct scatterlist sg[1];
+
+ if (offset >= buf->head[0].iov_len) {
+ offset -= buf->head[0].iov_len;
+ } else {
+ thislen = buf->head[0].iov_len - offset;
+ if (thislen > len)
+ thislen = len;
+ sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
+ ret = actor(sg, data);
+ if (ret)
+ goto out;
+ offset = 0;
+ len -= thislen;
+ }
+ if (len == 0)
+ goto out;
+
+ if (offset >= buf->page_len) {
+ offset -= buf->page_len;
+ } else {
+ page_len = buf->page_len - offset;
+ if (page_len > len)
+ page_len = len;
+ len -= page_len;
+ page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
+ i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
+ thislen = PAGE_CACHE_SIZE - page_offset;
+ do {
+ if (thislen > page_len)
+ thislen = page_len;
+ sg->page = buf->pages[i];
+ sg->offset = page_offset;
+ sg->length = thislen;
+ ret = actor(sg, data);
+ if (ret)
+ goto out;
+ page_len -= thislen;
+ i++;
+ page_offset = 0;
+ thislen = PAGE_CACHE_SIZE;
+ } while (page_len != 0);
+ offset = 0;
+ }
+ if (len == 0)
+ goto out;
+ if (offset < buf->tail[0].iov_len) {
+ thislen = buf->tail[0].iov_len - offset;
+ if (thislen > len)
+ thislen = len;
+ sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
+ ret = actor(sg, data);
+ len -= thislen;
+ }
+ if (len != 0)
+ ret = -EINVAL;
+out:
+ return ret;
+}
+EXPORT_SYMBOL(xdr_process_buf);
+
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 80857470dc1..7a3999f0a4a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -459,7 +459,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
if (to->to_maxval && req->rq_timeout >= to->to_maxval)
req->rq_timeout = to->to_maxval;
req->rq_retries++;
- pprintk("RPC: %lu retrans\n", jiffies);
} else {
req->rq_timeout = to->to_initval;
req->rq_retries = 0;
@@ -468,7 +467,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
spin_lock_bh(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
spin_unlock_bh(&xprt->transport_lock);
- pprintk("RPC: %lu timeout\n", jiffies);
status = -ETIMEDOUT;
}
@@ -479,9 +477,10 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
return status;
}
-static void xprt_autoclose(void *args)
+static void xprt_autoclose(struct work_struct *work)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+ struct rpc_xprt *xprt =
+ container_of(work, struct rpc_xprt, task_cleanup);
xprt_disconnect(xprt);
xprt->ops->close(xprt);
@@ -891,39 +890,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
*/
struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
{
- int result;
struct rpc_xprt *xprt;
struct rpc_rqst *req;
- if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
- dprintk("RPC: xprt_create_transport: no memory\n");
- return ERR_PTR(-ENOMEM);
- }
- if (size <= sizeof(xprt->addr)) {
- memcpy(&xprt->addr, ap, size);
- xprt->addrlen = size;
- } else {
- kfree(xprt);
- dprintk("RPC: xprt_create_transport: address too large\n");
- return ERR_PTR(-EBADF);
- }
-
switch (proto) {
case IPPROTO_UDP:
- result = xs_setup_udp(xprt, to);
+ xprt = xs_setup_udp(ap, size, to);
break;
case IPPROTO_TCP:
- result = xs_setup_tcp(xprt, to);
+ xprt = xs_setup_tcp(ap, size, to);
break;
default:
printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
proto);
return ERR_PTR(-EIO);
}
- if (result) {
- kfree(xprt);
- dprintk("RPC: xprt_create_transport: failed, %d\n", result);
- return ERR_PTR(result);
+ if (IS_ERR(xprt)) {
+ dprintk("RPC: xprt_create_transport: failed, %ld\n",
+ -PTR_ERR(xprt));
+ return xprt;
}
kref_init(&xprt->kref);
@@ -932,7 +917,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
INIT_LIST_HEAD(&xprt->free);
INIT_LIST_HEAD(&xprt->recv);
- INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
+ INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
init_timer(&xprt->timer);
xprt->timer.function = xprt_init_autodisconnect;
xprt->timer.data = (unsigned long) xprt;
@@ -969,8 +954,11 @@ static void xprt_destroy(struct kref *kref)
dprintk("RPC: destroying transport %p\n", xprt);
xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
+
+ /*
+ * Tear down transport state and free the rpc_xprt
+ */
xprt->ops->destroy(xprt);
- kfree(xprt);
}
/**
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 757fc91ef25..49cabffd7fd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -46,6 +46,92 @@ unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
/*
+ * We can register our own files under /proc/sys/sunrpc by
+ * calling register_sysctl_table() again. The files in that
+ * directory become the union of all files registered there.
+ *
+ * We simply need to make sure that we don't collide with
+ * someone else's file names!
+ */
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
+static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
+
+static struct ctl_table_header *sunrpc_table_header;
+
+/*
+ * FIXME: changing the UDP slot table size should also resize the UDP
+ * socket buffers for existing UDP transports
+ */
+static ctl_table xs_tunables_table[] = {
+ {
+ .ctl_name = CTL_SLOTTABLE_UDP,
+ .procname = "udp_slot_table_entries",
+ .data = &xprt_udp_slot_table_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_slot_table_size,
+ .extra2 = &max_slot_table_size
+ },
+ {
+ .ctl_name = CTL_SLOTTABLE_TCP,
+ .procname = "tcp_slot_table_entries",
+ .data = &xprt_tcp_slot_table_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_slot_table_size,
+ .extra2 = &max_slot_table_size
+ },
+ {
+ .ctl_name = CTL_MIN_RESVPORT,
+ .procname = "min_resvport",
+ .data = &xprt_min_resvport,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xprt_min_resvport_limit,
+ .extra2 = &xprt_max_resvport_limit
+ },
+ {
+ .ctl_name = CTL_MAX_RESVPORT,
+ .procname = "max_resvport",
+ .data = &xprt_max_resvport,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xprt_min_resvport_limit,
+ .extra2 = &xprt_max_resvport_limit
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+static ctl_table sunrpc_table[] = {
+ {
+ .ctl_name = CTL_SUNRPC,
+ .procname = "sunrpc",
+ .mode = 0555,
+ .child = xs_tunables_table
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+#endif
+
+/*
* How many times to try sending a request on a socket before waiting
* for the socket buffer to clear.
*/
@@ -125,6 +211,55 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
}
#endif
+struct sock_xprt {
+ struct rpc_xprt xprt;
+
+ /*
+ * Network layer
+ */
+ struct socket * sock;
+ struct sock * inet;
+
+ /*
+ * State of TCP reply receive
+ */
+ __be32 tcp_fraghdr,
+ tcp_xid;
+
+ u32 tcp_offset,
+ tcp_reclen;
+
+ unsigned long tcp_copied,
+ tcp_flags;
+
+ /*
+ * Connection of transports
+ */
+ struct delayed_work connect_worker;
+ unsigned short port;
+
+ /*
+ * UDP socket buffer size parameters
+ */
+ size_t rcvsize,
+ sndsize;
+
+ /*
+ * Saved socket callback addresses
+ */
+ void (*old_data_ready)(struct sock *, int);
+ void (*old_state_change)(struct sock *);
+ void (*old_write_space)(struct sock *);
+};
+
+/*
+ * TCP receive state flags
+ */
+#define TCP_RCV_LAST_FRAG (1UL << 0)
+#define TCP_RCV_COPY_FRAGHDR (1UL << 1)
+#define TCP_RCV_COPY_XID (1UL << 2)
+#define TCP_RCV_COPY_DATA (1UL << 3)
+
static void xs_format_peer_addresses(struct rpc_xprt *xprt)
{
struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
@@ -168,37 +303,52 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
-static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
+static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
{
- struct kvec iov = {
- .iov_base = xdr->head[0].iov_base + base,
- .iov_len = len - base,
- };
struct msghdr msg = {
.msg_name = addr,
.msg_namelen = addrlen,
- .msg_flags = XS_SENDMSG_FLAGS,
+ .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
+ };
+ struct kvec iov = {
+ .iov_base = vec->iov_base + base,
+ .iov_len = vec->iov_len - base,
};
- if (xdr->len > len)
- msg.msg_flags |= MSG_MORE;
-
- if (likely(iov.iov_len))
+ if (iov.iov_len != 0)
return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}
-static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
{
- struct kvec iov = {
- .iov_base = xdr->tail[0].iov_base + base,
- .iov_len = len - base,
- };
- struct msghdr msg = {
- .msg_flags = XS_SENDMSG_FLAGS,
- };
+ struct page **ppage;
+ unsigned int remainder;
+ int err, sent = 0;
+
+ remainder = xdr->page_len - base;
+ base += xdr->page_base;
+ ppage = xdr->pages + (base >> PAGE_SHIFT);
+ base &= ~PAGE_MASK;
+ for(;;) {
+ unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
+ int flags = XS_SENDMSG_FLAGS;
- return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
+ remainder -= len;
+ if (remainder != 0 || more)
+ flags |= MSG_MORE;
+ err = sock->ops->sendpage(sock, *ppage, base, len, flags);
+ if (remainder == 0 || err != len)
+ break;
+ sent += err;
+ ppage++;
+ base = 0;
+ }
+ if (sent == 0)
+ return err;
+ if (err > 0)
+ sent += err;
+ return sent;
}
/**
@@ -210,76 +360,51 @@ static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int b
* @base: starting position in the buffer
*
*/
-static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
{
- struct page **ppage = xdr->pages;
- unsigned int len, pglen = xdr->page_len;
- int err, ret = 0;
+ unsigned int remainder = xdr->len - base;
+ int err, sent = 0;
if (unlikely(!sock))
return -ENOTCONN;
clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ if (base != 0) {
+ addr = NULL;
+ addrlen = 0;
+ }
- len = xdr->head[0].iov_len;
- if (base < len || (addr != NULL && base == 0)) {
- err = xs_send_head(sock, addr, addrlen, xdr, base, len);
- if (ret == 0)
- ret = err;
- else if (err > 0)
- ret += err;
- if (err != (len - base))
+ if (base < xdr->head[0].iov_len || addr != NULL) {
+ unsigned int len = xdr->head[0].iov_len - base;
+ remainder -= len;
+ err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+ if (remainder == 0 || err != len)
goto out;
+ sent += err;
base = 0;
} else
- base -= len;
+ base -= xdr->head[0].iov_len;
- if (unlikely(pglen == 0))
- goto copy_tail;
- if (unlikely(base >= pglen)) {
- base -= pglen;
- goto copy_tail;
- }
- if (base || xdr->page_base) {
- pglen -= base;
- base += xdr->page_base;
- ppage += base >> PAGE_CACHE_SHIFT;
- base &= ~PAGE_CACHE_MASK;
- }
-
- do {
- int flags = XS_SENDMSG_FLAGS;
-
- len = PAGE_CACHE_SIZE;
- if (base)
- len -= base;
- if (pglen < len)
- len = pglen;
-
- if (pglen != len || xdr->tail[0].iov_len != 0)
- flags |= MSG_MORE;
-
- err = kernel_sendpage(sock, *ppage, base, len, flags);
- if (ret == 0)
- ret = err;
- else if (err > 0)
- ret += err;
- if (err != len)
+ if (base < xdr->page_len) {
+ unsigned int len = xdr->page_len - base;
+ remainder -= len;
+ err = xs_send_pagedata(sock, xdr, base, remainder != 0);
+ if (remainder == 0 || err != len)
goto out;
+ sent += err;
base = 0;
- ppage++;
- } while ((pglen -= len) != 0);
-copy_tail:
- len = xdr->tail[0].iov_len;
- if (base < len) {
- err = xs_send_tail(sock, xdr, base, len);
- if (ret == 0)
- ret = err;
- else if (err > 0)
- ret += err;
- }
+ } else
+ base -= xdr->page_len;
+
+ if (base >= xdr->tail[0].iov_len)
+ return sent;
+ err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
out:
- return ret;
+ if (sent == 0)
+ return err;
+ if (err > 0)
+ sent += err;
+ return sent;
}
/**
@@ -291,19 +416,20 @@ static void xs_nospace(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
dprintk("RPC: %4d xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
req->rq_slen);
- if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
+ if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
/* Protect against races with write_space */
spin_lock_bh(&xprt->transport_lock);
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
- else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags))
+ else if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
xprt_wait_for_buffer_space(task);
spin_unlock_bh(&xprt->transport_lock);
@@ -327,6 +453,7 @@ static int xs_udp_send_request(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
@@ -335,8 +462,10 @@ static int xs_udp_send_request(struct rpc_task *task)
req->rq_svec->iov_len);
req->rq_xtime = jiffies;
- status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
- xprt->addrlen, xdr, req->rq_bytes_sent);
+ status = xs_sendpages(transport->sock,
+ (struct sockaddr *) &xprt->addr,
+ xprt->addrlen, xdr,
+ req->rq_bytes_sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -392,6 +521,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status, retry = 0;
@@ -406,8 +536,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
* called sendmsg(). */
while (1) {
req->rq_xtime = jiffies;
- status = xs_sendpages(xprt->sock, NULL, 0, xdr,
- req->rq_bytes_sent);
+ status = xs_sendpages(transport->sock,
+ NULL, 0, xdr, req->rq_bytes_sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -485,8 +615,9 @@ out_release:
*/
static void xs_close(struct rpc_xprt *xprt)
{
- struct socket *sock = xprt->sock;
- struct sock *sk = xprt->inet;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct socket *sock = transport->sock;
+ struct sock *sk = transport->inet;
if (!sk)
goto clear_close_wait;
@@ -494,13 +625,13 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
write_lock_bh(&sk->sk_callback_lock);
- xprt->inet = NULL;
- xprt->sock = NULL;
+ transport->inet = NULL;
+ transport->sock = NULL;
sk->sk_user_data = NULL;
- sk->sk_data_ready = xprt->old_data_ready;
- sk->sk_state_change = xprt->old_state_change;
- sk->sk_write_space = xprt->old_write_space;
+ sk->sk_data_ready = transport->old_data_ready;
+ sk->sk_state_change = transport->old_state_change;
+ sk->sk_write_space = transport->old_write_space;
write_unlock_bh(&sk->sk_callback_lock);
sk->sk_no_check = 0;
@@ -519,15 +650,18 @@ clear_close_wait:
*/
static void xs_destroy(struct rpc_xprt *xprt)
{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
dprintk("RPC: xs_destroy xprt %p\n", xprt);
- cancel_delayed_work(&xprt->connect_worker);
+ cancel_delayed_work(&transport->connect_worker);
flush_scheduled_work();
xprt_disconnect(xprt);
xs_close(xprt);
xs_free_peer_addresses(xprt);
kfree(xprt->slot);
+ kfree(xprt);
}
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -603,91 +737,75 @@ static void xs_udp_data_ready(struct sock *sk, int len)
read_unlock(&sk->sk_callback_lock);
}
-static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
-{
- if (len > desc->count)
- len = desc->count;
- if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
- dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
- len, desc->count);
- return 0;
- }
- desc->offset += len;
- desc->count -= len;
- dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
- len, desc->count);
- return len;
-}
-
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
size_t len, used;
char *p;
- p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
- len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
- used = xs_tcp_copy_data(desc, p, len);
- xprt->tcp_offset += used;
+ p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
+ len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
+ used = xdr_skb_read_bits(desc, p, len);
+ transport->tcp_offset += used;
if (used != len)
return;
- xprt->tcp_reclen = ntohl(xprt->tcp_recm);
- if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
- xprt->tcp_flags |= XPRT_LAST_FRAG;
+ transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
+ if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
+ transport->tcp_flags |= TCP_RCV_LAST_FRAG;
else
- xprt->tcp_flags &= ~XPRT_LAST_FRAG;
- xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
+ transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
+ transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
- xprt->tcp_flags &= ~XPRT_COPY_RECM;
- xprt->tcp_offset = 0;
+ transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
+ transport->tcp_offset = 0;
/* Sanity check of the record length */
- if (unlikely(xprt->tcp_reclen < 4)) {
+ if (unlikely(transport->tcp_reclen < 4)) {
dprintk("RPC: invalid TCP record fragment length\n");
xprt_disconnect(xprt);
return;
}
dprintk("RPC: reading TCP record fragment of length %d\n",
- xprt->tcp_reclen);
+ transport->tcp_reclen);
}
-static void xs_tcp_check_recm(struct rpc_xprt *xprt)
+static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
{
- dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
- xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
- if (xprt->tcp_offset == xprt->tcp_reclen) {
- xprt->tcp_flags |= XPRT_COPY_RECM;
- xprt->tcp_offset = 0;
- if (xprt->tcp_flags & XPRT_LAST_FRAG) {
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
- xprt->tcp_flags |= XPRT_COPY_XID;
- xprt->tcp_copied = 0;
+ if (transport->tcp_offset == transport->tcp_reclen) {
+ transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
+ transport->tcp_offset = 0;
+ if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
+ transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+ transport->tcp_flags |= TCP_RCV_COPY_XID;
+ transport->tcp_copied = 0;
}
}
}
-static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
{
size_t len, used;
char *p;
- len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
+ len = sizeof(transport->tcp_xid) - transport->tcp_offset;
dprintk("RPC: reading XID (%Zu bytes)\n", len);
- p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
- used = xs_tcp_copy_data(desc, p, len);
- xprt->tcp_offset += used;
+ p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
+ used = xdr_skb_read_bits(desc, p, len);
+ transport->tcp_offset += used;
if (used != len)
return;
- xprt->tcp_flags &= ~XPRT_COPY_XID;
- xprt->tcp_flags |= XPRT_COPY_DATA;
- xprt->tcp_copied = 4;
+ transport->tcp_flags &= ~TCP_RCV_COPY_XID;
+ transport->tcp_flags |= TCP_RCV_COPY_DATA;
+ transport->tcp_copied = 4;
dprintk("RPC: reading reply for XID %08x\n",
- ntohl(xprt->tcp_xid));
- xs_tcp_check_recm(xprt);
+ ntohl(transport->tcp_xid));
+ xs_tcp_check_fraghdr(transport);
}
-static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct rpc_rqst *req;
struct xdr_buf *rcvbuf;
size_t len;
@@ -695,116 +813,118 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc
/* Find and lock the request corresponding to this xid */
spin_lock(&xprt->transport_lock);
- req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
+ req = xprt_lookup_rqst(xprt, transport->tcp_xid);
if (!req) {
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
+ transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
dprintk("RPC: XID %08x request not found!\n",
- ntohl(xprt->tcp_xid));
+ ntohl(transport->tcp_xid));
spin_unlock(&xprt->transport_lock);
return;
}
rcvbuf = &req->rq_private_buf;
len = desc->count;
- if (len > xprt->tcp_reclen - xprt->tcp_offset) {
- skb_reader_t my_desc;
+ if (len > transport->tcp_reclen - transport->tcp_offset) {
+ struct xdr_skb_reader my_desc;
- len = xprt->tcp_reclen - xprt->tcp_offset;
+ len = transport->tcp_reclen - transport->tcp_offset;
memcpy(&my_desc, desc, sizeof(my_desc));
my_desc.count = len;
- r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
- &my_desc, xs_tcp_copy_data);
+ r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+ &my_desc, xdr_skb_read_bits);
desc->count -= r;
desc->offset += r;
} else
- r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
- desc, xs_tcp_copy_data);
+ r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+ desc, xdr_skb_read_bits);
if (r > 0) {
- xprt->tcp_copied += r;
- xprt->tcp_offset += r;
+ transport->tcp_copied += r;
+ transport->tcp_offset += r;
}
if (r != len) {
/* Error when copying to the receive buffer,
* usually because we weren't able to allocate
* additional buffer pages. All we can do now
- * is turn off XPRT_COPY_DATA, so the request
+ * is turn off TCP_RCV_COPY_DATA, so the request
* will not receive any additional updates,
* and time out.
* Any remaining data from this record will
* be discarded.
*/
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
+ transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
dprintk("RPC: XID %08x truncated request\n",
- ntohl(xprt->tcp_xid));
+ ntohl(transport->tcp_xid));
dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
- xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
+ xprt, transport->tcp_copied, transport->tcp_offset,
+ transport->tcp_reclen);
goto out;
}
dprintk("RPC: XID %08x read %Zd bytes\n",
- ntohl(xprt->tcp_xid), r);
+ ntohl(transport->tcp_xid), r);
dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
- xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
-
- if (xprt->tcp_copied == req->rq_private_buf.buflen)
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
- else if (xprt->tcp_offset == xprt->tcp_reclen) {
- if (xprt->tcp_flags & XPRT_LAST_FRAG)
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
+ xprt, transport->tcp_copied, transport->tcp_offset,
+ transport->tcp_reclen);
+
+ if (transport->tcp_copied == req->rq_private_buf.buflen)
+ transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+ else if (transport->tcp_offset == transport->tcp_reclen) {
+ if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
+ transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
}
out:
- if (!(xprt->tcp_flags & XPRT_COPY_DATA))
- xprt_complete_rqst(req->rq_task, xprt->tcp_copied);
+ if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
+ xprt_complete_rqst(req->rq_task, transport->tcp_copied);
spin_unlock(&xprt->transport_lock);
- xs_tcp_check_recm(xprt);
+ xs_tcp_check_fraghdr(transport);
}
-static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
{
size_t len;
- len = xprt->tcp_reclen - xprt->tcp_offset;
+ len = transport->tcp_reclen - transport->tcp_offset;
if (len > desc->count)
len = desc->count;
desc->count -= len;
desc->offset += len;
- xprt->tcp_offset += len;
+ transport->tcp_offset += len;
dprintk("RPC: discarded %Zu bytes\n", len);
- xs_tcp_check_recm(xprt);
+ xs_tcp_check_fraghdr(transport);
}
static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
{
struct rpc_xprt *xprt = rd_desc->arg.data;
- skb_reader_t desc = {
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct xdr_skb_reader desc = {
.skb = skb,
.offset = offset,
.count = len,
- .csum = 0
};
dprintk("RPC: xs_tcp_data_recv started\n");
do {
/* Read in a new fragment marker if necessary */
/* Can we ever really expect to get completely empty fragments? */
- if (xprt->tcp_flags & XPRT_COPY_RECM) {
+ if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
xs_tcp_read_fraghdr(xprt, &desc);
continue;
}
/* Read in the xid if necessary */
- if (xprt->tcp_flags & XPRT_COPY_XID) {
- xs_tcp_read_xid(xprt, &desc);
+ if (transport->tcp_flags & TCP_RCV_COPY_XID) {
+ xs_tcp_read_xid(transport, &desc);
continue;
}
/* Read in the request data */
- if (xprt->tcp_flags & XPRT_COPY_DATA) {
+ if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
xs_tcp_read_request(xprt, &desc);
continue;
}
/* Skip over any trailing bytes on short reads */
- xs_tcp_read_discard(xprt, &desc);
+ xs_tcp_read_discard(transport, &desc);
} while (desc.count);
dprintk("RPC: xs_tcp_data_recv done\n");
return len - desc.count;
@@ -858,11 +978,16 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_ESTABLISHED:
spin_lock_bh(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) {
+ struct sock_xprt *transport = container_of(xprt,
+ struct sock_xprt, xprt);
+
/* Reset TCP record info */
- xprt->tcp_offset = 0;
- xprt->tcp_reclen = 0;
- xprt->tcp_copied = 0;
- xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
+ transport->tcp_offset = 0;
+ transport->tcp_reclen = 0;
+ transport->tcp_copied = 0;
+ transport->tcp_flags =
+ TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
+
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
xprt_wake_pending_tasks(xprt, 0);
}
@@ -951,15 +1076,16 @@ static void xs_tcp_write_space(struct sock *sk)
static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
{
- struct sock *sk = xprt->inet;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
- if (xprt->rcvsize) {
+ if (transport->rcvsize) {
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2;
+ sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2;
}
- if (xprt->sndsize) {
+ if (transport->sndsize) {
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
+ sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2;
sk->sk_write_space(sk);
}
}
@@ -974,12 +1100,14 @@ static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
*/
static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
{
- xprt->sndsize = 0;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ transport->sndsize = 0;
if (sndsize)
- xprt->sndsize = sndsize + 1024;
- xprt->rcvsize = 0;
+ transport->sndsize = sndsize + 1024;
+ transport->rcvsize = 0;
if (rcvsize)
- xprt->rcvsize = rcvsize + 1024;
+ transport->rcvsize = rcvsize + 1024;
xs_udp_do_set_buffer_size(xprt);
}
@@ -1003,19 +1131,6 @@ static unsigned short xs_get_random_port(void)
}
/**
- * xs_print_peer_address - format an IPv4 address for printing
- * @xprt: generic transport
- * @format: flags field indicating which parts of the address to render
- */
-static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
-{
- if (xprt->address_strings[format] != NULL)
- return xprt->address_strings[format];
- else
- return "unprintable";
-}
-
-/**
* xs_set_port - reset the port number in the remote endpoint address
* @xprt: generic transport
* @port: new port number
@@ -1030,20 +1145,20 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
sap->sin_port = htons(port);
}
-static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
+static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock)
{
struct sockaddr_in myaddr = {
.sin_family = AF_INET,
};
int err;
- unsigned short port = xprt->port;
+ unsigned short port = transport->port;
do {
myaddr.sin_port = htons(port);
err = kernel_bind(sock, (struct sockaddr *) &myaddr,
sizeof(myaddr));
if (err == 0) {
- xprt->port = port;
+ transport->port = port;
dprintk("RPC: xs_bindresvport bound to port %u\n",
port);
return 0;
@@ -1052,22 +1167,53 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
port = xprt_max_resvport;
else
port--;
- } while (err == -EADDRINUSE && port != xprt->port);
+ } while (err == -EADDRINUSE && port != transport->port);
dprintk("RPC: can't bind to reserved port (%d).\n", -err);
return err;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key xs_key[2];
+static struct lock_class_key xs_slock_key[2];
+
+static inline void xs_reclassify_socket(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ BUG_ON(sk->sk_lock.owner != NULL);
+ switch (sk->sk_family) {
+ case AF_INET:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
+ &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
+ break;
+
+ case AF_INET6:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
+ &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
+ break;
+
+ default:
+ BUG();
+ }
+}
+#else
+static inline void xs_reclassify_socket(struct socket *sock)
+{
+}
+#endif
+
/**
* xs_udp_connect_worker - set up a UDP socket
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_udp_connect_worker(void *args)
+static void xs_udp_connect_worker(struct work_struct *work)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *) args;
- struct socket *sock = xprt->sock;
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
int err, status = -EIO;
if (xprt->shutdown || !xprt_bound(xprt))
@@ -1080,24 +1226,25 @@ static void xs_udp_connect_worker(void *args)
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
+ xs_reclassify_socket(sock);
- if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
+ if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
sock_release(sock);
goto out;
}
dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
- if (!xprt->inet) {
+ if (!transport->inet) {
struct sock *sk = sock->sk;
write_lock_bh(&sk->sk_callback_lock);
sk->sk_user_data = xprt;
- xprt->old_data_ready = sk->sk_data_ready;
- xprt->old_state_change = sk->sk_state_change;
- xprt->old_write_space = sk->sk_write_space;
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_no_check = UDP_CSUM_NORCV;
@@ -1106,8 +1253,8 @@ static void xs_udp_connect_worker(void *args)
xprt_set_connected(xprt);
/* Reset to new socket */
- xprt->sock = sock;
- xprt->inet = sk;
+ transport->sock = sock;
+ transport->inet = sk;
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -1125,7 +1272,7 @@ out:
static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
{
int result;
- struct socket *sock = xprt->sock;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sockaddr any;
dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1136,7 +1283,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
*/
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
- result = kernel_connect(sock, &any, sizeof(any), 0);
+ result = kernel_connect(transport->sock, &any, sizeof(any), 0);
if (result)
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
@@ -1144,27 +1291,30 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
/**
* xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker(void *args)
+static void xs_tcp_connect_worker(struct work_struct *work)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)args;
- struct socket *sock = xprt->sock;
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
int err, status = -EIO;
if (xprt->shutdown || !xprt_bound(xprt))
goto out;
- if (!xprt->sock) {
+ if (!sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
goto out;
}
+ xs_reclassify_socket(sock);
- if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
+ if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
sock_release(sock);
goto out;
}
@@ -1173,17 +1323,17 @@ static void xs_tcp_connect_worker(void *args)
xs_tcp_reuse_connection(xprt);
dprintk("RPC: worker connecting xprt %p to address: %s\n",
- xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
- if (!xprt->inet) {
+ if (!transport->inet) {
struct sock *sk = sock->sk;
write_lock_bh(&sk->sk_callback_lock);
sk->sk_user_data = xprt;
- xprt->old_data_ready = sk->sk_data_ready;
- xprt->old_state_change = sk->sk_state_change;
- xprt->old_write_space = sk->sk_write_space;
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
@@ -1198,8 +1348,8 @@ static void xs_tcp_connect_worker(void *args)
xprt_clear_connected(xprt);
/* Reset to new socket */
- xprt->sock = sock;
- xprt->inet = sk;
+ transport->sock = sock;
+ transport->inet = sk;
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -1248,21 +1398,22 @@ out_clear:
static void xs_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
if (xprt_test_and_set_connecting(xprt))
return;
- if (xprt->sock != NULL) {
+ if (transport->sock != NULL) {
dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n",
xprt, xprt->reestablish_timeout / HZ);
- schedule_delayed_work(&xprt->connect_worker,
+ schedule_delayed_work(&transport->connect_worker,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
} else {
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
- schedule_work(&xprt->connect_worker);
+ schedule_delayed_work(&transport->connect_worker, 0);
/* flush_scheduled_work can sleep... */
if (!RPC_IS_ASYNC(task))
@@ -1278,8 +1429,10 @@ static void xs_connect(struct rpc_task *task)
*/
static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
- xprt->port,
+ transport->port,
xprt->stat.bind_count,
xprt->stat.sends,
xprt->stat.recvs,
@@ -1296,13 +1449,14 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
*/
static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
long idle_time = 0;
if (xprt_connected(xprt))
idle_time = (long)(jiffies - xprt->last_used) / HZ;
seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
- xprt->port,
+ transport->port,
xprt->stat.bind_count,
xprt->stat.connect_count,
xprt->stat.connect_time,
@@ -1316,7 +1470,6 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
- .print_addr = xs_print_peer_address,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.rpcbind = rpc_getport,
@@ -1334,7 +1487,6 @@ static struct rpc_xprt_ops xs_udp_ops = {
};
static struct rpc_xprt_ops xs_tcp_ops = {
- .print_addr = xs_print_peer_address,
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.rpcbind = rpc_getport,
@@ -1349,33 +1501,64 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.print_stats = xs_tcp_print_stats,
};
+static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size)
+{
+ struct rpc_xprt *xprt;
+ struct sock_xprt *new;
+
+ if (addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: xs_setup_xprt: address too large\n");
+ return ERR_PTR(-EBADF);
+ }
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (new == NULL) {
+ dprintk("RPC: xs_setup_xprt: couldn't allocate rpc_xprt\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ xprt = &new->xprt;
+
+ xprt->max_reqs = slot_table_size;
+ xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
+ if (xprt->slot == NULL) {
+ kfree(xprt);
+ dprintk("RPC: xs_setup_xprt: couldn't allocate slot table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(&xprt->addr, addr, addrlen);
+ xprt->addrlen = addrlen;
+ new->port = xs_get_random_port();
+
+ return xprt;
+}
+
/**
* xs_setup_udp - Set up transport to use a UDP socket
- * @xprt: transport to set up
+ * @addr: address of remote server
+ * @addrlen: length of address in bytes
* @to: timeout parameters
*
*/
-int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
{
- size_t slot_table_size;
- struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
- xprt->max_reqs = xprt_udp_slot_table_entries;
- slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
- xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
- if (xprt->slot == NULL)
- return -ENOMEM;
+ xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(addr->sin_port) != 0)
+ if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
xprt_set_bound(xprt);
- xprt->port = xs_get_random_port();
xprt->prot = IPPROTO_UDP;
xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
- INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_UDP_CONN_TO;
xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1390,37 +1573,36 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
xs_format_peer_addresses(xprt);
dprintk("RPC: set up transport to address %s\n",
- xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+ xprt->address_strings[RPC_DISPLAY_ALL]);
- return 0;
+ return xprt;
}
/**
* xs_setup_tcp - Set up transport to use a TCP socket
- * @xprt: transport to set up
+ * @addr: address of remote server
+ * @addrlen: length of address in bytes
* @to: timeout parameters
*
*/
-int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
{
- size_t slot_table_size;
- struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
- xprt->max_reqs = xprt_tcp_slot_table_entries;
- slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
- xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
- if (xprt->slot == NULL)
- return -ENOMEM;
+ xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(addr->sin_port) != 0)
+ if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
xprt_set_bound(xprt);
- xprt->port = xs_get_random_port();
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
- INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_TCP_CONN_TO;
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1435,7 +1617,40 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
xs_format_peer_addresses(xprt);
dprintk("RPC: set up transport to address %s\n",
- xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+ xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ return xprt;
+}
+
+/**
+ * init_socket_xprt - set up xprtsock's sysctls
+ *
+ */
+int init_socket_xprt(void)
+{
+#ifdef RPC_DEBUG
+ if (!sunrpc_table_header) {
+ sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
+#ifdef CONFIG_PROC_FS
+ if (sunrpc_table[0].de)
+ sunrpc_table[0].de->owner = THIS_MODULE;
+#endif
+ }
+#endif
return 0;
}
+
+/**
+ * cleanup_socket_xprt - remove xprtsock's sysctls
+ *
+ */
+void cleanup_socket_xprt(void)
+{
+#ifdef RPC_DEBUG
+ if (sunrpc_table_header) {
+ unregister_sysctl_table(sunrpc_table_header);
+ sunrpc_table_header = NULL;
+ }
+#endif
+}
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 1bb75703f38..730c5c47ed8 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -774,8 +774,8 @@ int tipc_bclink_set_queue_limits(u32 limit)
int tipc_bclink_init(void)
{
- bcbearer = kmalloc(sizeof(*bcbearer), GFP_ATOMIC);
- bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC);
+ bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
+ bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
if (!bcbearer || !bclink) {
nomem:
warn("Multicast link creation failed, no memory\n");
@@ -786,14 +786,12 @@ int tipc_bclink_init(void)
return -ENOMEM;
}
- memset(bcbearer, 0, sizeof(struct bcbearer));
INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
bcbearer->bearer.media = &bcbearer->media;
bcbearer->media.send_msg = tipc_bcbearer_send;
sprintf(bcbearer->media.name, "tipc-multicast");
bcl = &bclink->link;
- memset(bclink, 0, sizeof(struct bclink));
INIT_LIST_HEAD(&bcl->waiting_ports);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index ed1351ed05e..baf55c459c8 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -107,7 +107,7 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
{
struct sk_buff *buf;
- u32 value_net;
+ __be32 value_net;
buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value)));
if (buf) {
@@ -208,7 +208,7 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg,
if (mng.link_subscriptions > 64)
break;
- sub = (struct subscr_data *)kmalloc(sizeof(*sub),
+ sub = kmalloc(sizeof(*sub),
GFP_ATOMIC);
if (sub == NULL) {
warn("Memory squeeze; dropped remote link subscription\n");
@@ -284,8 +284,7 @@ static struct sk_buff *cfg_set_own_addr(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- addr = *(u32 *)TLV_DATA(req_tlv_area);
- addr = ntohl(addr);
+ addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (addr == tipc_own_addr)
return tipc_cfg_reply_none();
if (!tipc_addr_node_valid(addr))
@@ -319,8 +318,7 @@ static struct sk_buff *cfg_set_remote_mng(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
tipc_remote_management = (value != 0);
return tipc_cfg_reply_none();
}
@@ -332,8 +330,7 @@ static struct sk_buff *cfg_set_max_publications(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value != delimit(value, 1, 65535))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max publications must be 1-65535)");
@@ -348,8 +345,7 @@ static struct sk_buff *cfg_set_max_subscriptions(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value != delimit(value, 1, 65535))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max subscriptions must be 1-65535");
@@ -363,8 +359,7 @@ static struct sk_buff *cfg_set_max_ports(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value == tipc_max_ports)
return tipc_cfg_reply_none();
if (value != delimit(value, 127, 65535))
@@ -383,8 +378,7 @@ static struct sk_buff *cfg_set_max_zones(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value == tipc_max_zones)
return tipc_cfg_reply_none();
if (value != delimit(value, 1, 255))
@@ -403,8 +397,7 @@ static struct sk_buff *cfg_set_max_clusters(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value != delimit(value, 1, 1))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (max clusters fixed at 1)");
@@ -417,8 +410,7 @@ static struct sk_buff *cfg_set_max_nodes(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value == tipc_max_nodes)
return tipc_cfg_reply_none();
if (value != delimit(value, 8, 2047))
@@ -437,8 +429,7 @@ static struct sk_buff *cfg_set_max_slaves(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value != 0)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (max secondary nodes fixed at 0)");
@@ -451,8 +442,7 @@ static struct sk_buff *cfg_set_netid(void)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value == tipc_net_id)
return tipc_cfg_reply_none();
if (value != delimit(value, 1, 9999))
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index d8af4c28695..627f99b7afd 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -393,8 +393,7 @@ struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = *(u32 *)TLV_DATA(req_tlv_area);
- value = ntohl(value);
+ value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (value != delimit(value, 0, 32768))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (log size must be 0-32768)");
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index ae6ddf00a1a..eb80778d6d9 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -42,7 +42,7 @@ struct queue_item {
unsigned long data;
};
-static kmem_cache_t *tipc_queue_item_cache;
+static struct kmem_cache *tipc_queue_item_cache;
static struct list_head signal_queue_head;
static DEFINE_SPINLOCK(qitem_lock);
static int handler_enabled = 0;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 03bd659c43c..7bf87cb26ef 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -66,11 +66,11 @@
*/
struct distr_item {
- u32 type;
- u32 lower;
- u32 upper;
- u32 ref;
- u32 key;
+ __be32 type;
+ __be32 lower;
+ __be32 upper;
+ __be32 ref;
+ __be32 key;
};
/**
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 886bda5e88d..4111a31def7 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -60,7 +60,7 @@ struct node *tipc_node_create(u32 addr)
struct node *n_ptr;
struct node **curr_node;
- n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC);
+ n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC);
if (!n_ptr) {
warn("Node creation failed, no memory\n");
return NULL;
@@ -75,7 +75,6 @@ struct node *tipc_node_create(u32 addr)
return NULL;
}
- memset(n_ptr, 0, sizeof(*n_ptr));
n_ptr->addr = addr;
spin_lock_init(&n_ptr->lock);
INIT_LIST_HEAD(&n_ptr->nsub);
@@ -597,8 +596,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- domain = *(u32 *)TLV_DATA(req_tlv_area);
- domain = ntohl(domain);
+ domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (!tipc_addr_domain_valid(domain))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network address)");
@@ -642,8 +640,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- domain = *(u32 *)TLV_DATA(req_tlv_area);
- domain = ntohl(domain);
+ domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (!tipc_addr_domain_valid(domain))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network address)");
@@ -664,8 +661,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
/* Add TLV for broadcast link */
- link_info.dest = tipc_own_addr & 0xfffff00;
- link_info.dest = htonl(link_info.dest);
+ link_info.dest = htonl(tipc_own_addr & 0xfffff00);
link_info.up = htonl(1);
sprintf(link_info.str, tipc_bclink_name);
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
diff --git a/net/tipc/port.c b/net/tipc/port.c
index c1a1a76759b..b7f3199523c 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1136,11 +1136,12 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
int res = -EINVAL;
p_ptr = tipc_port_lock(ref);
+ if (!p_ptr)
+ return -EINVAL;
+
dbg("tipc_publ %u, p_ptr = %x, conn = %x, scope = %x, "
"lower = %u, upper = %u\n",
ref, p_ptr, p_ptr->publ.connected, scope, seq->lower, seq->upper);
- if (!p_ptr)
- return -EINVAL;
if (p_ptr->publ.connected)
goto exit;
if (seq->lower > seq->upper)
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 7a918f12a5d..ddade7388aa 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -350,7 +350,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
/* Allocate subscription object */
- sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
+ sub = kzalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
warn("Subscription rejected, no memory\n");
subscr_terminate(subscriber);
@@ -359,7 +359,6 @@ static void subscr_subscribe(struct tipc_subscr *s,
/* Initialize subscription object */
- memset(sub, 0, sizeof(*sub));
sub->seq.type = htohl(s->seq.type, subscriber->swap);
sub->seq.lower = htohl(s->seq.lower, subscriber->swap);
sub->seq.upper = htohl(s->seq.upper, subscriber->swap);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b43a27828df..2f208c7f4d4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -151,8 +151,9 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
* each socket state is protected by separate rwlock.
*/
-static inline unsigned unix_hash_fold(unsigned hash)
+static inline unsigned unix_hash_fold(__wsum n)
{
+ unsigned hash = (__force unsigned)n;
hash ^= hash>>16;
hash ^= hash>>8;
return hash&(UNIX_HASH_SIZE-1);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 746c2f4a5fa..f14ad6635fc 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ atomic_t unix_tot_inflight = ATOMIC_INIT(0);
static struct sock *unix_get_socket(struct file *filp)
{
struct sock *u_sock = NULL;
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = filp->f_path.dentry->d_inode;
/*
* Socket ?
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index 6f39faa1583..c2059733e15 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -13,7 +13,7 @@
* Due Credit:
* Wanpipe socket layer is based on Packet and
* the X25 socket layers. The above sockets were
-* used for the specific use of Sangoma Technoloiges
+* used for the specific use of Sangoma Technologies
* API programs.
* Packet socket Authors: Ross Biro, Fred N. van Kempen and
* Alan Cox.
@@ -23,7 +23,7 @@
* Apr 25, 2000 Nenad Corbic o Added the ability to send zero length packets.
* Mar 13, 2000 Nenad Corbic o Added a tx buffer check via ioctl call.
* Mar 06, 2000 Nenad Corbic o Fixed the corrupt sock lcn problem.
-* Server and client applicaton can run
+* Server and client application can run
* simultaneously without conflicts.
* Feb 29, 2000 Nenad Corbic o Added support for PVC protocols, such as
* CHDLC, Frame Relay and HDLC API.
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 9479659277a..769cdd62c1b 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -3,7 +3,7 @@
*
* This module is completely hardware-independent and provides
* the following common services for the WAN Link Drivers:
-* o WAN device managenment (registering, unregistering)
+* o WAN device management (registering, unregistering)
* o Network interface management
* o Physical connection management (dial-up, incoming calls)
* o Logical connection management (switched virtual circuits)
@@ -62,63 +62,6 @@
#define KMEM_SAFETYZONE 8
-/***********FOR DEBUGGING PURPOSES*********************************************
-static void * dbg_kmalloc(unsigned int size, int prio, int line) {
- int i = 0;
- void * v = kmalloc(size+sizeof(unsigned int)+2*KMEM_SAFETYZONE*8,prio);
- char * c1 = v;
- c1 += sizeof(unsigned int);
- *((unsigned int *)v) = size;
-
- for (i = 0; i < KMEM_SAFETYZONE; i++) {
- c1[0] = 'D'; c1[1] = 'E'; c1[2] = 'A'; c1[3] = 'D';
- c1[4] = 'B'; c1[5] = 'E'; c1[6] = 'E'; c1[7] = 'F';
- c1 += 8;
- }
- c1 += size;
- for (i = 0; i < KMEM_SAFETYZONE; i++) {
- c1[0] = 'M'; c1[1] = 'U'; c1[2] = 'N'; c1[3] = 'G';
- c1[4] = 'W'; c1[5] = 'A'; c1[6] = 'L'; c1[7] = 'L';
- c1 += 8;
- }
- v = ((char *)v) + sizeof(unsigned int) + KMEM_SAFETYZONE*8;
- printk(KERN_INFO "line %d kmalloc(%d,%d) = %p\n",line,size,prio,v);
- return v;
-}
-static void dbg_kfree(void * v, int line) {
- unsigned int * sp = (unsigned int *)(((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8));
- unsigned int size = *sp;
- char * c1 = ((char *)v) - KMEM_SAFETYZONE*8;
- int i = 0;
- for (i = 0; i < KMEM_SAFETYZONE; i++) {
- if ( c1[0] != 'D' || c1[1] != 'E' || c1[2] != 'A' || c1[3] != 'D'
- || c1[4] != 'B' || c1[5] != 'E' || c1[6] != 'E' || c1[7] != 'F') {
- printk(KERN_INFO "kmalloced block at %p has been corrupted (underrun)!\n",v);
- printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8,
- c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] );
- }
- c1 += 8;
- }
- c1 += size;
- for (i = 0; i < KMEM_SAFETYZONE; i++) {
- if ( c1[0] != 'M' || c1[1] != 'U' || c1[2] != 'N' || c1[3] != 'G'
- || c1[4] != 'W' || c1[5] != 'A' || c1[6] != 'L' || c1[7] != 'L'
- ) {
- printk(KERN_INFO "kmalloced block at %p has been corrupted (overrun):\n",v);
- printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8,
- c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] );
- }
- c1 += 8;
- }
- printk(KERN_INFO "line %d kfree(%p)\n",line,v);
- v = ((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8);
- kfree(v);
-}
-
-#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__)
-#define kfree(x) dbg_kfree(x,__LINE__)
-*****************************************************************************/
-
/*
* Function Prototypes
*/
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 5a0dbeb6bbe..6b381fc0383 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -119,6 +119,23 @@ static struct xfrm_algo_desc aalg_list[] = {
.sadb_alg_maxbits = 160
}
},
+{
+ .name = "xcbc(aes)",
+
+ .uinfo = {
+ .auth = {
+ .icv_truncbits = 96,
+ .icv_fullbits = 128,
+ }
+ },
+
+ .desc = {
+ .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC,
+ .sadb_alg_ivlen = 0,
+ .sadb_alg_minbits = 128,
+ .sadb_alg_maxbits = 128
+ }
+},
};
static struct xfrm_algo_desc ealg_list[] = {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index e8198a2c785..414f8907038 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -12,7 +12,7 @@
#include <net/ip.h>
#include <net/xfrm.h>
-static kmem_cache_t *secpath_cachep __read_mostly;
+static struct kmem_cache *secpath_cachep __read_mostly;
void __secpath_destroy(struct sec_path *sp)
{
@@ -27,7 +27,7 @@ struct sec_path *secpath_dup(struct sec_path *src)
{
struct sec_path *sp;
- sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC);
+ sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
if (!sp)
return NULL;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7736b23c3f0..bebd40e5a62 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -25,6 +25,7 @@
#include <linux/cache.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <linux/audit.h>
#include "xfrm_hash.h"
@@ -39,7 +40,7 @@ EXPORT_SYMBOL(xfrm_policy_count);
static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
-static kmem_cache_t *xfrm_dst_cache __read_mostly;
+static struct kmem_cache *xfrm_dst_cache __read_mostly;
static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
@@ -50,6 +51,40 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family);
static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo);
+/*
+ * Test an IPv4 flow against a selector.  Returns 1 only when every
+ * criterion holds, 0 as soon as one fails:
+ *  - destination and source addresses agree with the selector under the
+ *    selector's prefix lengths (addr_match);
+ *  - destination and source ports agree on the bits set in the selector's
+ *    port masks;
+ *  - protocol and outgoing interface are equal to the selector's, where a
+ *    zero selector value accepts anything.
+ */
+static inline int
+__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+{
+	if (!addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d))
+		return 0;
+	if (!addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s))
+		return 0;
+	if ((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask)
+		return 0;
+	if ((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask)
+		return 0;
+	if (sel->proto && fl->proto != sel->proto)
+		return 0;
+	if (sel->ifindex && fl->oif != sel->ifindex)
+		return 0;
+	return 1;
+}
+
+/*
+ * Test an IPv6 flow against a selector.  Same criteria as the IPv4
+ * variant, but the flow's fl6_dst/fl6_src addresses are compared.
+ * Returns 1 when address prefixes, masked ports, protocol and outgoing
+ * interface all match (zero selector proto/ifindex accept anything),
+ * 0 otherwise.
+ */
+static inline int
+__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+{
+	if (!addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d))
+		return 0;
+	if (!addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s))
+		return 0;
+	if ((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask)
+		return 0;
+	if ((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask)
+		return 0;
+	if (sel->proto && fl->proto != sel->proto)
+		return 0;
+	if (sel->ifindex && fl->oif != sel->ifindex)
+		return 0;
+	return 1;
+}
+
+/*
+ * Dispatch a selector/flow comparison on address family.
+ * AF_INET and AF_INET6 are handed to the per-family matcher and its
+ * result is returned; any other family never matches (returns 0).
+ */
+int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
+			unsigned short family)
+{
+	if (family == AF_INET)
+		return __xfrm4_selector_match(sel, fl);
+	if (family == AF_INET6)
+		return __xfrm6_selector_match(sel, fl);
+	return 0;
+}
+
int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
@@ -358,7 +393,7 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
xfrm_pol_put(policy);
}
-static void xfrm_policy_gc_task(void *data)
+static void xfrm_policy_gc_task(struct work_struct *work)
{
struct xfrm_policy *policy;
struct hlist_node *entry, *tmp;
@@ -546,7 +581,7 @@ static inline int xfrm_byidx_should_resize(int total)
static DEFINE_MUTEX(hash_resize_mutex);
-static void xfrm_hash_resize(void *__unused)
+static void xfrm_hash_resize(struct work_struct *__unused)
{
int dir, total;
@@ -563,7 +598,7 @@ static void xfrm_hash_resize(void *__unused)
mutex_unlock(&hash_resize_mutex);
}
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
/* Generate new index... KAME seems to generate them ordered by cost
* of an absolute inpredictability of ordering of rules. This will not pass. */
@@ -770,7 +805,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
}
EXPORT_SYMBOL(xfrm_policy_byid);
-void xfrm_policy_flush(u8 type)
+void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
int dir;
@@ -790,6 +825,9 @@ void xfrm_policy_flush(u8 type)
hlist_del(&pol->byidx);
write_unlock_bh(&xfrm_policy_lock);
+ xfrm_audit_log(audit_info->loginuid, audit_info->secid,
+ AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL);
+
xfrm_policy_kill(pol);
killed++;
@@ -808,6 +846,11 @@ void xfrm_policy_flush(u8 type)
hlist_del(&pol->byidx);
write_unlock_bh(&xfrm_policy_lock);
+ xfrm_audit_log(audit_info->loginuid,
+ audit_info->secid,
+ AUDIT_MAC_IPSEC_DELSPD, 1,
+ pol, NULL);
+
xfrm_policy_kill(pol);
killed++;
@@ -826,33 +869,12 @@ EXPORT_SYMBOL(xfrm_policy_flush);
int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
void *data)
{
- struct xfrm_policy *pol;
+ struct xfrm_policy *pol, *last = NULL;
struct hlist_node *entry;
- int dir, count, error;
+ int dir, last_dir = 0, count, error;
read_lock_bh(&xfrm_policy_lock);
count = 0;
- for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
- struct hlist_head *table = xfrm_policy_bydst[dir].table;
- int i;
-
- hlist_for_each_entry(pol, entry,
- &xfrm_policy_inexact[dir], bydst) {
- if (pol->type == type)
- count++;
- }
- for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
- hlist_for_each_entry(pol, entry, table + i, bydst) {
- if (pol->type == type)
- count++;
- }
- }
- }
-
- if (count == 0) {
- error = -ENOENT;
- goto out;
- }
for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
struct hlist_head *table = xfrm_policy_bydst[dir].table;
@@ -862,21 +884,37 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*)
&xfrm_policy_inexact[dir], bydst) {
if (pol->type != type)
continue;
- error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
- if (error)
- goto out;
+ if (last) {
+ error = func(last, last_dir % XFRM_POLICY_MAX,
+ count, data);
+ if (error)
+ goto out;
+ }
+ last = pol;
+ last_dir = dir;
+ count++;
}
for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
hlist_for_each_entry(pol, entry, table + i, bydst) {
if (pol->type != type)
continue;
- error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
- if (error)
- goto out;
+ if (last) {
+ error = func(last, last_dir % XFRM_POLICY_MAX,
+ count, data);
+ if (error)
+ goto out;
+ }
+ last = pol;
+ last_dir = dir;
+ count++;
}
}
}
- error = 0;
+ if (count == 0) {
+ error = -ENOENT;
+ goto out;
+ }
+ error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
out:
read_unlock_bh(&xfrm_policy_lock);
return error;
@@ -1177,6 +1215,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
if (tmpl->mode == XFRM_MODE_TUNNEL) {
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
+ family = tmpl->encap_family;
if (xfrm_addr_any(local, family)) {
error = xfrm_get_saddr(&tmp, remote, family);
if (error)
@@ -1894,7 +1933,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
return 0;
- if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol))
+ if (fl && pol &&
+ !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
return 0;
if (dst->xfrm->km.state != XFRM_STATE_VALID)
return 0;
@@ -1946,6 +1986,117 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
EXPORT_SYMBOL(xfrm_bundle_ok);
+#ifdef CONFIG_AUDITSYSCALL
+/* Audit addition and deletion of SAs and ipsec policy */
+
+/*
+ * xfrm_audit_log - emit one audit record for an SA or SPD add/delete.
+ * @auid:   login uid, printed as "auid=".
+ * @sid:    security id; when non-zero and translatable to a context string
+ *          it is printed as "subj=", otherwise the current task's context
+ *          is logged instead.
+ * @type:   AUDIT_MAC_IPSEC_ADDSA, _DELSA, _ADDSPD or _DELSPD; any other
+ *          value is ignored and nothing is logged.
+ * @result: printed verbatim as "res=".
+ * @xp:     policy being audited, or NULL when auditing a state.
+ * @x:      state being audited, or NULL when auditing a policy.
+ *
+ * When @xp is set, the family, addresses and security context are taken
+ * from the policy selector; otherwise they are taken from @x.  The SPI and
+ * protocol are appended whenever @x is non-NULL.  Nothing is logged when
+ * auditing is disabled, when both @xp and @x are NULL, or when
+ * audit_log_start() returns NULL.
+ */
+void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
+		    struct xfrm_policy *xp, struct xfrm_state *x)
+{
+	char *secctx;
+	u32 secctx_len;
+	struct xfrm_sec_ctx *sctx = NULL;
+	struct audit_buffer *audit_buf;
+	const char *op;
+	int family;
+	extern int audit_enabled;
+
+	if (audit_enabled == 0)
+		return;
+
+	/* With neither object there is nothing to describe, and x would be
+	 * dereferenced below. */
+	if (!xp && !x)
+		return;
+
+	/* Resolve the record prefix before allocating the buffer, so that an
+	 * unknown type cannot leave behind a buffer that is never ended. */
+	switch (type) {
+	case AUDIT_MAC_IPSEC_ADDSA:
+		op = "SAD add";
+		break;
+	case AUDIT_MAC_IPSEC_DELSA:
+		op = "SAD delete";
+		break;
+	case AUDIT_MAC_IPSEC_ADDSPD:
+		op = "SPD add";
+		break;
+	case AUDIT_MAC_IPSEC_DELSPD:
+		op = "SPD delete";
+		break;
+	default:
+		return;
+	}
+
+	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
+	if (audit_buf == NULL)
+		return;
+
+	audit_log_format(audit_buf, "%s: auid=%u", op, auid);
+
+	if (sid != 0 &&
+	    security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) {
+		audit_log_format(audit_buf, " subj=%s", secctx);
+		/* Hand the context string back once it has been logged. */
+		security_release_secctx(secctx, secctx_len);
+	} else {
+		audit_log_task_context(audit_buf);
+	}
+
+	/* The policy, when given, takes precedence over the state. */
+	if (xp) {
+		family = xp->selector.family;
+		if (xp->security)
+			sctx = xp->security;
+	} else {
+		family = x->props.family;
+		if (x->security)
+			sctx = x->security;
+	}
+
+	if (sctx)
+		audit_log_format(audit_buf,
+				 " sec_alg=%u sec_doi=%u sec_obj=%s",
+				 sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str);
+
+	/* Families other than IPv4/IPv6 get no src/dst fields. */
+	switch (family) {
+	case AF_INET:
+		{
+			struct in_addr saddr, daddr;
+			if (xp) {
+				saddr.s_addr = xp->selector.saddr.a4;
+				daddr.s_addr = xp->selector.daddr.a4;
+			} else {
+				saddr.s_addr = x->props.saddr.a4;
+				daddr.s_addr = x->id.daddr.a4;
+			}
+			audit_log_format(audit_buf,
+					 " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
+					 NIPQUAD(saddr), NIPQUAD(daddr));
+		}
+		break;
+	case AF_INET6:
+		{
+			struct in6_addr saddr6, daddr6;
+			if (xp) {
+				memcpy(&saddr6, xp->selector.saddr.a6,
+				       sizeof(struct in6_addr));
+				memcpy(&daddr6, xp->selector.daddr.a6,
+				       sizeof(struct in6_addr));
+			} else {
+				memcpy(&saddr6, x->props.saddr.a6,
+				       sizeof(struct in6_addr));
+				memcpy(&daddr6, x->id.daddr.a6,
+				       sizeof(struct in6_addr));
+			}
+			/* Leading space before "dst=" keeps the two
+			 * fields separate, as in the IPv4 record. */
+			audit_log_format(audit_buf,
+					 " src=" NIP6_FMT " dst=" NIP6_FMT,
+					 NIP6(saddr6), NIP6(daddr6));
+		}
+		break;
+	}
+
+	/* Any protocol other than AH or ESP is reported as "IPCOMP". */
+	if (x)
+		audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s",
+				 (unsigned long)ntohl(x->id.spi),
+				 (unsigned long)ntohl(x->id.spi),
+				 x->id.proto == IPPROTO_AH ? "AH" :
+				 (x->id.proto == IPPROTO_ESP ?
+				  "ESP" : "IPCOMP"));
+
+	audit_log_format(audit_buf, " res=%u", result);
+	audit_log_end(audit_buf);
+}
+
+EXPORT_SYMBOL(xfrm_audit_log);
+#endif /* CONFIG_AUDITSYSCALL */
+
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
int err = 0;
@@ -2080,7 +2231,7 @@ static void __init xfrm_policy_init(void)
panic("XFRM: failed to allocate bydst hash\n");
}
- INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+ INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
register_netdevice_notifier(&xfrm_dev_notifier);
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 84bbf8474f3..fdb08d9f34a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/cache.h>
#include <asm/uaccess.h>
+#include <linux/audit.h>
#include "xfrm_hash.h"
@@ -115,7 +116,7 @@ static unsigned long xfrm_hash_new_size(void)
static DEFINE_MUTEX(hash_resize_mutex);
-static void xfrm_hash_resize(void *__unused)
+static void xfrm_hash_resize(struct work_struct *__unused)
{
struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
unsigned long nsize, osize;
@@ -168,7 +169,7 @@ out_unlock:
mutex_unlock(&hash_resize_mutex);
}
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);
@@ -207,7 +208,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
kfree(x);
}
-static void xfrm_state_gc_task(void *data)
+static void xfrm_state_gc_task(struct work_struct *data)
{
struct xfrm_state *x;
struct hlist_node *entry, *tmp;
@@ -238,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data)
unsigned long now = (unsigned long)xtime.tv_sec;
long next = LONG_MAX;
int warn = 0;
+ int err = 0;
spin_lock(&x->lock);
if (x->km.state == XFRM_STATE_DEAD)
@@ -295,9 +297,14 @@ expired:
next = 2;
goto resched;
}
- if (!__xfrm_state_delete(x) && x->id.spi)
+
+ err = __xfrm_state_delete(x);
+ if (!err && x->id.spi)
km_state_expired(x, 1, 0);
+ xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
+ AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
out:
spin_unlock(&x->lock);
}
@@ -384,9 +391,10 @@ int xfrm_state_delete(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete);
-void xfrm_state_flush(u8 proto)
+void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
int i;
+ int err = 0;
spin_lock_bh(&xfrm_state_lock);
for (i = 0; i <= xfrm_state_hmask; i++) {
@@ -399,7 +407,11 @@ restart:
xfrm_state_hold(x);
spin_unlock_bh(&xfrm_state_lock);
- xfrm_state_delete(x);
+ err = xfrm_state_delete(x);
+ xfrm_audit_log(audit_info->loginuid,
+ audit_info->secid,
+ AUDIT_MAC_IPSEC_DELSA,
+ err ? 0 : 1, NULL, x);
xfrm_state_put(x);
spin_lock_bh(&xfrm_state_lock);
@@ -505,6 +517,14 @@ __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
x->id.proto, family);
}
+/*
+ * Queue xfrm_hash_work when all three conditions hold: the caller saw a
+ * hash collision, the table (xfrm_state_hmask + 1 buckets) is still below
+ * xfrm_state_hashmax, and xfrm_state_num exceeds xfrm_state_hmask.
+ */
+static void xfrm_hash_grow_check(int have_hash_collision)
+{
+	if (!have_hash_collision)
+		return;
+	if ((xfrm_state_hmask + 1) >= xfrm_state_hashmax)
+		return;
+	if (xfrm_state_num <= xfrm_state_hmask)
+		return;
+
+	schedule_work(&xfrm_hash_work);
+}
+
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
struct flowi *fl, struct xfrm_tmpl *tmpl,
@@ -598,6 +618,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
add_timer(&x->timer);
+ xfrm_state_num++;
+ xfrm_hash_grow_check(x->bydst.next != NULL);
} else {
x->km.state = XFRM_STATE_DEAD;
xfrm_state_put(x);
@@ -614,14 +636,6 @@ out:
return x;
}
-static void xfrm_hash_grow_check(int have_hash_collision)
-{
- if (have_hash_collision &&
- (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
- xfrm_state_num > xfrm_state_hmask)
- schedule_work(&xfrm_hash_work);
-}
-
static void __xfrm_state_insert(struct xfrm_state *x)
{
unsigned int h;
@@ -1097,7 +1111,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
void *data)
{
int i;
- struct xfrm_state *x;
+ struct xfrm_state *x, *last = NULL;
struct hlist_node *entry;
int count = 0;
int err = 0;
@@ -1105,24 +1119,22 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
spin_lock_bh(&xfrm_state_lock);
for (i = 0; i <= xfrm_state_hmask; i++) {
hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
- if (xfrm_id_proto_match(x->id.proto, proto))
- count++;
+ if (!xfrm_id_proto_match(x->id.proto, proto))
+ continue;
+ if (last) {
+ err = func(last, count, data);
+ if (err)
+ goto out;
+ }
+ last = x;
+ count++;
}
}
if (count == 0) {
err = -ENOENT;
goto out;
}
-
- for (i = 0; i <= xfrm_state_hmask; i++) {
- hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
- if (!xfrm_id_proto_match(x->id.proto, proto))
- continue;
- err = func(x, --count, data);
- if (err)
- goto out;
- }
- }
+ err = func(last, 0, data);
out:
spin_unlock_bh(&xfrm_state_lock);
return err;
@@ -1302,7 +1314,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
}
EXPORT_SYMBOL(km_query);
-int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
int err = -EINVAL;
struct xfrm_mgr *km;
@@ -1566,6 +1578,6 @@ void __init xfrm_state_init(void)
panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
- INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+ INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2b2e59d8ffb..e5372b11fc8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,6 +31,7 @@
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <linux/in6.h>
#endif
+#include <linux/audit.h>
static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
{
@@ -244,11 +245,10 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
*props = algo->desc.sadb_alg_id;
len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;
- p = kmalloc(len, GFP_KERNEL);
+ p = kmemdup(ualg, len, GFP_KERNEL);
if (!p)
return -ENOMEM;
- memcpy(p, ualg, len);
strcpy(p->alg_name, algo->name);
*algpp = p;
return 0;
@@ -263,11 +263,10 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
return 0;
uencap = RTA_DATA(rta);
- p = kmalloc(sizeof(*p), GFP_KERNEL);
+ p = kmemdup(uencap, sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
- memcpy(p, uencap, sizeof(*p));
*encapp = p;
return 0;
}
@@ -305,11 +304,10 @@ static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg)
return 0;
uaddrp = RTA_DATA(rta);
- p = kmalloc(sizeof(*p), GFP_KERNEL);
+ p = kmemdup(uaddrp, sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
- memcpy(p, uaddrp, sizeof(*p));
*addrpp = p;
return 0;
}
@@ -323,7 +321,7 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
x->props.replay_window = p->replay_window;
x->props.reqid = p->reqid;
x->props.family = p->family;
- x->props.saddr = p->saddr;
+ memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
x->props.flags = p->flags;
}
@@ -457,6 +455,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
else
err = xfrm_state_update(x);
+ xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+ AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
__xfrm_state_put(x);
@@ -495,6 +496,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
goto out;
}
+ err = -ESRCH;
x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
p->family);
}
@@ -525,6 +527,10 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
}
err = xfrm_state_delete(x);
+
+ xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+ AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+
if (err < 0)
goto out;
@@ -545,7 +551,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->lft, &x->lft, sizeof(p->lft));
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
memcpy(&p->stats, &x->stats, sizeof(p->stats));
- p->saddr = x->props.saddr;
+ memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr));
p->mode = x->props.mode;
p->replay_window = x->props.replay_window;
p->reqid = x->props.reqid;
@@ -652,7 +658,6 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
if (!skb)
return ERR_PTR(-ENOMEM);
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
info.in_skb = in_skb;
info.out_skb = skb;
info.nlmsg_seq = seq;
@@ -772,7 +777,7 @@ out_noput:
return err;
}
-static int verify_policy_dir(__u8 dir)
+static int verify_policy_dir(u8 dir)
{
switch (dir) {
case XFRM_POLICY_IN:
@@ -787,7 +792,7 @@ static int verify_policy_dir(__u8 dir)
return 0;
}
-static int verify_policy_type(__u8 type)
+static int verify_policy_type(u8 type)
{
switch (type) {
case XFRM_POLICY_TYPE_MAIN:
@@ -874,22 +879,57 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
t->aalgos = ut->aalgos;
t->ealgos = ut->ealgos;
t->calgos = ut->calgos;
+ t->encap_family = ut->family;
}
}
+/*
+ * Sanity-check the user-supplied templates of a policy.
+ * @nr:     number of templates in @ut.
+ * @ut:     template array; a zero family field is overwritten in place
+ *          with @family.
+ * @family: family to fall back on for templates that left theirs at zero.
+ *
+ * Returns 0 when @nr does not exceed XFRM_MAX_DEPTH and every template
+ * ends up with a supported family (AF_INET, plus AF_INET6 when IPv6 is
+ * configured), -EINVAL otherwise.
+ */
+static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
+{
+	int idx;
+
+	if (nr > XFRM_MAX_DEPTH)
+		return -EINVAL;
+
+	for (idx = 0; idx < nr; idx++) {
+		struct xfrm_user_tmpl *tmpl = &ut[idx];
+
+		/* Historically ut->family was neither validated nor used,
+		 * because every template was assumed to share the policy's
+		 * family, so many applications leave it at zero.  With
+		 * ipv4-in-ipv6 and ipv6-in-ipv4 tunnels that assumption no
+		 * longer holds; treat zero as "same as the policy".
+		 */
+		if (!tmpl->family)
+			tmpl->family = family;
+
+		if (tmpl->family == AF_INET)
+			continue;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		if (tmpl->family == AF_INET6)
+			continue;
+#endif
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
{
struct rtattr *rt = xfrma[XFRMA_TMPL-1];
- struct xfrm_user_tmpl *utmpl;
- int nr;
if (!rt) {
pol->xfrm_nr = 0;
} else {
- nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+ struct xfrm_user_tmpl *utmpl = RTA_DATA(rt);
+ int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+ int err;
- if (nr > XFRM_MAX_DEPTH)
- return -EINVAL;
+ err = validate_tmpl(nr, utmpl, pol->family);
+ if (err)
+ return err;
copy_templates(pol, RTA_DATA(rt), nr);
}
@@ -900,7 +940,7 @@ static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma)
{
struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1];
struct xfrm_userpolicy_type *upt;
- __u8 type = XFRM_POLICY_TYPE_MAIN;
+ u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
if (rt) {
@@ -998,6 +1038,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
* a type XFRM_MSG_UPDPOLICY - JHS */
excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
err = xfrm_policy_insert(p->dir, xp, excl);
+	/* This path inserts a policy, so audit it as an SPD add (not a
+	 * delete); the result flag is 1 on success and 0 on failure.
+	 */
+	xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+		       AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL);
+
if (err) {
security_xfrm_policy_free(xp);
kfree(xp);
@@ -1027,7 +1070,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
memcpy(&up->id, &kp->id, sizeof(up->id));
- up->family = xp->family;
+ up->family = kp->encap_family;
memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
up->reqid = kp->reqid;
up->mode = kp->mode;
@@ -1082,12 +1125,12 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
}
#ifdef CONFIG_XFRM_SUB_POLICY
-static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_to_user_policy_type(u8 type, struct sk_buff *skb)
{
struct xfrm_userpolicy_type upt;
memset(&upt, 0, sizeof(upt));
- upt.type = xp->type;
+ upt.type = type;
RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
@@ -1098,7 +1141,7 @@ rtattr_failure:
}
#else
-static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+/* Fallback on the #else side of the preceding conditional (apparently
+ * CONFIG_XFRM_SUB_POLICY): ignores both arguments, adds nothing to the
+ * skb and always returns 0.
+ */
+static inline int copy_to_user_policy_type(u8 type, struct sk_buff *skb)
{
return 0;
}
@@ -1127,7 +1170,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
- if (copy_to_user_policy_type(xp, skb) < 0)
+ if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -1170,7 +1213,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
if (!skb)
return ERR_PTR(-ENOMEM);
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
info.in_skb = in_skb;
info.out_skb = skb;
info.nlmsg_seq = seq;
@@ -1189,7 +1231,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
{
struct xfrm_policy *xp;
struct xfrm_userpolicy_id *p;
- __u8 type = XFRM_POLICY_TYPE_MAIN;
+ u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
struct km_event c;
int delete;
@@ -1226,6 +1268,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
security_xfrm_policy_free(&tmp);
}
+ if (delete)
+ xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+ AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
+
if (xp == NULL)
return -ENOENT;
@@ -1260,8 +1306,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma
{
struct km_event c;
struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
+ struct xfrm_audit audit_info;
- xfrm_state_flush(p->proto);
+ audit_info.loginuid = NETLINK_CB(skb).loginuid;
+ audit_info.secid = NETLINK_CB(skb).sid;
+ xfrm_state_flush(p->proto, &audit_info);
c.data.proto = p->proto;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
@@ -1283,10 +1332,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
id = NLMSG_DATA(nlh);
nlh->nlmsg_flags = 0;
- id->sa_id.daddr = x->id.daddr;
+ memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr));
id->sa_id.spi = x->id.spi;
id->sa_id.family = x->props.family;
id->sa_id.proto = x->id.proto;
+ memcpy(&id->saddr, &x->props.saddr,sizeof(x->props.saddr));
+ id->reqid = x->props.reqid;
id->flags = c->data.aevent;
RTA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay);
@@ -1407,14 +1458,17 @@ out:
static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
{
struct km_event c;
- __u8 type = XFRM_POLICY_TYPE_MAIN;
+ u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
+ struct xfrm_audit audit_info;
err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
if (err)
return err;
- xfrm_policy_flush(type);
+ audit_info.loginuid = NETLINK_CB(skb).loginuid;
+ audit_info.secid = NETLINK_CB(skb).sid;
+ xfrm_policy_flush(type, &audit_info);
c.data.type = type;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
@@ -1428,7 +1482,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
struct xfrm_policy *xp;
struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
struct xfrm_userpolicy_info *p = &up->pol;
- __u8 type = XFRM_POLICY_TYPE_MAIN;
+ u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
@@ -1469,6 +1523,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
err = 0;
if (up->hard) {
xfrm_policy_delete(xp, p->dir);
+ xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+ AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL);
+
} else {
// reset the timers here?
printk("Dont know what to do with soft policy expire\n");
@@ -1500,8 +1557,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **
goto out;
km_state_expired(x, ue->hard, current->pid);
- if (ue->hard)
+ if (ue->hard) {
__xfrm_state_delete(x);
+ xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
+ AUDIT_MAC_IPSEC_DELSA, 1, NULL, x);
+ }
out:
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
@@ -1530,7 +1590,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xf
}
/* build an XP */
- xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); if (!xp) {
+ xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err);
+ if (!xp) {
kfree(x);
return err;
}
@@ -1907,7 +1968,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
goto nlmsg_failure;
if (copy_to_user_state_sec_ctx(x, skb))
goto nlmsg_failure;
- if (copy_to_user_policy_type(xp, skb) < 0)
+ if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -1927,6 +1988,9 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+#ifdef CONFIG_XFRM_SUB_POLICY
+ len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
+#endif
skb = alloc_skb(len, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -1976,7 +2040,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
return NULL;
nr = ((len - sizeof(*p)) / sizeof(*ut));
- if (nr > XFRM_MAX_DEPTH)
+ if (validate_tmpl(nr, ut, p->sel.family))
return NULL;
if (p->dir > XFRM_POLICY_OUT)
@@ -2014,7 +2078,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
goto nlmsg_failure;
if (copy_to_user_sec_ctx(xp, skb))
goto nlmsg_failure;
- if (copy_to_user_policy_type(xp, skb) < 0)
+ if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
upe->hard = !!hard;
@@ -2034,6 +2098,9 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+#ifdef CONFIG_XFRM_SUB_POLICY
+ len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
+#endif
skb = alloc_skb(len, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -2060,6 +2127,9 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
len += RTA_SPACE(headlen);
headlen = sizeof(*id);
}
+#ifdef CONFIG_XFRM_SUB_POLICY
+ len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
+#endif
len += NLMSG_SPACE(headlen);
skb = alloc_skb(len, GFP_ATOMIC);
@@ -2087,7 +2157,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
copy_to_user_policy(xp, p, dir);
if (copy_to_user_tmpl(xp, skb) < 0)
goto nlmsg_failure;
- if (copy_to_user_policy_type(xp, skb) < 0)
+ if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -2106,10 +2176,11 @@ static int xfrm_notify_policy_flush(struct km_event *c)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned char *b;
+ int len = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
- struct xfrm_userpolicy_type upt;
+ len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
#endif
- int len = NLMSG_LENGTH(0);
+ len += NLMSG_LENGTH(0);
skb = alloc_skb(len, GFP_ATOMIC);
if (skb == NULL)
@@ -2119,12 +2190,8 @@ static int xfrm_notify_policy_flush(struct km_event *c)
nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
nlh->nlmsg_flags = 0;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
- memset(&upt, 0, sizeof(upt));
- upt.type = c->data.type;
- RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
-#endif
+ if (copy_to_user_policy_type(c->data.type, skb) < 0)
+ goto nlmsg_failure;
nlh->nlmsg_len = skb->tail - b;
@@ -2132,9 +2199,6 @@ static int xfrm_notify_policy_flush(struct km_event *c)
return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
nlmsg_failure:
-#ifdef CONFIG_XFRM_SUB_POLICY
-rtattr_failure:
-#endif
kfree_skb(skb);
return -1;
}