aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig78
-rw-r--r--net/ipv6/Makefile11
-rw-r--r--net/ipv6/addrconf.c2519
-rw-r--r--net/ipv6/addrconf_core.c75
-rw-r--r--net/ipv6/addrlabel.c182
-rw-r--r--net/ipv6/af_inet6.c545
-rw-r--r--net/ipv6/ah6.c179
-rw-r--r--net/ipv6/anycast.c99
-rw-r--r--net/ipv6/datagram.c290
-rw-r--r--net/ipv6/esp6.c274
-rw-r--r--net/ipv6/exthdrs.c204
-rw-r--r--net/ipv6/exthdrs_core.c185
-rw-r--r--net/ipv6/exthdrs_offload.c41
-rw-r--r--net/ipv6/fib6_rules.c81
-rw-r--r--net/ipv6/icmp.c437
-rw-r--r--net/ipv6/inet6_connection_sock.c209
-rw-r--r--net/ipv6/inet6_hashtables.c143
-rw-r--r--net/ipv6/ip6_checksum.c124
-rw-r--r--net/ipv6/ip6_fib.c725
-rw-r--r--net/ipv6/ip6_flowlabel.c356
-rw-r--r--net/ipv6/ip6_gre.c1722
-rw-r--r--net/ipv6/ip6_icmp.c47
-rw-r--r--net/ipv6/ip6_input.c106
-rw-r--r--net/ipv6/ip6_offload.c337
-rw-r--r--net/ipv6/ip6_offload.h18
-rw-r--r--net/ipv6/ip6_output.c791
-rw-r--r--net/ipv6/ip6_tunnel.c837
-rw-r--r--net/ipv6/ip6_vti.c1160
-rw-r--r--net/ipv6/ip6mr.c530
-rw-r--r--net/ipv6/ipcomp6.c51
-rw-r--r--net/ipv6/ipv6_sockglue.c126
-rw-r--r--net/ipv6/mcast.c679
-rw-r--r--net/ipv6/mip6.c89
-rw-r--r--net/ipv6/ndisc.c1026
-rw-r--r--net/ipv6/netfilter.c79
-rw-r--r--net/ipv6/netfilter/Kconfig118
-rw-r--r--net/ipv6/netfilter/Makefile18
-rw-r--r--net/ipv6/netfilter/ip6_queue.c638
-rw-r--r--net/ipv6/netfilter/ip6_tables.c208
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c528
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c137
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c153
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c163
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c505
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c4
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c4
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c4
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c140
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c4
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c18
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c32
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c330
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c9
-rw-r--r--net/ipv6/netfilter/ip6table_security.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c369
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c126
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c292
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c21
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c128
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c205
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c88
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c76
-rw-r--r--net/ipv6/output_core.c98
-rw-r--r--net/ipv6/ping.c275
-rw-r--r--net/ipv6/proc.c81
-rw-r--r--net/ipv6/protocol.c37
-rw-r--r--net/ipv6/raw.c473
-rw-r--r--net/ipv6/reassembly.c240
-rw-r--r--net/ipv6/route.c2078
-rw-r--r--net/ipv6/sit.c1070
-rw-r--r--net/ipv6/syncookies.c152
-rw-r--r--net/ipv6/sysctl_net_ipv6.c96
-rw-r--r--net/ipv6/tcp_ipv6.c1290
-rw-r--r--net/ipv6/tcpv6_offload.c93
-rw-r--r--net/ipv6/tunnel6.c13
-rw-r--r--net/ipv6/udp.c785
-rw-r--r--net/ipv6/udp_impl.h41
-rw-r--r--net/ipv6/udp_offload.c140
-rw-r--r--net/ipv6/udplite.c16
-rw-r--r--net/ipv6/xfrm6_mode_beet.c16
-rw-r--r--net/ipv6/xfrm6_mode_ro.c3
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c32
-rw-r--r--net/ipv6/xfrm6_output.c107
-rw-r--r--net/ipv6/xfrm6_policy.c171
-rw-r--r--net/ipv6/xfrm6_protocol.c279
-rw-r--r--net/ipv6/xfrm6_state.c27
-rw-r--r--net/ipv6/xfrm6_tunnel.c35
89 files changed, 17639 insertions, 9070 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 36d7437ac05..438a73aa777 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -11,7 +11,7 @@ menuconfig IPV6
You will still be able to do traditional IPv4 networking as well.
For general information about IPv6, see
- <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
+ <https://en.wikipedia.org/wiki/IPv6>.
For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
For specific information about IPv6 under Linux, read the HOWTO at
<http://www.bieringer.de/linux/IPv6/>.
@@ -21,24 +21,6 @@ menuconfig IPV6
if IPV6
-config IPV6_PRIVACY
- bool "IPv6: Privacy Extensions (RFC 3041) support"
- ---help---
- Privacy Extensions for Stateless Address Autoconfiguration in IPv6
- support. With this option, additional periodically-altered
- pseudo-random global-scope unicast address(es) will be assigned to
- your interface(s).
-
- We use our standard pseudo-random algorithm to generate the
- randomized interface identifier, instead of one described in RFC 3041.
-
- By default the kernel does not generate temporary addresses.
- To use temporary addresses, do
-
- echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
-
- See <file:Documentation/networking/ip-sysctl.txt> for details.
-
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
---help---
@@ -50,16 +32,15 @@ config IPV6_ROUTER_PREF
If unsure, say N.
config IPV6_ROUTE_INFO
- bool "IPv6: Route Information (RFC 4191) support (EXPERIMENTAL)"
- depends on IPV6_ROUTER_PREF && EXPERIMENTAL
+ bool "IPv6: Route Information (RFC 4191) support"
+ depends on IPV6_ROUTER_PREF
---help---
This is experimental support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
- bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "IPv6: Enable RFC 4429 Optimistic DAD"
---help---
This is experimental support for optimistic Duplicate
Address Detection. It allows for autoconfigured addresses
@@ -69,7 +50,7 @@ config IPV6_OPTIMISTIC_DAD
config INET6_AH
tristate "IPv6: AH transformation"
- select XFRM
+ select XFRM_ALGO
select CRYPTO
select CRYPTO_HMAC
select CRYPTO_MD5
@@ -81,7 +62,7 @@ config INET6_AH
config INET6_ESP
tristate "IPv6: ESP transformation"
- select XFRM
+ select XFRM_ALGO
select CRYPTO
select CRYPTO_AUTHENC
select CRYPTO_HMAC
@@ -105,8 +86,7 @@ config INET6_IPCOMP
If unsure, say Y.
config IPV6_MIP6
- tristate "IPv6: Mobility (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ tristate "IPv6: Mobility"
select XFRM
---help---
Support for IPv6 Mobility described in RFC 3775.
@@ -150,15 +130,27 @@ config INET6_XFRM_MODE_BEET
If unsure, say Y.
config INET6_XFRM_MODE_ROUTEOPTIMIZATION
- tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ tristate "IPv6: MIPv6 route optimization mode"
select XFRM
---help---
Support for MIPv6 route optimization mode.
+config IPV6_VTI
+tristate "Virtual (secure) IPv6: tunneling"
+ select IPV6_TUNNEL
+ select NET_IP_TUNNEL
+ depends on INET6_XFRM_MODE_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This can be used with xfrm mode tunnel to give
+ the notion of a secure tunnel for IPSEC and then use routing protocol
+ on top.
+
config IPV6_SIT
tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
select INET_TUNNEL
+ select NET_IP_TUNNEL
select IPV6_NDISC_NODETYPE
default y
---help---
@@ -171,8 +163,8 @@ config IPV6_SIT
Saying M here will produce a module called sit. If unsure, say Y.
config IPV6_SIT_6RD
- bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)"
- depends on IPV6_SIT && EXPERIMENTAL
+ bool "IPv6: IPv6 Rapid Deployment (6RD)"
+ depends on IPV6_SIT
default n
---help---
IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
@@ -201,9 +193,25 @@ config IPV6_TUNNEL
If unsure, say N.
+config IPV6_GRE
+ tristate "IPv6: GRE tunnel"
+ select IPV6_TUNNEL
+ select NET_IP_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This particular tunneling driver implements
+ GRE (Generic Routing Encapsulation) and at this time allows
+ encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+ This driver is useful if the other endpoint is a Cisco router: Cisco
+ likes GRE much better than the other Linux tunneling driver ("IP
+ tunneling" above). In addition, GRE allows multicast redistribution
+ through the tunnel.
+
+ Saying M here will produce a module called ip6_gre. If unsure, say N.
+
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
- depends on EXPERIMENTAL
select FIB_RULES
---help---
Support multiple routing tables.
@@ -223,8 +231,8 @@ config IPV6_SUBTREES
If unsure, say N.
config IPV6_MROUTE
- bool "IPv6: multicast routing (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
+ bool "IPv6: multicast routing"
+ depends on IPV6
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
@@ -244,7 +252,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES
If unsure, say N.
config IPV6_PIMSM_V2
- bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
+ bool "IPv6: PIM-SM version 2 support"
depends on IPV6_MROUTE
---help---
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac..2fe68364bb2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,14 +7,16 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
@@ -34,9 +36,12 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_NETFILTER) += netfilter/
+obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
-obj-y += addrconf_core.o exthdrs_core.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1023ad0d2b1..5667b3003af 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -38,6 +38,8 @@
* status etc.
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -61,11 +63,14 @@
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/string.h>
+#include <linux/hash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
+#include <net/af_ieee802154.h>
+#include <net/firewire.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ndisc.h>
@@ -77,24 +82,22 @@
#include <net/pkt_sched.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/netconf.h>
#include <linux/random.h>
-#endif
-
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/export.h>
/* Set to 3 to get tracing... */
#define ACONF_DEBUG 2
#if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -104,10 +107,6 @@ static inline u32 cstamp_delta(unsigned long cstamp)
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
-#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
-#define ADDRCONF_TIMER_FUZZ (HZ / 4)
-#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
-
#ifdef CONFIG_SYSCTL
static void addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -121,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
-static int __ipv6_regen_rndid(struct inet6_dev *idev);
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void __ipv6_regen_rndid(struct inet6_dev *idev);
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static void ipv6_regen_rndid(unsigned long data);
-#endif
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -136,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -148,8 +147,13 @@ static void addrconf_type_change(struct net_device *dev,
unsigned long event);
static int addrconf_ifdown(struct net_device *dev, int how);
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
-static void addrconf_dad_timer(unsigned long data);
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+ int plen,
+ const struct net_device *dev,
+ u32 flags, u32 noflags);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
@@ -161,8 +165,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev);
-static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-
static struct ipv6_devconf ipv6_devconf __read_mostly = {
.forwarding = 0,
.hop_limit = IPV6_DEFAULT_HOPLIMIT,
@@ -171,17 +173,17 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_redirects = 1,
.autoconf = 1,
.force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -196,6 +198,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -205,17 +208,18 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra = 1,
.accept_redirects = 1,
.autoconf = 1,
+ .force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -230,121 +234,78 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
-
/* Check if a valid qdisc is available */
static inline bool addrconf_qdisc_ok(const struct net_device *dev)
{
return !qdisc_tx_is_noop(dev);
}
-/* Check if a route is valid prefix route */
-static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
+static void addrconf_del_rs_timer(struct inet6_dev *idev)
{
- return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
+ if (del_timer(&idev->rs_timer))
+ __in6_dev_put(idev);
}
-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->timer))
+ if (cancel_delayed_work(&ifp->dad_work))
__in6_ifa_put(ifp);
}
-enum addrconf_timer_t {
- AC_NONE,
- AC_DAD,
- AC_RS,
-};
+static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+ unsigned long when)
+{
+ if (!timer_pending(&idev->rs_timer))
+ in6_dev_hold(idev);
+ mod_timer(&idev->rs_timer, jiffies + when);
+}
-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
- enum addrconf_timer_t what,
- unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+ unsigned long delay)
{
- if (!del_timer(&ifp->timer))
+ if (!delayed_work_pending(&ifp->dad_work))
in6_ifa_hold(ifp);
-
- switch (what) {
- case AC_DAD:
- ifp->timer.function = addrconf_dad_timer;
- break;
- case AC_RS:
- ifp->timer.function = addrconf_rs_timer;
- break;
- default:
- break;
- }
- ifp->timer.expires = jiffies + when;
- add_timer(&ifp->timer);
+ mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
{
- if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ int i;
+
+ idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ if (!idev->stats.ipv6)
goto err_ip;
- if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
- sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+
+ for_each_possible_cpu(i) {
+ struct ipstats_mib *addrconf_stats;
+ addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
+ u64_stats_init(&addrconf_stats->syncp);
+ }
+
+
+ idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
+ GFP_KERNEL);
+ if (!idev->stats.icmpv6dev)
goto err_icmp;
- if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
- sizeof(struct icmpv6msg_mib),
- __alignof__(struct icmpv6msg_mib)) < 0)
+ idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
+ GFP_KERNEL);
+ if (!idev->stats.icmpv6msgdev)
goto err_icmpmsg;
return 0;
err_icmpmsg:
- snmp_mib_free((void __percpu **)idev->stats.icmpv6);
+ kfree(idev->stats.icmpv6dev);
err_icmp:
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
err_ip:
return -ENOMEM;
}
-static void snmp6_free_dev(struct inet6_dev *idev)
-{
- snmp_mib_free((void __percpu **)idev->stats.icmpv6msg);
- snmp_mib_free((void __percpu **)idev->stats.icmpv6);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
-}
-
-/* Nobody refers to this device, we may destroy it. */
-
-static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
-{
- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
- kfree(idev);
-}
-
-void in6_dev_finish_destroy(struct inet6_dev *idev)
-{
- struct net_device *dev = idev->dev;
-
- WARN_ON(!list_empty(&idev->addr_list));
- WARN_ON(idev->mc_list != NULL);
-
-#ifdef NET_REFCNT_DEBUG
- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
-#endif
- dev_put(dev);
- if (!idev->dead) {
- pr_warning("Freeing alive inet6 device %p\n", idev);
- return;
- }
- snmp6_free_dev(idev);
- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
-}
-
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-
-static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
@@ -361,7 +322,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
rwlock_init(&ndev->lock);
ndev->dev = dev;
INIT_LIST_HEAD(&ndev->addr_list);
-
+ setup_timer(&ndev->rs_timer, addrconf_rs_timer,
+ (unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
@@ -376,19 +338,19 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG((KERN_WARNING
- "%s(): cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name));
+ ADBG(KERN_WARNING
+ "%s: cannot allocate memory for statistics; dev=%s.\n",
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
- ndev->dead = 1;
- in6_dev_finish_destroy(ndev);
+ dev_put(dev);
+ kfree(ndev);
return NULL;
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG((KERN_WARNING
- "%s(): cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name));
+ ADBG(KERN_WARNING
+ "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -403,16 +365,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
ndev->cnf.accept_dad = -1;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
- printk(KERN_INFO
- "%s: Disabled Multicast RS\n",
- dev->name);
+ pr_info("%s: Disabled Multicast RS\n", dev->name);
ndev->cnf.rtr_solicits = 0;
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
INIT_LIST_HEAD(&ndev->tempaddr_list);
setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
if ((dev->flags&IFF_LOOPBACK) ||
@@ -420,15 +379,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
dev->type == ARPHRD_TUNNEL6 ||
dev->type == ARPHRD_SIT ||
dev->type == ARPHRD_NONE) {
- printk(KERN_INFO
- "%s: Disabled Privacy Extensions\n",
- dev->name);
ndev->cnf.use_tempaddr = -1;
} else {
in6_dev_hold(ndev);
ipv6_regen_rndid((unsigned long) ndev);
}
-#endif
+
+ ndev->token = in6addr_any;
if (netif_running(dev) && addrconf_qdisc_ok(dev))
ndev->if_flags |= IF_READY;
@@ -439,13 +396,20 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
+ /* Join interface-local all-node multicast group */
+ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
/* Join all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+ /* Join all-router multicast group if forwarding is set */
+ if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+
return ndev;
}
-static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -463,6 +427,226 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
return idev;
}
+static int inet6_netconf_msgsize_devconf(int type)
+{
+ int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
+ + nla_total_size(4); /* NETCONFA_IFINDEX */
+
+ /* type -1 is used for ALL */
+ if (type == -1 || type == NETCONFA_FORWARDING)
+ size += nla_total_size(4);
+#ifdef CONFIG_IPV6_MROUTE
+ if (type == -1 || type == NETCONFA_MC_FORWARDING)
+ size += nla_total_size(4);
+#endif
+ if (type == -1 || type == NETCONFA_PROXY_NEIGH)
+ size += nla_total_size(4);
+
+ return size;
+}
+
+static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+ struct ipv6_devconf *devconf, u32 portid,
+ u32 seq, int event, unsigned int flags,
+ int type)
+{
+ struct nlmsghdr *nlh;
+ struct netconfmsg *ncm;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
+ flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ncm = nlmsg_data(nlh);
+ ncm->ncm_family = AF_INET6;
+
+ if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
+ goto nla_put_failure;
+
+ /* type -1 is used for ALL */
+ if ((type == -1 || type == NETCONFA_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+ goto nla_put_failure;
+#ifdef CONFIG_IPV6_MROUTE
+ if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+ devconf->mc_forwarding) < 0)
+ goto nla_put_failure;
+#endif
+ if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
+ nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+ struct ipv6_devconf *devconf)
+{
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
+ RTM_NEWNETCONF, 0, type);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ return;
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
+}
+
+static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
+ [NETCONFA_IFINDEX] = { .len = sizeof(int) },
+ [NETCONFA_FORWARDING] = { .len = sizeof(int) },
+ [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
+};
+
+static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
+ struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[NETCONFA_MAX+1];
+ struct netconfmsg *ncm;
+ struct sk_buff *skb;
+ struct ipv6_devconf *devconf;
+ struct inet6_dev *in6_dev;
+ struct net_device *dev;
+ int ifindex;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
+ devconf_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ err = EINVAL;
+ if (!tb[NETCONFA_IFINDEX])
+ goto errout;
+
+ ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
+ switch (ifindex) {
+ case NETCONFA_IFINDEX_ALL:
+ devconf = net->ipv6.devconf_all;
+ break;
+ case NETCONFA_IFINDEX_DEFAULT:
+ devconf = net->ipv6.devconf_dflt;
+ break;
+ default:
+ dev = __dev_get_by_index(net, ifindex);
+ if (dev == NULL)
+ goto errout;
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev == NULL)
+ goto errout;
+ devconf = &in6_dev->cnf;
+ break;
+ }
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
+ -1);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+ return err;
+}
+
+static int inet6_netconf_dump_devconf(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx, s_idx;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct hlist_head *head;
+
+ s_h = cb->args[0];
+ s_idx = idx = cb->args[1];
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ rcu_read_lock();
+ cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
+ net->dev_base_seq;
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ goto cont;
+
+ if (inet6_netconf_fill_devconf(skb, dev->ifindex,
+ &idev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ -1) <= 0) {
+ rcu_read_unlock();
+ goto done;
+ }
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+ }
+ if (h == NETDEV_HASHENTRIES) {
+ if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+ if (h == NETDEV_HASHENTRIES + 1) {
+ if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+done:
+ cb->args[0] = h;
+ cb->args[1] = idx;
+
+ return skb->len;
+}
+
#ifdef CONFIG_SYSCTL
static void dev_forward_change(struct inet6_dev *idev)
{
@@ -474,11 +658,16 @@ static void dev_forward_change(struct inet6_dev *idev)
dev = idev->dev;
if (idev->cnf.forwarding)
dev_disable_lro(dev);
- if (dev && (dev->flags & IFF_MULTICAST)) {
- if (idev->cnf.forwarding)
+ if (dev->flags & IFF_MULTICAST) {
+ if (idev->cnf.forwarding) {
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
- else
+ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+ ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+ } else {
ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+ ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+ ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+ }
}
list_for_each_entry(ifa, &idev->addr_list, if_list) {
@@ -489,6 +678,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
+ inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ dev->ifindex, &idev->cnf);
}
@@ -497,8 +688,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -507,64 +697,68 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
dev_forward_change(idev);
}
}
- rcu_read_unlock();
}
-static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
+static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
{
struct net *net;
+ int old;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
net = (struct net *)table->extra2;
- if (p == &net->ipv6.devconf_dflt->forwarding)
+ old = *p;
+ *p = newf;
+
+ if (p == &net->ipv6.devconf_dflt->forwarding) {
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+ rtnl_unlock();
return 0;
-
- if (!rtnl_trylock()) {
- /* Restore the original values before restarting */
- *p = old;
- return restart_syscall();
}
if (p == &net->ipv6.devconf_all->forwarding) {
- __s32 newf = net->ipv6.devconf_all->forwarding;
net->ipv6.devconf_dflt->forwarding = newf;
addrconf_forward_change(net, newf);
- } else if ((!*p) ^ (!old))
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
+ } else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
rtnl_unlock();
- if (*p)
+ if (newf)
rt6_purge_dflt_routers(net);
return 1;
}
#endif
-static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
-{
- struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
- kfree(ifp);
-}
-
/* Nobody refers to this ifaddr, destroy it */
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
{
WARN_ON(!hlist_unhashed(&ifp->addr_lst));
#ifdef NET_REFCNT_DEBUG
- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+ pr_debug("%s\n", __func__);
#endif
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->timer))
- pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+ if (cancel_delayed_work(&ifp->dad_work))
+ pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+ ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
- pr_warning("Freeing alive inet6 address %p\n", ifp);
+ pr_warn("Freeing alive inet6 address %p\n", ifp);
return;
}
- dst_release(&ifp->rt->dst);
+ ip6_rt_put(ifp->rt);
- call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
+ kfree_rcu(ifp, rcu);
}
static void
@@ -587,22 +781,17 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
list_add_tail(&ifp->if_list, p);
}
-static u32 ipv6_addr_hash(const struct in6_addr *addr)
+static u32 inet6_addr_hash(const struct in6_addr *addr)
{
- /*
- * We perform the hash function over the last 64 bits of the address
- * This will include the IEEE address token on links that support it.
- */
- return jhash_2words((__force u32)addr->s6_addr32[2],
- (__force u32)addr->s6_addr32[3], 0)
- & (IN6_ADDR_HSIZE - 1);
+ return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
}
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
- int scope, u32 flags)
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+ const struct in6_addr *peer_addr, int pfxlen,
+ int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
@@ -631,7 +820,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG(("ipv6_add_addr: already assigned\n"));
+ ADBG("ipv6_add_addr: already assigned\n");
err = -EEXIST;
goto out;
}
@@ -639,48 +828,44 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- ADBG(("ipv6_add_addr: malloc failed\n"));
+ ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
- rt = addrconf_dst_alloc(idev, addr, 0);
+ rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto out;
}
- ipv6_addr_copy(&ifa->addr, addr);
+ neigh_parms_data_state_setall(idev->nd_parms);
+
+ ifa->addr = *addr;
+ if (peer_addr)
+ ifa->peer_addr = *peer_addr;
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- init_timer(&ifa->timer);
+ INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
- ifa->timer.data = (unsigned long) ifa;
ifa->scope = scope;
ifa->prefix_len = pfxlen;
ifa->flags = flags | IFA_F_TENTATIVE;
+ ifa->valid_lft = valid_lft;
+ ifa->prefered_lft = prefered_lft;
ifa->cstamp = ifa->tstamp = jiffies;
+ ifa->tokenized = false;
ifa->rt = rt;
- /*
- * part one of RFC 4429, section 3.3
- * We should not configure an address as
- * optimistic if we do not yet know the link
- * layer address of our nexhop router
- */
-
- if (rt->rt6i_nexthop == NULL)
- ifa->flags &= ~IFA_F_OPTIMISTIC;
-
ifa->idev = idev;
in6_dev_hold(idev);
/* For caller */
in6_ifa_hold(ifa);
/* Add to big hash table */
- hash = ipv6_addr_hash(addr);
+ hash = inet6_addr_hash(addr);
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
spin_unlock(&addrconf_hash_lock);
@@ -689,12 +874,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
-#ifdef CONFIG_IPV6_PRIVACY
if (ifa->flags&IFA_F_TEMPORARY) {
list_add(&ifa->tmp_list, &idev->tempaddr_list);
in6_ifa_hold(ifa);
}
-#endif
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
@@ -702,7 +885,7 @@ out2:
rcu_read_unlock_bh();
if (likely(err == 0))
- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+ inet6addr_notifier_call_chain(NETDEV_UP, ifa);
else {
kfree(ifa);
ifa = ERR_PTR(err);
@@ -714,18 +897,97 @@ out:
goto out2;
}
+enum cleanup_prefix_rt_t {
+ CLEANUP_PREFIX_RT_NOP, /* no cleanup action for prefix route */
+ CLEANUP_PREFIX_RT_DEL, /* delete the prefix route */
+ CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */
+};
+
+/*
+ * Check, whether the prefix for ifp would still need a prefix route
+ * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_*
+ * constants.
+ *
+ * 1) we don't purge prefix if address was not permanent.
+ * prefix is managed by its own lifetime.
+ * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE.
+ * 3) if there are no addresses, delete prefix.
+ * 4) if there are still other permanent address(es),
+ * corresponding prefix is still permanent.
+ * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE,
+ * don't purge the prefix, assume user space is managing it.
+ * 6) otherwise, update prefix lifetime to the
+ * longest valid lifetime among the corresponding
+ * addresses on the device.
+ * Note: subsequent RA will update lifetime.
+ **/
+static enum cleanup_prefix_rt_t
+check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
+{
+ struct inet6_ifaddr *ifa;
+ struct inet6_dev *idev = ifp->idev;
+ unsigned long lifetime;
+ enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL;
+
+ *expires = jiffies;
+
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (ifa == ifp)
+ continue;
+ if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ ifp->prefix_len))
+ continue;
+ if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
+ return CLEANUP_PREFIX_RT_NOP;
+
+ action = CLEANUP_PREFIX_RT_EXPIRE;
+
+ spin_lock(&ifa->lock);
+
+ lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+ /*
+ * Note: Because this address is
+ * not permanent, lifetime <
+ * LONG_MAX / HZ here.
+ */
+ if (time_before(*expires, ifa->tstamp + lifetime * HZ))
+ *expires = ifa->tstamp + lifetime * HZ;
+ spin_unlock(&ifa->lock);
+ }
+
+ return action;
+}
+
+static void
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+{
+ struct rt6_info *rt;
+
+ rt = addrconf_get_prefix_route(&ifp->addr,
+ ifp->prefix_len,
+ ifp->idev->dev,
+ 0, RTF_GATEWAY | RTF_DEFAULT);
+ if (rt) {
+ if (del_rt)
+ ip6_del_rt(rt);
+ else {
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ rt6_set_expires(rt, expires);
+ ip6_rt_put(rt);
+ }
+ }
+}
+
+
/* This function wants to get referenced ifp and releases it before return */
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- struct inet6_ifaddr *ifa, *ifn;
- struct inet6_dev *idev = ifp->idev;
int state;
- int hash;
- int deleted = 0, onlink = 0;
- unsigned long expires = jiffies;
+ enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+ unsigned long expires;
- hash = ipv6_addr_hash(&ifp->addr);
+ ASSERT_RTNL();
spin_lock_bh(&ifp->state_lock);
state = ifp->state;
@@ -739,8 +1001,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
hlist_del_init_rcu(&ifp->addr_lst);
spin_unlock_bh(&addrconf_hash_lock);
- write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
+ write_lock_bh(&ifp->idev->lock);
+
if (ifp->flags&IFA_F_TEMPORARY) {
list_del(&ifp->tmp_list);
if (ifp->ifpub) {
@@ -749,105 +1011,44 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
__in6_ifa_put(ifp);
}
-#endif
- list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
- if (ifa == ifp) {
- list_del_init(&ifp->if_list);
- __in6_ifa_put(ifp);
+ if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
+ action = check_cleanup_prefix_route(ifp, &expires);
- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
- break;
- deleted = 1;
- continue;
- } else if (ifp->flags & IFA_F_PERMANENT) {
- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
- ifp->prefix_len)) {
- if (ifa->flags & IFA_F_PERMANENT) {
- onlink = 1;
- if (deleted)
- break;
- } else {
- unsigned long lifetime;
-
- if (!onlink)
- onlink = -1;
-
- spin_lock(&ifa->lock);
-
- lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
- /*
- * Note: Because this address is
- * not permanent, lifetime <
- * LONG_MAX / HZ here.
- */
- if (time_before(expires,
- ifa->tstamp + lifetime * HZ))
- expires = ifa->tstamp + lifetime * HZ;
- spin_unlock(&ifa->lock);
- }
- }
- }
- }
- write_unlock_bh(&idev->lock);
+ list_del_init(&ifp->if_list);
+ __in6_ifa_put(ifp);
- addrconf_del_timer(ifp);
+ write_unlock_bh(&ifp->idev->lock);
- ipv6_ifa_notify(RTM_DELADDR, ifp);
+ addrconf_del_dad_work(ifp);
- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
+ ipv6_ifa_notify(RTM_DELADDR, ifp);
- /*
- * Purge or update corresponding prefix
- *
- * 1) we don't purge prefix here if address was not permanent.
- * prefix is managed by its own lifetime.
- * 2) if there're no addresses, delete prefix.
- * 3) if there're still other permanent address(es),
- * corresponding prefix is still permanent.
- * 4) otherwise, update prefix lifetime to the
- * longest valid lifetime among the corresponding
- * addresses on the device.
- * Note: subsequent RA will update lifetime.
- *
- * --yoshfuji
- */
- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
- struct in6_addr prefix;
- struct rt6_info *rt;
- struct net *net = dev_net(ifp->idev->dev);
- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
- rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
+ inet6addr_notifier_call_chain(NETDEV_DOWN, ifp);
- if (rt && addrconf_is_prefix_route(rt)) {
- if (onlink == 0) {
- ip6_del_rt(rt);
- rt = NULL;
- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
- rt->rt6i_expires = expires;
- rt->rt6i_flags |= RTF_EXPIRES;
- }
- }
- dst_release(&rt->dst);
+ if (action != CLEANUP_PREFIX_RT_NOP) {
+ cleanup_prefix_route(ifp, expires,
+ action == CLEANUP_PREFIX_RT_DEL);
}
+ /* clean up prefsrc entries */
+ rt6_remove_prefsrc(ifp);
out:
in6_ifa_put(ifp);
}
-#ifdef CONFIG_IPV6_PRIVACY
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
+ unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
- int max_addresses;
u32 addr_flags;
+ unsigned long now = jiffies;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (ift) {
spin_lock_bh(&ift->lock);
memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
@@ -859,9 +1060,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
retry:
in6_dev_hold(idev);
if (idev->cnf.use_tempaddr <= 0) {
- write_unlock(&idev->lock);
- printk(KERN_INFO
- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+ write_unlock_bh(&idev->lock);
+ pr_info("%s: use_tempaddr is disabled\n", __func__);
in6_dev_put(idev);
ret = -1;
goto out;
@@ -870,27 +1070,18 @@ retry:
if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
idev->cnf.use_tempaddr = -1; /*XXX*/
spin_unlock_bh(&ifp->lock);
- write_unlock(&idev->lock);
- printk(KERN_WARNING
- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+ write_unlock_bh(&idev->lock);
+ pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
+ __func__);
in6_dev_put(idev);
ret = -1;
goto out;
}
in6_ifa_hold(ifp);
memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
- spin_unlock_bh(&ifp->lock);
- write_unlock(&idev->lock);
- printk(KERN_WARNING
- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
- }
+ __ipv6_try_regen_rndid(idev, tmpaddr);
memcpy(&addr.s6_addr[8], idev->rndid, 8);
- age = (jiffies - ifp->tstamp) / HZ;
+ age = (now - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
ifp->valid_lft,
idev->cnf.temp_valid_lft + age);
@@ -899,22 +1090,23 @@ retry:
idev->cnf.temp_prefered_lft + age -
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
- max_addresses = idev->cnf.max_addresses;
- tmp_cstamp = ifp->cstamp;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
regen_advance = idev->cnf.regen_max_retry *
idev->cnf.dad_transmits *
- idev->nd_parms->retrans_time / HZ;
- write_unlock(&idev->lock);
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+ write_unlock_bh(&idev->lock);
/* A temporary address is created only if this calculated Preferred
* Lifetime is greater than REGEN_ADVANCE time units. In particular,
* an implementation must not create a temporary address with a zero
* Preferred Lifetime.
+ * Use age calculation as in addrconf_verify to avoid unnecessary
+ * temporary addresses being generated.
*/
- if (tmp_prefered_lft <= regen_advance) {
+ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if (tmp_prefered_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -926,36 +1118,30 @@ retry:
if (ifp->flags & IFA_F_OPTIMISTIC)
addr_flags |= IFA_F_OPTIMISTIC;
- ift = !max_addresses ||
- ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
- addr_flags) : NULL;
- if (!ift || IS_ERR(ift)) {
+ ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+ ipv6_addr_scope(&addr), addr_flags,
+ tmp_valid_lft, tmp_prefered_lft);
+ if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
- printk(KERN_INFO
- "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+ pr_info("%s: retry temporary address regeneration\n", __func__);
tmpaddr = &addr;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
goto retry;
}
spin_lock_bh(&ift->lock);
ift->ifpub = ifp;
- ift->valid_lft = tmp_valid_lft;
- ift->prefered_lft = tmp_prefered_lft;
- ift->cstamp = tmp_cstamp;
+ ift->cstamp = now;
ift->tstamp = tmp_tstamp;
spin_unlock_bh(&ift->lock);
- addrconf_dad_start(ift, 0);
+ addrconf_dad_start(ift);
in6_ifa_put(ift);
in6_dev_put(idev);
out:
return ret;
}
-#endif
/*
* Choose an appropriate source address (RFC3484)
@@ -970,9 +1156,7 @@ enum {
#endif
IPV6_SADDR_RULE_OIF,
IPV6_SADDR_RULE_LABEL,
-#ifdef CONFIG_IPV6_PRIVACY
IPV6_SADDR_RULE_PRIVACY,
-#endif
IPV6_SADDR_RULE_ORCHID,
IPV6_SADDR_RULE_PREFIX,
IPV6_SADDR_RULE_MAX
@@ -1043,7 +1227,7 @@ static int ipv6_get_saddr_eval(struct net *net,
* | d is scope of the destination.
* B-d | \
* | \ <- smaller scope is better if
- * B-15 | \ if scope is enough for destinaion.
+ * B-15 | \ if scope is enough for destination.
* | ret = B - scope (-1 <= scope >= d <= 15).
* d-C-1 | /
* |/ <- greater is better
@@ -1086,11 +1270,10 @@ static int ipv6_get_saddr_eval(struct net *net,
&score->ifa->addr, score->addr_type,
score->ifa->idev->dev->ifindex) == dst->label;
break;
-#ifdef CONFIG_IPV6_PRIVACY
case IPV6_SADDR_RULE_PRIVACY:
{
/* Rule 7: Prefer public address
- * Note: prefer temprary address if use_tempaddr >= 2
+ * Note: prefer temporary address if use_tempaddr >= 2
*/
int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
@@ -1098,7 +1281,6 @@ static int ipv6_get_saddr_eval(struct net *net,
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
-#endif
case IPV6_SADDR_RULE_ORCHID:
/* Rule 8-: Prefer ORCHID vs ORCHID or
* non-ORCHID vs non-ORCHID
@@ -1108,8 +1290,10 @@ static int ipv6_get_saddr_eval(struct net *net,
break;
case IPV6_SADDR_RULE_PREFIX:
/* Rule 8: Use longest matching prefix */
- score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
- dst->addr);
+ ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
+ if (ret > score->ifa->prefix_len)
+ ret = score->ifa->prefix_len;
+ score->matchlen = ret;
break;
default:
ret = 0;
@@ -1122,7 +1306,7 @@ out:
return ret;
}
-int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
@@ -1243,14 +1427,33 @@ try_nextdev:
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
- ipv6_addr_copy(saddr, &hiscore->ifa->addr);
+ *saddr = hiscore->ifa->addr;
in6_ifa_put(hiscore->ifa);
return 0;
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
+int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags)
+{
+ struct inet6_ifaddr *ifp;
+ int err = -EADDRNOTAVAIL;
+
+ list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope > IFA_LINK)
+ break;
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ *addr = ifp->addr;
+ err = 0;
+ break;
+ }
+ }
+ return err;
+}
+
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
- unsigned char banned_flags)
+ u32 banned_flags)
{
struct inet6_dev *idev;
int err = -EADDRNOTAVAIL;
@@ -1258,17 +1461,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- struct inet6_ifaddr *ifp;
-
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ifp->scope == IFA_LINK &&
- !(ifp->flags & banned_flags)) {
- ipv6_addr_copy(addr, &ifp->addr);
- err = 0;
- break;
- }
- }
+ err = __ipv6_get_lladdr(idev, addr, banned_flags);
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1287,15 +1481,14 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
return cnt;
}
-int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
- struct net_device *dev, int strict)
+int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1315,11 +1508,10 @@ EXPORT_SYMBOL(ipv6_chk_addr);
static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev)
{
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
- hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1330,7 +1522,34 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
-int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
+int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
@@ -1352,18 +1571,16 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
rcu_read_unlock();
return onlink;
}
-
EXPORT_SYMBOL(ipv6_chk_prefix);
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp, *result = NULL;
- unsigned int hash = ipv6_addr_hash(addr);
- struct hlist_node *node;
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1386,7 +1603,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_timer(ifp);
+ addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1394,7 +1611,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
} else if (ifp->flags&IFA_F_TEMPORARY) {
struct inet6_ifaddr *ifpub;
spin_lock_bh(&ifp->lock);
@@ -1408,21 +1624,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
-#endif
- } else
+ } else {
ipv6_del_addr(ifp);
+ }
}
static int addrconf_dad_end(struct inet6_ifaddr *ifp)
{
int err = -ENOENT;
- spin_lock(&ifp->state_lock);
+ spin_lock_bh(&ifp->state_lock);
if (ifp->state == INET6_IFADDR_STATE_DAD) {
ifp->state = INET6_IFADDR_STATE_POSTDAD;
err = 0;
}
- spin_unlock(&ifp->state_lock);
+ spin_unlock_bh(&ifp->state_lock);
return err;
}
@@ -1436,9 +1652,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
return;
}
- if (net_ratelimit())
- printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
- ifp->idev->dev->name, &ifp->addr);
+ net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
+ ifp->idev->dev->name, &ifp->addr);
if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
struct in6_addr addr;
@@ -1451,20 +1666,27 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
/* DAD failed for link-local based on MAC address */
idev->cnf.disable_ipv6 = 1;
- printk(KERN_INFO "%s: IPv6 being disabled!\n",
+ pr_info("%s: IPv6 being disabled!\n",
ifp->idev->dev->name);
}
}
- addrconf_dad_stop(ifp, 1);
+ spin_lock_bh(&ifp->state_lock);
+ /* transition from _POSTDAD to _ERRDAD */
+ ifp->state = INET6_IFADDR_STATE_ERRDAD;
+ spin_unlock_bh(&ifp->state_lock);
+
+ addrconf_mod_dad_work(ifp, 0);
}
/* Join to solicited addr multicast group. */
-void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1472,10 +1694,12 @@ void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
ipv6_dev_mc_inc(dev, &maddr);
}
-void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
+void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1486,6 +1710,11 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
+ if (ifp->prefix_len >= 127) /* RFC 6164 */
+ return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
@@ -1495,6 +1724,11 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
+ if (ifp->prefix_len >= 127) /* RFC 6164 */
+ return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
@@ -1532,13 +1766,36 @@ static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
return 0;
}
+static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
+{
+ if (dev->addr_len != IEEE802154_ADDR_LEN)
+ return -1;
+ memcpy(eui, dev->dev_addr, 8);
+ eui[0] ^= 2;
+ return 0;
+}
+
+static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
+{
+ union fwnet_hwaddr *ha;
+
+ if (dev->addr_len != FWNET_ALEN)
+ return -1;
+
+ ha = (union fwnet_hwaddr *)dev->dev_addr;
+
+ memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
+ eui[0] ^= 2;
+ return 0;
+}
+
static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
{
/* XXX: inherit EUI-64 from other interface -- yoshfuji */
if (dev->addr_len != ARCNET_ALEN)
return -1;
memset(eui, 0, 7);
- eui[7] = *(u8*)dev->dev_addr;
+ eui[7] = *(u8 *)dev->dev_addr;
return 0;
}
@@ -1575,12 +1832,26 @@ static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
return -1;
}
+static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
+{
+ return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+}
+
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+ memcpy(eui, dev->perm_addr, 3);
+ memcpy(eui + 5, dev->perm_addr + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ return 0;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
case ARPHRD_ETHER:
case ARPHRD_FDDI:
- case ARPHRD_IEEE802_TR:
return addrconf_ifid_eui48(eui, dev);
case ARPHRD_ARCNET:
return addrconf_ifid_arcnet(eui, dev);
@@ -1588,6 +1859,15 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_infiniband(eui, dev);
case ARPHRD_SIT:
return addrconf_ifid_sit(eui, dev);
+ case ARPHRD_IPGRE:
+ return addrconf_ifid_gre(eui, dev);
+ case ARPHRD_6LOWPAN:
+ case ARPHRD_IEEE802154:
+ return addrconf_ifid_eui64(eui, dev);
+ case ARPHRD_IEEE1394:
+ return addrconf_ifid_ieee1394(eui, dev);
+ case ARPHRD_TUNNEL6:
+ return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
}
@@ -1598,7 +1878,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope > IFA_LINK)
+ break;
if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
memcpy(eui, ifp->addr.s6_addr+8, 8);
err = 0;
@@ -1609,9 +1891,8 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
return err;
}
-#ifdef CONFIG_IPV6_PRIVACY
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+static void __ipv6_regen_rndid(struct inet6_dev *idev)
{
regen:
get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1638,8 +1919,6 @@ regen:
if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
goto regen;
}
-
- return 0;
}
static void ipv6_regen_rndid(unsigned long data)
@@ -1653,17 +1932,16 @@ static void ipv6_regen_rndid(unsigned long data)
if (idev->dead)
goto out;
- if (__ipv6_regen_rndid(idev) < 0)
- goto out;
+ __ipv6_regen_rndid(idev);
expires = jiffies +
idev->cnf.temp_prefered_lft * HZ -
- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
+ idev->cnf.regen_max_retry * idev->cnf.dad_transmits *
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -
idev->cnf.max_desync_factor * HZ;
if (time_before(expires, jiffies)) {
- printk(KERN_WARNING
- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
- idev->dev->name);
+ pr_warn("%s: too short regeneration interval; timer disabled for %s\n",
+ __func__, idev->dev->name);
goto out;
}
@@ -1676,14 +1954,11 @@ out:
in6_dev_put(idev);
}
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
- int ret = 0;
-
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+{
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
- ret = __ipv6_regen_rndid(idev);
- return ret;
+ __ipv6_regen_rndid(idev);
}
-#endif
/*
* Add prefix route.
@@ -1704,13 +1979,13 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
.fc_protocol = RTPROT_KERNEL,
};
- ipv6_addr_copy(&cfg.fc_dst, pfx);
+ cfg.fc_dst = *pfx;
/* Prevent useless cloning on PtP SIT.
This thing is done here expecting that the whole
class of non-broadcast devices need not cloning.
*/
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
@@ -1718,6 +1993,40 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
ip6_route_add(&cfg);
}
+
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+ int plen,
+ const struct net_device *dev,
+ u32 flags, u32 noflags)
+{
+ struct fib6_node *fn;
+ struct rt6_info *rt = NULL;
+ struct fib6_table *table;
+
+ table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ if (table == NULL)
+ return NULL;
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ if (!fn)
+ goto out;
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt->dst.dev->ifindex != dev->ifindex)
+ continue;
+ if ((rt->rt6i_flags & flags) != flags)
+ continue;
+ if ((rt->rt6i_flags & noflags) != 0)
+ continue;
+ dst_hold(&rt->dst);
+ break;
+ }
+out:
+ read_unlock_bh(&table->tb6_lock);
+ return rt;
+}
+
+
/* Create "default" multicast route to the interface */
static void addrconf_add_mroute(struct net_device *dev)
@@ -1736,31 +2045,6 @@ static void addrconf_add_mroute(struct net_device *dev)
ip6_route_add(&cfg);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-static void sit_route_add(struct net_device *dev)
-{
- struct fib6_config cfg = {
- .fc_table = RT6_TABLE_MAIN,
- .fc_metric = IP6_RT_PRIO_ADDRCONF,
- .fc_ifindex = dev->ifindex,
- .fc_dst_len = 96,
- .fc_flags = RTF_UP | RTF_NONEXTHOP,
- .fc_nlinfo.nl_net = dev_net(dev),
- };
-
- /* prefix length - 96 bits "::d.d.d.d" */
- ip6_route_add(&cfg);
-}
-#endif
-
-static void addrconf_add_lroute(struct net_device *dev)
-{
- struct in6_addr addr;
-
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
-}
-
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -1775,14 +2059,80 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return ERR_PTR(-EACCES);
/* Add default multicast route */
- addrconf_add_mroute(dev);
+ if (!(dev->flags & IFF_LOOPBACK))
+ addrconf_add_mroute(dev);
- /* Add link local route */
- addrconf_add_lroute(dev);
return idev;
}
-void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+static void manage_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp,
+ __u32 valid_lft, __u32 prefered_lft,
+ bool create, unsigned long now)
+{
+ u32 flags;
+ struct inet6_ifaddr *ift;
+
+ read_lock_bh(&idev->lock);
+ /* update all temporary addresses in the list */
+ list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) {
+ int age, max_valid, max_prefered;
+
+ if (ifp != ift->ifpub)
+ continue;
+
+ /* RFC 4941 section 3.3:
+ * If a received option will extend the lifetime of a public
+ * address, the lifetimes of temporary addresses should
+ * be extended, subject to the overall constraint that no
+ * temporary addresses should ever remain "valid" or "preferred"
+ * for a time longer than (TEMP_VALID_LIFETIME) or
+ * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
+ */
+ age = (now - ift->cstamp) / HZ;
+ max_valid = idev->cnf.temp_valid_lft - age;
+ if (max_valid < 0)
+ max_valid = 0;
+
+ max_prefered = idev->cnf.temp_prefered_lft -
+ idev->cnf.max_desync_factor - age;
+ if (max_prefered < 0)
+ max_prefered = 0;
+
+ if (valid_lft > max_valid)
+ valid_lft = max_valid;
+
+ if (prefered_lft > max_prefered)
+ prefered_lft = max_prefered;
+
+ spin_lock(&ift->lock);
+ flags = ift->flags;
+ ift->valid_lft = valid_lft;
+ ift->prefered_lft = prefered_lft;
+ ift->tstamp = now;
+ if (prefered_lft > 0)
+ ift->flags &= ~IFA_F_DEPRECATED;
+
+ spin_unlock(&ift->lock);
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ift);
+ }
+
+ if ((create || list_empty(&idev->tempaddr_list)) &&
+ idev->cnf.use_tempaddr > 0) {
+ /* When a new public address is created as described
+ * in [ADDRCONF], also create a new temporary address.
+ * Also create a temporary address if it's enabled but
+ * no temporary address currently exists.
+ */
+ read_unlock_bh(&idev->lock);
+ ipv6_create_tempaddr(ifp, NULL);
+ } else {
+ read_unlock_bh(&idev->lock);
+ }
+}
+
+void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
__u32 valid_lft;
@@ -1794,7 +2144,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG(("addrconf: prefix option too short\n"));
+ ADBG("addrconf: prefix option too short\n");
return;
}
@@ -1811,16 +2161,15 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
prefered_lft = ntohl(pinfo->prefered);
if (prefered_lft > valid_lft) {
- if (net_ratelimit())
- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+ net_warn_ratelimited("addrconf: prefix option has invalid lifetime\n");
return;
}
in6_dev = in6_dev_get(dev);
if (in6_dev == NULL) {
- if (net_ratelimit())
- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+ net_dbg_ratelimited("addrconf: device %s not configured\n",
+ dev->name);
return;
}
@@ -1847,21 +2196,22 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
if (addrconf_finite_timeout(rt_expires))
rt_expires *= HZ;
- rt = rt6_lookup(net, &pinfo->prefix, NULL,
- dev->ifindex, 1);
+ rt = addrconf_get_prefix_route(&pinfo->prefix,
+ pinfo->prefix_len,
+ dev,
+ RTF_ADDRCONF | RTF_PREFIX_RT,
+ RTF_GATEWAY | RTF_DEFAULT);
- if (rt && addrconf_is_prefix_route(rt)) {
+ if (rt) {
/* Autoconf prefix route */
if (valid_lft == 0) {
ip6_del_rt(rt);
rt = NULL;
} else if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
- rt->rt6i_expires = jiffies + rt_expires;
- rt->rt6i_flags |= RTF_EXPIRES;
+ rt6_set_expires(rt, jiffies + rt_expires);
} else {
- rt->rt6i_flags &= ~RTF_EXPIRES;
- rt->rt6i_expires = 0;
+ rt6_clean_expires(rt);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -1874,29 +2224,35 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
dev, expires, flags);
}
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
/* Try to figure out our local address for this prefix */
if (pinfo->autoconf && in6_dev->cnf.autoconf) {
- struct inet6_ifaddr * ifp;
+ struct inet6_ifaddr *ifp;
struct in6_addr addr;
int create = 0, update_lft = 0;
+ bool tokenized = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+ if (!ipv6_addr_any(&in6_dev->token)) {
+ read_lock_bh(&in6_dev->lock);
+ memcpy(addr.s6_addr + 8,
+ in6_dev->token.s6_addr + 8, 8);
+ read_unlock_bh(&in6_dev->lock);
+ tokenized = true;
+ } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+ ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
in6_dev_put(in6_dev);
return;
}
goto ok;
}
- if (net_ratelimit())
- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
- pinfo->prefix_len);
+ net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
+ pinfo->prefix_len);
in6_dev_put(in6_dev);
return;
@@ -1910,7 +2266,7 @@ ok:
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (in6_dev->cnf.optimistic_dad &&
- !net->ipv6.devconf_all->forwarding)
+ !net->ipv6.devconf_all->forwarding && sllao)
addr_flags = IFA_F_OPTIMISTIC;
#endif
@@ -1919,26 +2275,30 @@ ok:
*/
if (!max_addresses ||
ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ ifp = ipv6_add_addr(in6_dev, &addr, NULL,
+ pinfo->prefix_len,
addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags);
+ addr_flags, valid_lft,
+ prefered_lft);
- if (!ifp || IS_ERR(ifp)) {
+ if (IS_ERR_OR_NULL(ifp)) {
in6_dev_put(in6_dev);
return;
}
- update_lft = create = 1;
+ update_lft = 0;
+ create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
ifp->cstamp = jiffies;
- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+ ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_start(ifp);
}
if (ifp) {
- int flags;
+ u32 flags;
unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
- struct inet6_ifaddr *ift;
-#endif
u32 stored_lft;
/* update lifetime (RFC2462 5.5.3 e) */
@@ -1948,44 +2308,22 @@ ok:
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
- if (!update_lft && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minumum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ if (!update_lft && !create && stored_lft) {
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
}
if (update_lft) {
@@ -2001,47 +2339,11 @@ ok:
} else
spin_unlock(&ifp->lock);
-#ifdef CONFIG_IPV6_PRIVACY
- read_lock_bh(&in6_dev->lock);
- /* update all temporary addresses in the list */
- list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
- /*
- * When adjusting the lifetimes of an existing
- * temporary address, only lower the lifetimes.
- * Implementations must not increase the
- * lifetimes of an existing temporary address
- * when processing a Prefix Information Option.
- */
- if (ifp != ift->ifpub)
- continue;
-
- spin_lock(&ift->lock);
- flags = ift->flags;
- if (ift->valid_lft > valid_lft &&
- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
- if (ift->prefered_lft > prefered_lft &&
- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
- spin_unlock(&ift->lock);
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ift);
- }
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
- if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
- /*
- * When a new public address is created as described in [ADDRCONF],
- * also create a new temporary address. Also create a temporary
- * address if it's enabled but no temporary address currently exists.
- */
- read_unlock_bh(&in6_dev->lock);
- ipv6_create_tempaddr(ifp, NULL);
- } else {
- read_unlock_bh(&in6_dev->lock);
- }
-#endif
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify();
}
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2071,7 +2373,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (dev == NULL)
goto err_exit;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT) {
const struct net_device_ops *ops = dev->netdev_ops;
struct ifreq ifr;
@@ -2117,9 +2419,11 @@ err_exit:
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
- unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
- __u32 valid_lft)
+static int inet6_addr_add(struct net *net, int ifindex,
+ const struct in6_addr *pfx,
+ const struct in6_addr *peer_pfx,
+ unsigned int plen, __u32 ifa_flags,
+ __u32 prefered_lft, __u32 valid_lft)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2138,6 +2442,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
if (!valid_lft || prefered_lft > valid_lft)
return -EINVAL;
+ if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64)
+ return -EINVAL;
+
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
@@ -2166,33 +2473,34 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
prefered_lft = timeout;
}
- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+ ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
+ valid_lft, prefered_lft);
if (!IS_ERR(ifp)) {
- spin_lock_bh(&ifp->lock);
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = jiffies;
- spin_unlock_bh(&ifp->lock);
+ if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
+ expires, flags);
+ }
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- expires, flags);
/*
* Note that section 3.1 of RFC 4429 indicates
* that the Optimistic flag should not be set for
* manually configured addresses
*/
- addrconf_dad_start(ifp, 0);
+ addrconf_dad_start(ifp);
+ if (ifa_flags & IFA_F_MANAGETEMPADDR)
+ manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
+ true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
return PTR_ERR(ifp);
}
-static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
- unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+ const struct in6_addr *pfx, unsigned int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2215,13 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifa_flags & IFA_F_MANAGETEMPADDR))
+ manage_tempaddrs(idev, ifp, 0, 0, false,
+ jiffies);
ipv6_del_addr(ifp);
-
- /* If the last address is deleted administratively,
- disable IPv6 on this interface.
- */
- if (list_empty(&idev->addr_list))
- addrconf_ifdown(idev->dev, 1);
+ addrconf_verify_rtnl();
return 0;
}
}
@@ -2235,14 +2542,14 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
rtnl_unlock();
@@ -2254,14 +2561,14 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
@@ -2272,7 +2579,9 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
- ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
+ ifp = ipv6_add_addr(idev, addr, NULL, plen,
+ scope, IFA_F_PERMANENT,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -2282,13 +2591,14 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void sit_add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope;
+ int scope, plen;
+ u32 pflags = 0;
ASSERT_RTNL();
@@ -2298,24 +2608,27 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (idev->dev->flags&IFF_POINTOPOINT) {
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
+ plen = 64;
} else {
scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
- add_addr(idev, &addr, 128, scope);
+ add_addr(idev, &addr, plen, scope);
+ addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
return;
}
for_each_netdev(net, dev) {
- struct in_device * in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (in_dev && (dev->flags & IFF_UP)) {
- struct in_ifaddr * ifa;
+ struct in_ifaddr *ifa;
int flag = scope;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
- int plen;
addr.s6_addr32[3] = ifa->ifa_local;
@@ -2326,12 +2639,10 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
continue;
flag |= IFA_HOST;
}
- if (idev->dev->flags&IFF_POINTOPOINT)
- plen = 64;
- else
- plen = 96;
add_addr(idev, &addr, plen, flag);
+ addrconf_prefix_route(&addr, plen, idev->dev, 0,
+ pflags);
}
}
}
@@ -2341,22 +2652,64 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
+ struct net_device *sp_dev;
+ struct inet6_ifaddr *sp_ifa;
+ struct rt6_info *sp_rt;
/* ::1 */
ASSERT_RTNL();
if ((idev = ipv6_find_idev(dev)) == NULL) {
- printk(KERN_DEBUG "init loopback: add_dev failed\n");
+ pr_debug("%s: add_dev failed\n", __func__);
return;
}
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+
+ /* Add routes to other interface's IPv6 addresses */
+ for_each_netdev(dev_net(dev), sp_dev) {
+ if (!strcmp(sp_dev->name, dev->name))
+ continue;
+
+ idev = __in6_dev_get(sp_dev);
+ if (!idev)
+ continue;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
+
+ if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
+ continue;
+
+ if (sp_ifa->rt) {
+ /* This dst has been added to garbage list when
+ * lo device down, release this obsolete dst and
+ * reallocate a new router for ifa.
+ */
+ if (sp_ifa->rt->dst.obsolete > 0) {
+ ip6_rt_put(sp_ifa->rt);
+ sp_ifa->rt = NULL;
+ } else {
+ continue;
+ }
+ }
+
+ sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
+
+ /* Failure cases are ignored */
+ if (!IS_ERR(sp_rt)) {
+ sp_ifa->rt = sp_rt;
+ ip6_ins_rt(sp_rt);
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
}
-static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
+static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
{
- struct inet6_ifaddr * ifp;
+ struct inet6_ifaddr *ifp;
u32 addr_flags = IFA_F_PERMANENT;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -2366,10 +2719,11 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr
#endif
- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
+ ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
- addrconf_dad_start(ifp, 0);
+ addrconf_dad_start(ifp);
in6_ifa_put(ifp);
}
}
@@ -2377,15 +2731,18 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr
static void addrconf_dev_config(struct net_device *dev)
{
struct in6_addr addr;
- struct inet6_dev * idev;
+ struct inet6_dev *idev;
ASSERT_RTNL();
if ((dev->type != ARPHRD_ETHER) &&
(dev->type != ARPHRD_FDDI) &&
- (dev->type != ARPHRD_IEEE802_TR) &&
(dev->type != ARPHRD_ARCNET) &&
- (dev->type != ARPHRD_INFINIBAND)) {
+ (dev->type != ARPHRD_INFINIBAND) &&
+ (dev->type != ARPHRD_IEEE802154) &&
+ (dev->type != ARPHRD_IEEE1394) &&
+ (dev->type != ARPHRD_TUNNEL6) &&
+ (dev->type != ARPHRD_6LOWPAN)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2401,7 +2758,7 @@ static void addrconf_dev_config(struct net_device *dev)
addrconf_add_linklocal(idev, &addr);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void addrconf_sit_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2415,7 +2772,7 @@ static void addrconf_sit_config(struct net_device *dev)
*/
if ((idev = ipv6_find_idev(dev)) == NULL) {
- printk(KERN_DEBUG "init sit: add_dev failed\n");
+ pr_debug("%s: add_dev failed\n", __func__);
return;
}
@@ -2423,7 +2780,6 @@ static void addrconf_sit_config(struct net_device *dev)
struct in6_addr addr;
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
return;
@@ -2431,68 +2787,36 @@ static void addrconf_sit_config(struct net_device *dev)
sit_add_v4_addrs(idev);
- if (dev->flags&IFF_POINTOPOINT) {
+ if (dev->flags&IFF_POINTOPOINT)
addrconf_add_mroute(dev);
- addrconf_add_lroute(dev);
- } else
- sit_route_add(dev);
}
#endif
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
- struct in6_addr lladdr;
-
- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
- addrconf_add_linklocal(idev, &lladdr);
- return 0;
- }
- return -1;
-}
-
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
- struct net_device *link_dev;
- struct net *net = dev_net(idev->dev);
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
- for_each_netdev(net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
-}
-
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
+ struct in6_addr addr;
ASSERT_RTNL();
- idev = addrconf_add_dev(dev);
- if (IS_ERR(idev)) {
- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
+ pr_debug("%s: add_dev failed\n", __func__);
return;
}
- ip6_tnl_add_linklocal(idev);
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+ addrconf_add_linklocal(idev, &addr);
+ else
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
}
+#endif
static int addrconf_notify(struct notifier_block *this, unsigned long event,
- void * data)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *) data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct inet6_dev *idev = __in6_dev_get(dev);
int run_pending = 0;
int err;
@@ -2514,9 +2838,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (event == NETDEV_UP) {
if (!addrconf_qdisc_ok(dev)) {
/* device is not ready yet. */
- printk(KERN_INFO
- "ADDRCONF(NETDEV_UP): %s: "
- "link is not ready\n",
+ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
dev->name);
break;
}
@@ -2541,23 +2863,23 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
idev->if_flags |= IF_READY;
}
- printk(KERN_INFO
- "ADDRCONF(NETDEV_CHANGE): %s: "
- "link becomes ready\n",
- dev->name);
+ pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
+ dev->name);
run_pending = 1;
}
switch (dev->type) {
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
case ARPHRD_SIT:
addrconf_sit_config(dev);
break;
#endif
- case ARPHRD_TUNNEL6:
- addrconf_ip6_tnl_config(dev);
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
break;
+#endif
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
@@ -2607,7 +2929,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
/*
- * MTU falled under IPV6_MIN_MTU.
+ * if MTU under IPV6_MIN_MTU.
* Stop IPv6 on this interface.
*/
@@ -2664,8 +2986,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
- LIST_HEAD(keep_list);
- int state;
+ int state, i;
ASSERT_RTNL();
@@ -2684,20 +3005,37 @@ static int addrconf_ifdown(struct net_device *dev, int how)
idev->dead = 1;
/* protected by rtnl_lock */
- rcu_assign_pointer(dev->ip6_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip6_ptr, NULL);
/* Step 1.5: remove snmp6 entry */
snmp6_unregister_dev(idev);
}
+ /* Step 2: clear hash table */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+ struct hlist_head *h = &inet6_addr_lst[i];
+
+ spin_lock_bh(&addrconf_hash_lock);
+ restart:
+ hlist_for_each_entry_rcu(ifa, h, addr_lst) {
+ if (ifa->idev == idev) {
+ hlist_del_init_rcu(&ifa->addr_lst);
+ addrconf_del_dad_work(ifa);
+ goto restart;
+ }
+ }
+ spin_unlock_bh(&addrconf_hash_lock);
+ }
+
write_lock_bh(&idev->lock);
+ addrconf_del_rs_timer(idev);
+
/* Step 2: clear flags for stateless addrconf */
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
-#ifdef CONFIG_IPV6_PRIVACY
if (how && del_timer(&idev->regen_timer))
in6_dev_put(idev);
@@ -2717,59 +3055,29 @@ static int addrconf_ifdown(struct net_device *dev, int how)
in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
}
-#endif
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_timer(ifa);
-
- /* If just doing link down, and address is permanent
- and not link-local, then retain it. */
- if (!how &&
- (ifa->flags&IFA_F_PERMANENT) &&
- !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
- list_move_tail(&ifa->if_list, &keep_list);
-
- /* If not doing DAD on this address, just keep it. */
- if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
- idev->cnf.accept_dad <= 0 ||
- (ifa->flags & IFA_F_NODAD))
- continue;
+ addrconf_del_dad_work(ifa);
- /* If it was tentative already, no need to notify */
- if (ifa->flags & IFA_F_TENTATIVE)
- continue;
+ list_del(&ifa->if_list);
- /* Flag it for later restoration when link comes up */
- ifa->flags |= IFA_F_TENTATIVE;
- ifa->state = INET6_IFADDR_STATE_DAD;
- } else {
- list_del(&ifa->if_list);
-
- /* clear hash table */
- spin_lock_bh(&addrconf_hash_lock);
- hlist_del_init_rcu(&ifa->addr_lst);
- spin_unlock_bh(&addrconf_hash_lock);
-
- write_unlock_bh(&idev->lock);
- spin_lock_bh(&ifa->state_lock);
- state = ifa->state;
- ifa->state = INET6_IFADDR_STATE_DEAD;
- spin_unlock_bh(&ifa->state_lock);
-
- if (state != INET6_IFADDR_STATE_DEAD) {
- __ipv6_ifa_notify(RTM_DELADDR, ifa);
- atomic_notifier_call_chain(&inet6addr_chain,
- NETDEV_DOWN, ifa);
- }
+ write_unlock_bh(&idev->lock);
+
+ spin_lock_bh(&ifa->state_lock);
+ state = ifa->state;
+ ifa->state = INET6_IFADDR_STATE_DEAD;
+ spin_unlock_bh(&ifa->state_lock);
- in6_ifa_put(ifa);
- write_lock_bh(&idev->lock);
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
}
- }
+ in6_ifa_put(ifa);
- list_splice(&keep_list, &idev->addr_list);
+ write_lock_bh(&idev->lock);
+ }
write_unlock_bh(&idev->lock);
@@ -2793,43 +3101,47 @@ static int addrconf_ifdown(struct net_device *dev, int how)
static void addrconf_rs_timer(unsigned long data)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
- struct inet6_dev *idev = ifp->idev;
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+ struct net_device *dev = idev->dev;
+ struct in6_addr lladdr;
- read_lock(&idev->lock);
+ write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
goto out;
- if (idev->cnf.forwarding)
+ if (!ipv6_accept_ra(idev))
goto out;
/* Announcement received after solicitation was sent */
if (idev->if_flags & IF_RA_RCVD)
goto out;
- spin_lock(&ifp->lock);
- if (ifp->probes++ < idev->cnf.rtr_solicits) {
- /* The wait after the last probe can be shorter */
- addrconf_mod_timer(ifp, AC_RS,
- (ifp->probes == idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
- idev->cnf.rtr_solicit_interval);
- spin_unlock(&ifp->lock);
+ if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+ write_unlock(&idev->lock);
+ if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ ndisc_send_rs(dev, &lladdr,
+ &in6addr_linklocal_allrouters);
+ else
+ goto put;
- ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ write_lock(&idev->lock);
+ /* The wait after the last probe can be shorter */
+ addrconf_mod_rs_timer(idev, (idev->rs_probes ==
+ idev->cnf.rtr_solicits) ?
+ idev->cnf.rtr_solicit_delay :
+ idev->cnf.rtr_solicit_interval);
} else {
- spin_unlock(&ifp->lock);
/*
* Note: we do not support deprecated "all on-link"
* assumption any longer.
*/
- printk(KERN_DEBUG "%s: no IPv6 routers present\n",
- idev->dev->name);
+ pr_debug("%s: no IPv6 routers present\n", idev->dev->name);
}
out:
- read_unlock(&idev->lock);
- in6_ifa_put(ifp);
+ write_unlock(&idev->lock);
+put:
+ in6_dev_put(idev);
}
/*
@@ -2843,20 +3155,20 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
- ifp->probes = idev->cnf.dad_transmits;
- addrconf_mod_timer(ifp, AC_DAD, rand_num);
+ ifp->dad_probes = idev->cnf.dad_transmits;
+ addrconf_mod_dad_work(ifp, rand_num);
}
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
addrconf_join_solict(dev, &ifp->addr);
- net_srandom(ifp->addr.s6_addr32[3]);
+ prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
read_lock_bh(&idev->lock);
spin_lock(&ifp->lock);
@@ -2901,57 +3213,124 @@ out:
read_unlock_bh(&idev->lock);
}
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+ bool begin_dad = false;
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
+ begin_dad = true;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (begin_dad)
+ addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+ struct inet6_ifaddr,
+ dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
- if (!ifp->probes && addrconf_dad_end(ifp))
+ enum {
+ DAD_PROCESS,
+ DAD_BEGIN,
+ DAD_ABORT,
+ } action = DAD_PROCESS;
+
+ rtnl_lock();
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+ action = DAD_BEGIN;
+ ifp->state = INET6_IFADDR_STATE_DAD;
+ } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+ action = DAD_ABORT;
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (action == DAD_BEGIN) {
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+
+ if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- read_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- read_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
- if (ifp->probes == 0) {
+ if (ifp->dad_probes == 0) {
/*
* DAD was successful
*/
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
addrconf_dad_completed(ifp);
goto out;
}
- ifp->probes--;
- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+ ifp->dad_probes--;
+ addrconf_mod_dad_work(ifp,
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
+ rtnl_unlock();
+}
+
+/* ifp->idev must be at least read locked */
+static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ifpiter;
+ struct inet6_dev *idev = ifp->idev;
+
+ list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) {
+ if (ifpiter->scope > IFA_LINK)
+ break;
+ if (ifp != ifpiter && ifpiter->scope == IFA_LINK &&
+ (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|
+ IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) ==
+ IFA_F_PERMANENT)
+ return false;
+ }
+ return true;
}
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
{
struct net_device *dev = ifp->idev->dev;
+ struct in6_addr lladdr;
+ bool send_rs, send_mld;
+
+ addrconf_del_dad_work(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -2959,27 +3338,42 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
ipv6_ifa_notify(RTM_NEWADDR, ifp);
- /* If added prefix is link local and forwarding is off,
- start sending router solicitations.
+ /* If added prefix is link local and we are prepared to process
+ router advertisements, start sending router solicitations.
+ */
+
+ read_lock_bh(&ifp->idev->lock);
+ send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
+ send_rs = send_mld &&
+ ipv6_accept_ra(ifp->idev) &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0;
+ read_unlock_bh(&ifp->idev->lock);
+
+ /* While dad is in progress mld report's source address is in6_addrany.
+ * Resend with proper ll now.
*/
+ if (send_mld)
+ ipv6_mc_dad_complete(ifp->idev);
- if ((ifp->idev->cnf.forwarding == 0 ||
- ifp->idev->cnf.forwarding == 2) &&
- ifp->idev->cnf.rtr_solicits > 0 &&
- (dev->flags&IFF_LOOPBACK) == 0 &&
- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ if (send_rs) {
/*
* If a host as already performed a random delay
* [...] as part of DAD [...] there is no need
* to delay again before sending the first RS
*/
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ return;
+ ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters);
- spin_lock_bh(&ifp->lock);
- ifp->probes = 1;
+ write_lock_bh(&ifp->idev->lock);
+ spin_lock(&ifp->lock);
+ ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
- spin_unlock_bh(&ifp->lock);
+ addrconf_mod_rs_timer(ifp->idev,
+ ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock(&ifp->lock);
+ write_unlock_bh(&ifp->idev->lock);
}
}
@@ -3002,20 +3396,39 @@ static void addrconf_dad_run(struct inet6_dev *idev)
struct if6_iter_state {
struct seq_net_private p;
int bucket;
+ int offset;
};
-static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
+static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
{
struct inet6_ifaddr *ifa = NULL;
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ int p = 0;
+
+ /* initial bucket if pos is 0 */
+ if (pos == 0) {
+ state->bucket = 0;
+ state->offset = 0;
+ }
+
+ for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
+ hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
+ addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
+ /* sync with offset */
+ if (p < state->offset) {
+ p++;
+ continue;
+ }
+ state->offset++;
+ return ifa;
+ }
- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- struct hlist_node *n;
- hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
- addr_lst)
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ /* prepare for next bucket */
+ state->offset = 0;
+ p = 0;
}
return NULL;
}
@@ -3025,38 +3438,33 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
{
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct hlist_node *n = &ifa->addr_lst;
- hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
+ state->offset++;
+ return ifa;
+ }
while (++state->bucket < IN6_ADDR_HSIZE) {
- hlist_for_each_entry_rcu_bh(ifa, n,
+ state->offset = 0;
+ hlist_for_each_entry_rcu_bh(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
+ state->offset++;
+ return ifa;
}
}
return NULL;
}
-static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct inet6_ifaddr *ifa = if6_get_first(seq);
-
- if (ifa)
- while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
- --pos;
- return pos ? NULL : ifa;
-}
-
static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(rcu_bh)
{
rcu_read_lock_bh();
- return if6_get_idx(seq, *pos);
+ return if6_get_first(seq, *pos);
}
static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -3082,7 +3490,7 @@ static int if6_seq_show(struct seq_file *seq, void *v)
ifp->idev->dev->ifindex,
ifp->prefix_len,
ifp->scope,
- ifp->flags,
+ (u8) ifp->flags,
ifp->idev->dev->name);
return 0;
}
@@ -3110,14 +3518,14 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
+ if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
static void __net_exit if6_proc_net_exit(struct net *net)
{
- proc_net_remove(net, "if_inet6");
+ remove_proc_entry("if_inet6", net->proc_net);
}
static struct pernet_operations if6_proc_net_ops = {
@@ -3136,17 +3544,16 @@ void if6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Check if address is a home address configured on any interface. */
-int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
+int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
{
int ret = 0;
struct inet6_ifaddr *ifp = NULL;
- struct hlist_node *n;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3164,27 +3571,31 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
* Periodic address status verification
*/
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
- struct hlist_node *node;
int i;
+ ASSERT_RTNL();
+
rcu_read_lock_bh();
- spin_lock(&addrconf_verify_lock);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp, node,
- &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
unsigned long age;
- if (ifp->flags & IFA_F_PERMANENT)
+ /* When setting preferred_lft to a value not zero or
+ * infinity, while valid_lft is infinity
+ * IFA_F_PERMANENT has a non-infinity life time.
+ */
+ if ((ifp->flags & IFA_F_PERMANENT) &&
+ (ifp->prefered_lft == INFINITY_LIFE_TIME))
continue;
spin_lock(&ifp->lock);
@@ -3209,7 +3620,8 @@ restart:
ifp->flags |= IFA_F_DEPRECATED;
}
- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+ if ((ifp->valid_lft != INFINITY_LIFE_TIME) &&
+ (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)))
next = ifp->tstamp + ifp->valid_lft * HZ;
spin_unlock(&ifp->lock);
@@ -3221,12 +3633,11 @@ restart:
in6_ifa_put(ifp);
goto restart;
}
-#ifdef CONFIG_IPV6_PRIVACY
} else if ((ifp->flags&IFA_F_TEMPORARY) &&
!(ifp->flags&IFA_F_TENTATIVE)) {
unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
ifp->idev->cnf.dad_transmits *
- ifp->idev->nd_parms->retrans_time / HZ;
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ;
if (age >= ifp->prefered_lft - regen_advance) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -3249,7 +3660,6 @@ restart:
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
spin_unlock(&ifp->lock);
-#endif
} else {
/* ifp->prefered_lft <= ifp->valid_lft */
if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -3270,27 +3680,38 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched));
-
- addr_chk_timer.expires = next_sched;
- add_timer(&addr_chk_timer);
- spin_unlock(&addrconf_verify_lock);
+ ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
+ mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
-static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+static void addrconf_verify_work(struct work_struct *w)
+{
+ rtnl_lock();
+ addrconf_verify_rtnl();
+ rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+ mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
+ struct in6_addr **peer_pfx)
{
struct in6_addr *pfx = NULL;
+ *peer_pfx = NULL;
+
if (addr)
pfx = nla_data(addr);
if (local) {
if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
- pfx = NULL;
- else
- pfx = nla_data(local);
+ *peer_pfx = pfx;
+ pfx = nla_data(local);
}
return pfx;
@@ -3300,15 +3721,17 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
[IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
+ [IFA_FLAGS] = { .len = sizeof(u32) },
};
static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3316,23 +3739,37 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+ /* We ignore other flags so far. */
+ ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+ return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen);
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
u32 prefered_lft, u32 valid_lft)
{
u32 flags;
clock_t expires;
unsigned long timeout;
+ bool was_managetempaddr;
+ bool had_prefixroute;
+
+ ASSERT_RTNL();
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
+ if (ifa_flags & IFA_F_MANAGETEMPADDR &&
+ (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
+ return -EINVAL;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -3352,7 +3789,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
}
spin_lock_bh(&ifp->lock);
- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
+ was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
+ had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
+ !(ifp->flags & IFA_F_NOPREFIXROUTE);
+ ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD |
+ IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+ IFA_F_NOPREFIXROUTE);
+ ifp->flags |= ifa_flags;
ifp->tstamp = jiffies;
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
@@ -3361,24 +3804,46 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
- expires, flags);
- addrconf_verify(0);
+ if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
+ expires, flags);
+ } else if (had_prefixroute) {
+ enum cleanup_prefix_rt_t action;
+ unsigned long rt_expires;
+
+ write_lock_bh(&ifp->idev->lock);
+ action = check_cleanup_prefix_route(ifp, &rt_expires);
+ write_unlock_bh(&ifp->idev->lock);
+
+ if (action != CLEANUP_PREFIX_RT_NOP) {
+ cleanup_prefix_route(ifp, rt_expires,
+ action == CLEANUP_PREFIX_RT_DEL);
+ }
+ }
+
+ if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ valid_lft = prefered_lft = 0;
+ manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft,
+ !was_managetempaddr, jiffies);
+ }
+
+ addrconf_verify_rtnl();
return 0;
}
static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
- u8 ifa_flags;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3386,7 +3851,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
@@ -3405,16 +3870,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (dev == NULL)
return -ENODEV;
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
/* We ignore other flags so far. */
- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+ IFA_F_NOPREFIXROUTE;
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (ifa == NULL) {
/*
* It would be best to check for !NLM_F_CREATE here but
- * userspace alreay relies on not having to provide this.
+ * userspace already relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, pfx,
+ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
preferred_lft, valid_lft);
}
@@ -3430,7 +3898,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return err;
}
-static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags,
u8 scope, int ifindex)
{
struct ifaddrmsg *ifm;
@@ -3471,24 +3939,27 @@ static inline int rt_scope(int ifa_scope)
static inline int inet6_ifaddr_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(16) /* IFA_LOCAL */
+ nla_total_size(16) /* IFA_ADDRESS */
- + nla_total_size(sizeof(struct ifa_cacheinfo));
+ + nla_total_size(sizeof(struct ifa_cacheinfo))
+ + nla_total_size(4) /* IFA_FLAGS */;
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
- if (!(ifa->flags&IFA_F_PERMANENT)) {
+ if (!((ifa->flags&IFA_F_PERMANENT) &&
+ (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
valid = ifa->valid_lft;
if (preferred != INFINITY_LIFE_TIME) {
@@ -3509,17 +3980,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
valid = INFINITY_LIFE_TIME;
}
- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
- }
+ if (!ipv6_addr_any(&ifa->peer_addr)) {
+ if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
+ nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
+ goto error;
+ } else
+ if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
+ goto error;
+
+ if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ goto error;
+
+ if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+ goto error;
return nlmsg_end(skb, nlh);
+
+error:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- u32 pid, u32 seq, int event, u16 flags)
+ u32 portid, u32 seq, int event, u16 flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3528,7 +4011,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3544,7 +4027,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3553,7 +4036,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3594,12 +4077,13 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (++ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDR,
NLM_F_MULTI);
if (err <= 0)
break;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
break;
}
@@ -3610,7 +4094,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETMULTICAST,
NLM_F_MULTI);
@@ -3625,7 +4109,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETANYCAST,
NLM_F_MULTI);
@@ -3651,17 +4135,17 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
- struct hlist_node *node;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
s_ip_idx = ip_idx = cb->args[2];
rcu_read_lock();
+ cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
if (h > s_h || idx > s_idx)
@@ -3709,13 +4193,12 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
- void *arg)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *addr = NULL;
+ struct in6_addr *addr = NULL, *peer;
struct net_device *dev = NULL;
struct inet6_ifaddr *ifa;
struct sk_buff *skb;
@@ -3725,7 +4208,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
if (err < 0)
goto errout;
- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
if (addr == NULL) {
err = -EINVAL;
goto errout;
@@ -3747,7 +4230,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto errout_ifa;
}
- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+ err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWADDR, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3755,7 +4238,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout_ifa:
in6_ifa_put(ifa);
errout:
@@ -3805,13 +4288,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
-#ifdef CONFIG_IPV6_PRIVACY
+ array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
@@ -3834,6 +4319,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+ array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
}
static inline size_t inet6_ifla6_size(void)
@@ -3842,7 +4329,8 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(sizeof(struct ifla_cacheinfo))
+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
- + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -3855,7 +4343,7 @@ static inline size_t inet6_if_nlmsg_size(void)
+ nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
}
-static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
int items, int bytes)
{
int i;
@@ -3865,12 +4353,12 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
/* Use put_unaligned() because stats may not be aligned for u64. */
put_unaligned(items, &stats[0]);
for (i = 1; i < items; i++)
- put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+ put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
memset(&stats[items], 0, pad);
}
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
int items, int bytes, size_t syncpoff)
{
int i;
@@ -3890,11 +4378,11 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
{
switch (attrtype) {
case IFLA_INET6_STATS:
- __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+ __snmp6_fill_stats64(stats, idev->stats.ipv6,
IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
- __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+ __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
break;
}
}
@@ -3904,14 +4392,14 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
struct nlattr *nla;
struct ifla_cacheinfo ci;
- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
-
+ if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+ goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
ci.tstamp = cstamp_delta(idev->tstamp);
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
- ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
+ ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
+ if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
+ goto nla_put_failure;
nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
if (nla == NULL)
goto nla_put_failure;
@@ -3929,6 +4417,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
goto nla_put_failure;
snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+ if (nla == NULL)
+ goto nla_put_failure;
+ read_lock_bh(&idev->lock);
+ memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+ read_unlock_bh(&idev->lock);
+
return 0;
nla_put_failure:
@@ -3956,15 +4451,94 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
return 0;
}
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+ struct inet6_ifaddr *ifp;
+ struct net_device *dev = idev->dev;
+ bool update_rs = false;
+ struct in6_addr ll_addr;
+
+ ASSERT_RTNL();
+
+ if (token == NULL)
+ return -EINVAL;
+ if (ipv6_addr_any(token))
+ return -EINVAL;
+ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+ return -EINVAL;
+ if (!ipv6_accept_ra(idev))
+ return -EINVAL;
+ if (idev->cnf.rtr_solicits <= 0)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+
+ BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+ memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+ write_unlock_bh(&idev->lock);
+
+ if (!idev->dead && (idev->if_flags & IF_READY) &&
+ !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
+ IFA_F_OPTIMISTIC)) {
+
+ /* If we're not ready, then normal ifup will take care
+ * of this. Otherwise, we need to request our rs here.
+ */
+ ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+ update_rs = true;
+ }
+
+ write_lock_bh(&idev->lock);
+
+ if (update_rs) {
+ idev->if_flags |= IF_RS_SENT;
+ idev->rs_probes = 1;
+ addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+ }
+
+ /* Well, that's kinda nasty ... */
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ spin_lock(&ifp->lock);
+ if (ifp->tokenized) {
+ ifp->valid_lft = 0;
+ ifp->prefered_lft = 0;
+ }
+ spin_unlock(&ifp->lock);
+ }
+
+ write_unlock_bh(&idev->lock);
+ addrconf_verify_rtnl();
+ return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+ int err = -EINVAL;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+ if (!idev)
+ return -EAFNOSUPPORT;
+
+ if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+ BUG();
+
+ if (tb[IFLA_INET6_TOKEN])
+ err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+ return err;
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3976,15 +4550,13 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
-
- if (dev->addr_len)
- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-
- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
- if (dev->ifindex != dev->iflink)
- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
-
+ if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+ (dev->addr_len &&
+ nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+ nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+ (dev->ifindex != dev->iflink &&
+ nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+ goto nla_put_failure;
protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
if (protoinfo == NULL)
goto nla_put_failure;
@@ -4008,7 +4580,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
- struct hlist_node *node;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -4017,14 +4588,14 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
idev = __in6_dev_get(dev);
if (!idev)
goto cont;
if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWLINK, NLM_F_MULTI) <= 0)
goto out;
@@ -4057,11 +4628,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -4072,14 +4643,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
}
static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
- struct prefix_info *pinfo, u32 pid, u32 seq,
+ struct prefix_info *pinfo, u32 portid, u32 seq,
int event, unsigned int flags)
{
struct prefixmsg *pmsg;
struct nlmsghdr *nlh;
struct prefix_cacheinfo ci;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -4097,12 +4668,12 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
if (pinfo->autoconf)
pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
-
+ if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix))
+ goto nla_put_failure;
ci.preferred_time = ntohl(pinfo->prefered);
ci.valid_time = ntohl(pinfo->valid);
- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
-
+ if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci))
+ goto nla_put_failure;
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -4137,6 +4708,11 @@ errout:
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
+ struct net *net = dev_net(ifp->idev->dev);
+
+ if (event)
+ ASSERT_RTNL();
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
@@ -4151,18 +4727,34 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
+ if (!ipv6_addr_any(&ifp->peer_addr)) {
+ struct rt6_info *rt;
+ struct net_device *dev = ifp->idev->dev;
+
+ rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
+ dev->ifindex, 1);
+ if (rt) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ }
+ }
dst_hold(&ifp->rt->dst);
- if (ifp->state == INET6_IFADDR_STATE_DEAD &&
- ip6_del_rt(ifp->rt))
+ if (ip6_del_rt(ifp->rt))
dst_free(&ifp->rt->dst);
break;
}
+ atomic_inc(&net->ipv6.dev_addr_genid);
+ rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4176,15 +4768,23 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
#ifdef CONFIG_SYSCTL
static
-int addrconf_sysctl_forward(ctl_table *ctl, int write,
+int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
+ struct ctl_table lctl;
int ret;
- ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ /*
+ * ctl->data points to idev->cnf.forwarding, we should
+ * not modify it until we get the rtnl lock.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_fixup_forwarding(ctl, valp, val);
@@ -4195,13 +4795,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
static void dev_disable_change(struct inet6_dev *idev)
{
+ struct netdev_notifier_info info;
+
if (!idev || !idev->dev)
return;
+ netdev_notifier_info_init(&info, idev->dev);
if (idev->cnf.disable_ipv6)
- addrconf_notify(NULL, NETDEV_DOWN, idev->dev);
+ addrconf_notify(NULL, NETDEV_DOWN, &info);
else
- addrconf_notify(NULL, NETDEV_UP, idev->dev);
+ addrconf_notify(NULL, NETDEV_UP, &info);
}
static void addrconf_disable_change(struct net *net, __s32 newf)
@@ -4222,26 +4825,27 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
rcu_read_unlock();
}
-static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
+static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
{
struct net *net;
+ int old;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
net = (struct net *)table->extra2;
+ old = *p;
+ *p = newf;
- if (p == &net->ipv6.devconf_dflt->disable_ipv6)
+ if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
+ rtnl_unlock();
return 0;
-
- if (!rtnl_trylock()) {
- /* Restore the original values before restarting */
- *p = old;
- return restart_syscall();
}
if (p == &net->ipv6.devconf_all->disable_ipv6) {
- __s32 newf = net->ipv6.devconf_all->disable_ipv6;
net->ipv6.devconf_dflt->disable_ipv6 = newf;
addrconf_disable_change(net, newf);
- } else if ((!*p) ^ (!old))
+ } else if ((!newf) ^ (!old))
dev_disable_change((struct inet6_dev *)table->extra1);
rtnl_unlock();
@@ -4249,15 +4853,23 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
}
static
-int addrconf_sysctl_disable(ctl_table *ctl, int write,
+int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
+ struct ctl_table lctl;
int ret;
- ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ /*
+ * ctl->data points to idev->cnf.disable_ipv6, we should
+ * not modify it until we get the rtnl lock.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
if (write)
ret = addrconf_disable_ipv6(ctl, valp, val);
@@ -4266,11 +4878,50 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
return ret;
}
+static
+int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int ret;
+ int old, new;
+
+ old = *valp;
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ new = *valp;
+
+ if (write && old != new) {
+ struct net *net = ctl->extra2;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+ else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
+ else {
+ struct inet6_dev *idev = ctl->extra1;
+
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ idev->dev->ifindex,
+ &idev->cnf);
+ }
+ rtnl_unlock();
+ }
+
+ return ret;
+}
+
+
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table addrconf_vars[DEVCONF_MAX+1];
- char *dev_name;
+ struct ctl_table addrconf_vars[DEVCONF_MAX+1];
} addrconf_sysctl __read_mostly = {
.sysctl_header = NULL,
.addrconf_vars = {
@@ -4351,7 +5002,22 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#ifdef CONFIG_IPV6_PRIVACY
+ {
+ .procname = "mldv1_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv1_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "mldv2_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv2_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
{
.procname = "use_tempaddr",
.data = &ipv6_devconf.use_tempaddr,
@@ -4387,7 +5053,6 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "max_addresses",
.data = &ipv6_devconf.max_addresses,
@@ -4439,7 +5104,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.proxy_ndp,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_sysctl_proxy_ndp,
},
{
.procname = "accept_source_route",
@@ -4489,6 +5154,20 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "ndisc_notify",
+ .data = &ipv6_devconf.ndisc_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "suppress_frag_ndisc",
+ .data = &ipv6_devconf.suppress_frag_ndisc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
@@ -4499,17 +5178,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
{
int i;
struct addrconf_sysctl_table *t;
-
-#define ADDRCONF_CTL_PATH_DEV 3
-
- struct ctl_path addrconf_ctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv6", },
- { .procname = "conf", },
- { /* to be set */ },
- { },
- };
-
+ char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
@@ -4521,27 +5190,15 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
t->addrconf_vars[i].extra2 = net;
}
- /*
- * Make a copy of dev_name, because '.procname' is regarded as const
- * by sysctl and we wouldn't want anyone to change it under our feet
- * (see SIOCSIFNAME).
- */
- t->dev_name = kstrdup(dev_name, GFP_KERNEL);
- if (!t->dev_name)
- goto free;
+ snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
- addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
-
- t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
- t->addrconf_vars);
+ t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars);
if (t->sysctl_header == NULL)
- goto free_procname;
+ goto free;
p->sysctl = t;
return 0;
-free_procname:
- kfree(t->dev_name);
free:
kfree(t);
out:
@@ -4557,14 +5214,13 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
t = p->sysctl;
p->sysctl = NULL;
- unregister_sysctl_table(t->sysctl_header);
- kfree(t->dev_name);
+ unregister_net_sysctl_table(t->sysctl_header);
kfree(t);
}
static void addrconf_sysctl_register(struct inet6_dev *idev)
{
- neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
+ neigh_sysctl_register(idev->dev, idev->nd_parms,
&ndisc_ifinfo_sysctl_change);
__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev, &idev->cnf);
@@ -4581,26 +5237,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev)
static int __net_init addrconf_init_net(struct net *net)
{
- int err;
+ int err = -ENOMEM;
struct ipv6_devconf *all, *dflt;
- err = -ENOMEM;
- all = &ipv6_devconf;
- dflt = &ipv6_devconf_dflt;
+ all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
+ if (all == NULL)
+ goto err_alloc_all;
- if (!net_eq(net, &init_net)) {
- all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
- if (all == NULL)
- goto err_alloc_all;
+ dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+ if (dflt == NULL)
+ goto err_alloc_dflt;
- dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
- if (dflt == NULL)
- goto err_alloc_dflt;
- } else {
- /* these will be inherited by all namespaces */
- dflt->autoconf = ipv6_defaults.autoconf;
- dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
- }
+ /* these will be inherited by all namespaces */
+ dflt->autoconf = ipv6_defaults.autoconf;
+ dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
net->ipv6.devconf_all = all;
net->ipv6.devconf_dflt = dflt;
@@ -4645,26 +5295,11 @@ static struct pernet_operations addrconf_ops = {
.exit = addrconf_exit_net,
};
-/*
- * Device notifier
- */
-
-int register_inet6addr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&inet6addr_chain, nb);
-}
-EXPORT_SYMBOL(register_inet6addr_notifier);
-
-int unregister_inet6addr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
-}
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-
static struct rtnl_af_ops inet6_ops = {
.family = AF_INET6,
.fill_link_af = inet6_fill_link_af,
.get_link_af_size = inet6_get_link_af_size,
+ .set_link_af = inet6_set_link_af,
};
/*
@@ -4677,8 +5312,8 @@ int __init addrconf_init(void)
err = ipv6_addr_label_init();
if (err < 0) {
- printk(KERN_CRIT "IPv6 Addrconf:"
- " cannot initialize default policy table: %d.\n", err);
+ pr_crit("%s: cannot initialize default policy table: %d\n",
+ __func__, err);
goto out;
}
@@ -4686,6 +5321,12 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
+ addrconf_wq = create_workqueue("ipv6_addrconf");
+ if (!addrconf_wq) {
+ err = -ENOMEM;
+ goto out_nowq;
+ }
+
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
@@ -4716,31 +5357,36 @@ int __init addrconf_init(void)
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify(0);
+ addrconf_verify();
- err = rtnl_af_register(&inet6_ops);
- if (err < 0)
- goto errout_af;
+ rtnl_af_register(&inet6_ops);
- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+ err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
+ NULL);
if (err < 0)
goto errout;
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
+ inet6_dump_ifaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
+ inet6_dump_ifmcaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
+ inet6_dump_ifacaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
+ inet6_netconf_dump_devconf, NULL);
ipv6_addr_label_rtnl_register();
return 0;
errout:
rtnl_af_unregister(&inet6_ops);
-errout_af:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
+ destroy_workqueue(addrconf_wq);
+out_nowq:
unregister_pernet_subsys(&addrconf_ops);
out_addrlabel:
ipv6_addr_label_cleanup();
@@ -4776,7 +5422,8 @@ void addrconf_cleanup(void)
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
-
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
+
+ destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 6b03826552e..e6960457f62 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -3,13 +3,16 @@
* not configured or static.
*/
+#include <linux/export.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
-static inline unsigned ipv6_addr_scope2type(unsigned scope)
+static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
{
- switch(scope) {
+ switch (scope) {
case IPV6_ADDR_SCOPE_NODELOCAL:
return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
IPV6_ADDR_LOOPBACK);
@@ -77,3 +80,71 @@ int __ipv6_addr_type(const struct in6_addr *addr)
}
EXPORT_SYMBOL(__ipv6_addr_type);
+static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+
+int register_inet6addr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
+int unregister_inet6addr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
+int inet6addr_notifier_call_chain(unsigned long val, void *v)
+{
+ return atomic_notifier_call_chain(&inet6addr_chain, val, v);
+}
+EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+
+const struct ipv6_stub *ipv6_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_stub);
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+EXPORT_SYMBOL(in6addr_loopback);
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+EXPORT_SYMBOL(in6addr_any);
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allnodes);
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allrouters);
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
+
+static void snmp6_free_dev(struct inet6_dev *idev)
+{
+ kfree(idev->stats.icmpv6msgdev);
+ kfree(idev->stats.icmpv6dev);
+ free_percpu(idev->stats.ipv6);
+}
+
+/* Nobody refers to this device, we may destroy it. */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+ struct net_device *dev = idev->dev;
+
+ WARN_ON(!list_empty(&idev->addr_list));
+ WARN_ON(idev->mc_list != NULL);
+ WARN_ON(timer_pending(&idev->rs_timer));
+
+#ifdef NET_REFCNT_DEBUG
+ pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
+#endif
+ dev_put(dev);
+ if (!idev->dead) {
+ pr_warn("Freeing alive inet6 device %p\n", idev);
+ return;
+ }
+ snmp6_free_dev(idev);
+ kfree_rcu(idev, rcu);
+}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index c8993e5a337..731e1e1722d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -57,7 +56,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
}
/*
- * Default policy table (RFC3484 + extensions)
+ * Default policy table (RFC6724 + extensions)
*
* prefix addr_type label
* -------------------------------------------------------------------------
@@ -69,13 +68,17 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
* fc00::/7 N/A 5 ULA (RFC 4193)
* 2001::/32 N/A 6 Teredo (RFC 4380)
* 2001:10::/28 N/A 7 ORCHID (RFC 4843)
+ * fec0::/10 N/A 11 Site-local
+ * (deprecated by RFC3879)
+ * 3ffe::/16 N/A 12 6bone
*
* Note: 0xffffffff is used if we do not have any policies.
+ * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
*/
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
-static const __net_initdata struct ip6addrlbl_init_table
+static const __net_initconst struct ip6addrlbl_init_table
{
const struct in6_addr *prefix;
int prefixlen;
@@ -84,31 +87,39 @@ static const __net_initdata struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
+ .prefixlen = 10,
+ .label = 11,
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
+ .prefixlen = 16,
+ .label = 12,
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -129,7 +140,7 @@ static void ip6addrlbl_free_rcu(struct rcu_head *h)
ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
}
-static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
+static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
{
return atomic_inc_not_zero(&p->refcnt);
}
@@ -141,29 +152,28 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
}
/* Find label */
-static int __ip6addrlbl_match(struct net *net,
- struct ip6addrlbl_entry *p,
- const struct in6_addr *addr,
- int addrtype, int ifindex)
+static bool __ip6addrlbl_match(struct net *net,
+ const struct ip6addrlbl_entry *p,
+ const struct in6_addr *addr,
+ int addrtype, int ifindex)
{
if (!net_eq(ip6addrlbl_net(p), net))
- return 0;
+ return false;
if (p->ifindex && p->ifindex != ifindex)
- return 0;
+ return false;
if (p->addrtype && p->addrtype != addrtype)
- return 0;
+ return false;
if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
- return 0;
- return 1;
+ return false;
+ return true;
}
static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
const struct in6_addr *addr,
int type, int ifindex)
{
- struct hlist_node *pos;
struct ip6addrlbl_entry *p;
- hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (__ip6addrlbl_match(net, p, addr, type, ifindex))
return p;
}
@@ -240,38 +250,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
/* add a label */
static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
{
+ struct hlist_node *n;
+ struct ip6addrlbl_entry *last = NULL, *p = NULL;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
- __func__,
- newp, replace);
+ ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
+ replace);
- if (hlist_empty(&ip6addrlbl_table.head)) {
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
- } else {
- struct hlist_node *pos, *n;
- struct ip6addrlbl_entry *p = NULL;
- hlist_for_each_entry_safe(p, pos, n,
- &ip6addrlbl_table.head, list) {
- if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
- p->ifindex == newp->ifindex &&
- ipv6_addr_equal(&p->prefix, &newp->prefix)) {
- if (!replace) {
- ret = -EEXIST;
- goto out;
- }
- hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
- goto out;
- } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
- (p->prefixlen < newp->prefixlen)) {
- hlist_add_before_rcu(&newp->list, &p->list);
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+ p->ifindex == newp->ifindex &&
+ ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+ if (!replace) {
+ ret = -EEXIST;
goto out;
}
+ hlist_replace_rcu(&p->list, &newp->list);
+ ip6addrlbl_put(p);
+ goto out;
+ } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+ (p->prefixlen < newp->prefixlen)) {
+ hlist_add_before_rcu(&newp->list, &p->list);
+ goto out;
}
- hlist_add_after_rcu(&p->list, &newp->list);
+ last = p;
}
+ if (last)
+ hlist_add_after_rcu(&last->list, &newp->list);
+ else
+ hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
if (!ret)
ip6addrlbl_table.seq++;
@@ -307,13 +315,13 @@ static int __ip6addrlbl_del(struct net *net,
int ifindex)
{
struct ip6addrlbl_entry *p = NULL;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
int ret = -ESRCH;
ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
__func__, prefix, prefixlen, ifindex);
- hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
@@ -350,7 +358,7 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
int err = 0;
int i;
- ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
+ ADDRLABEL(KERN_DEBUG "%s\n", __func__);
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
int ret = ip6addrlbl_add(net,
@@ -368,11 +376,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
static void __net_exit ip6addrlbl_net_exit(struct net *net)
{
struct ip6addrlbl_entry *p = NULL;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
/* Remove all labels belonging to the exiting net */
spin_lock(&ip6addrlbl_table.lock);
- hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
if (net_eq(ip6addrlbl_net(p), net)) {
hlist_del_rcu(&p->list);
ip6addrlbl_put(p);
@@ -403,8 +411,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
-static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
- void *arg)
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrlblmsg *ifal;
@@ -425,10 +432,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!tb[IFAL_ADDRESS])
return -EINVAL;
-
pfx = nla_data(tb[IFAL_ADDRESS]);
- if (!pfx)
- return -EINVAL;
if (!tb[IFAL_LABEL])
return -EINVAL;
@@ -436,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -456,8 +460,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
-static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
- int prefixlen, int ifindex, u32 lseq)
+static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
+ int prefixlen, int ifindex, u32 lseq)
{
struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
ifal->ifal_family = AF_INET6;
@@ -470,10 +474,10 @@ static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
static int ip6addrlbl_fill(struct sk_buff *skb,
struct ip6addrlbl_entry *p,
u32 lseq,
- u32 pid, u32 seq, int event,
+ u32 portid, u32 seq, int event,
unsigned int flags)
{
- struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+ struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
sizeof(struct ifaddrlblmsg), flags);
if (!nlh)
return -EMSGSIZE;
@@ -493,20 +497,20 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
- struct hlist_node *pos;
int idx = 0, s_idx = cb->args[0];
int err;
rcu_read_lock();
- hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -523,8 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
- void *arg)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -551,10 +554,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
if (!tb[IFAL_ADDRESS])
return -EINVAL;
-
addr = nla_data(tb[IFAL_ADDRESS]);
- if (!addr)
- return -EINVAL;
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
@@ -568,13 +568,14 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWADDRLABEL, 0);
ip6addrlbl_put(p);
@@ -585,15 +586,18 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto out;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
out:
return err;
}
void __init ipv6_addr_label_rtnl_register(void)
{
- __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
- __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
- __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
+ __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
+ NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
+ NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
+ ip6addrlbl_dump, NULL);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 54e8e42f7a8..7cb4392690d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -18,6 +18,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "IPv6: " fmt
#include <linux/module.h>
#include <linux/capability.h>
@@ -48,19 +49,19 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
-#include <net/ipip.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/ndisc.h>
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
#endif
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <linux/mroute6.h>
MODULE_AUTHOR("Cast of dozens");
@@ -78,7 +79,7 @@ struct ipv6_params ipv6_defaults = {
.autoconf = 1,
};
-static int disable_ipv6_mod = 0;
+static int disable_ipv6_mod;
module_param_named(disable, disable_ipv6_mod, int, 0444);
MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
@@ -105,15 +106,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
- if (sock->type != SOCK_RAW &&
- sock->type != SOCK_DGRAM &&
- !inet_ehash_secret)
- build_ehash_secret();
-
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -160,12 +155,12 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -179,9 +174,8 @@ lookup_protocol:
sock_init_data(sock, sk);
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
- sk->sk_reuse = 1;
+ sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -214,8 +208,9 @@ lookup_protocol:
inet->mc_ttl = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
+ inet->rcv_tos = 0;
- if (ipv4_config.no_pmtu_disc)
+ if (net->ipv4.sysctl_ip_no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -256,7 +251,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -272,12 +267,16 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+
+ if (addr->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
addr_type = ipv6_addr_type(&addr->sin6_addr);
if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
@@ -300,7 +299,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- /* Reproduce AF_INET checks to make the bindings consitant */
+ /* Reproduce AF_INET checks to make the bindings consistent */
v4addr = addr->sin6_addr.s6_addr32[3];
chk_addr_ret = inet_addr_type(net, v4addr);
if (!sysctl_ip_nonlocal_bind &&
@@ -317,7 +316,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net_device *dev = NULL;
rcu_read_lock();
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
/* Override any existing binding, if another one
@@ -343,7 +342,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!inet->transparent &&
+ if (!(inet->freebind || inet->transparent) &&
!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
err = -EADDRNOTAVAIL;
@@ -357,10 +356,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = v4addr;
inet->inet_saddr = v4addr;
- ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
- ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+ np->saddr = addr->sin6_addr;
/* Make sure we are allowed to bind here. */
if (sk->sk_prot->get_port(sk, snum)) {
@@ -386,7 +385,6 @@ out_unlock:
rcu_read_unlock();
goto out;
}
-
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
@@ -404,7 +402,6 @@ int inet6_release(struct socket *sock)
return inet_release(sock);
}
-
EXPORT_SYMBOL(inet6_release);
void inet6_destroy_sock(struct sock *sk)
@@ -415,10 +412,12 @@ void inet6_destroy_sock(struct sock *sk)
/* Release rx options */
- if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
+ skb = xchg(&np->pktoptions, NULL);
+ if (skb != NULL)
kfree_skb(skb);
- if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
+ skb = xchg(&np->rxpmtu, NULL);
+ if (skb != NULL)
kfree_skb(skb);
/* Free flowlabels */
@@ -426,10 +425,10 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- if ((opt = xchg(&np->opt, NULL)) != NULL)
+ opt = xchg(&np->opt, NULL);
+ if (opt != NULL)
sock_kfree_s(sk, opt, opt->tot_len);
}
-
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
/*
@@ -439,7 +438,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
- struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
+ struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -454,23 +453,22 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
peer == 1)
return -ENOTCONN;
sin->sin6_port = inet->inet_dport;
- ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+ sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
} else {
- if (ipv6_addr_any(&np->rcv_saddr))
- ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ sin->sin6_addr = np->saddr;
else
- ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+ sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
}
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = sk->sk_bound_dev_if;
+ sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
+ sk->sk_bound_dev_if);
*uaddr_len = sizeof(*sin);
return 0;
}
-
EXPORT_SYMBOL(inet6_getname);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -478,8 +476,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- switch(cmd)
- {
+ switch (cmd) {
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
@@ -505,7 +502,6 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
/*NOTREACHED*/
return 0;
}
-
EXPORT_SYMBOL(inet6_ioctl);
const struct proto_ops inet6_stream_ops = {
@@ -611,25 +607,21 @@ out:
return ret;
out_permanent:
- printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
- protocol);
+ pr_err("Attempt to override permanent protocol %d\n", protocol);
goto out;
out_illegal:
- printk(KERN_ERR
- "Ignoring attempt to register invalid socket type %d.\n",
+ pr_err("Ignoring attempt to register invalid socket type %d\n",
p->type);
goto out;
}
-
EXPORT_SYMBOL(inet6_register_protosw);
void
inet6_unregister_protosw(struct inet_protosw *p)
{
if (INET_PROTOSW_PERMANENT & p->flags) {
- printk(KERN_ERR
- "Attempt to unregister permanent protocol %d.\n",
+ pr_err("Attempt to unregister permanent protocol %d\n",
p->protocol);
} else {
spin_lock_bh(&inetsw6_lock);
@@ -639,46 +631,38 @@ inet6_unregister_protosw(struct inet_protosw *p)
synchronize_net();
}
}
-
EXPORT_SYMBOL(inet6_unregister_protosw);
int inet6_sk_rebuild_header(struct sock *sk)
{
- int err;
- struct dst_entry *dst;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dst_entry *dst;
dst = __sk_dst_check(sk, np->dst_cookie);
if (dst == NULL) {
struct inet_sock *inet = inet_sk(sk);
struct in6_addr *final_p, final;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl6_flowlabel = np->flow_label;
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet->inet_dport;
- fl.fl_ip_sport = inet->inet_sport;
- security_sk_classify_flow(sk, &fl);
-
- final_p = fl6_update_dst(&fl, np->opt, &final);
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err) {
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowlabel = np->flow_label;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
sk->sk_route_caps = 0;
- return err;
- }
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
- sk->sk_err_soft = -err;
- return err;
+ sk->sk_err_soft = -PTR_ERR(dst);
+ return PTR_ERR(dst);
}
__ip6_dst_store(sk, dst, NULL, NULL);
@@ -686,276 +670,31 @@ int inet6_sk_rebuild_header(struct sock *sk)
return 0;
}
-
EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
-int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_skb_parm *opt = IP6CB(skb);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet6_skb_parm *opt = IP6CB(skb);
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK &
- *(__be32 *)skb_network_header(skb)) &&
+ (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
np->rxopt.bits.osrcrt)) ||
((opt->dst1 || opt->dst0) &&
(np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
-
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
-static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
-{
- const struct inet6_protocol *ops = NULL;
-
- for (;;) {
- struct ipv6_opt_hdr *opth;
- int len;
-
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_protos[proto]);
-
- if (unlikely(!ops))
- break;
-
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
-
- if (unlikely(!pskb_may_pull(skb, 8)))
- break;
-
- opth = (void *)skb->data;
- len = ipv6_optlen(opth);
-
- if (unlikely(!pskb_may_pull(skb, len)))
- break;
-
- proto = opth->nexthdr;
- __skb_pull(skb, len);
- }
-
- return proto;
-}
-
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[
- ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
- if (likely(ops && ops->gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->gso_send_check(skb);
- }
- rcu_read_unlock();
-
-out:
- return err;
-}
-
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int proto;
- struct frag_hdr *fptr;
- unsigned int unfrag_ip6hlen;
- u8 *prevhdr;
- int offset = 0;
-
- if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
- if (unlikely(skb_shinfo(skb)->gso_type &
- ~(SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_TCPV6 |
- 0)))
- goto out;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- segs = ERR_PTR(-EPROTONOSUPPORT);
-
- proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[proto]);
- if (likely(ops && ops->gso_segment)) {
- skb_reset_transport_header(skb);
- segs = ops->gso_segment(skb, features);
- }
- rcu_read_unlock();
-
- if (unlikely(IS_ERR(segs)))
- goto out;
-
- for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
- if (proto == IPPROTO_UDP) {
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)(skb_network_header(skb) +
- unfrag_ip6hlen);
- fptr->frag_off = htons(offset);
- if (skb->next != NULL)
- fptr->frag_off |= htons(IP6_MF);
- offset += (ntohs(ipv6h->payload_len) -
- sizeof(struct frag_hdr));
- }
- }
-
-out:
- return segs;
-}
-
-struct ipv6_gro_cb {
- struct napi_gro_cb napi;
- int proto;
-};
-
-#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb)
-
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct sk_buff **pp = NULL;
- struct sk_buff *p;
- struct ipv6hdr *iph;
- unsigned int nlen;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int proto;
- __wsum csum;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
-
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
-
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
-
- rcu_read_lock();
- proto = iph->nexthdr;
- ops = rcu_dereference(inet6_protos[proto]);
- if (!ops || !ops->gro_receive) {
- __pskb_pull(skb, skb_gro_offset(skb));
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
-
- if (!ops || !ops->gro_receive)
- goto out_unlock;
-
- iph = ipv6_hdr(skb);
- }
-
- IPV6_GRO_CB(skb)->proto = proto;
-
- flush--;
- nlen = skb_network_header_len(skb);
-
- for (p = *head; p; p = p->next) {
- struct ipv6hdr *iph2;
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- iph2 = ipv6_hdr(p);
-
- /* All fields must match except length. */
- if (nlen != skb_network_header_len(p) ||
- memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) ||
- memcmp(&iph->nexthdr, &iph2->nexthdr,
- nlen - offsetof(struct ipv6hdr, nexthdr))) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
-
- NAPI_GRO_CB(p)->flush |= flush;
- }
-
- NAPI_GRO_CB(skb)->flush |= flush;
-
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
-
- pp = ops->gro_receive(head, skb);
-
- skb->csum = csum;
-
-out_unlock:
- rcu_read_unlock();
-
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-static int ipv6_gro_complete(struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct ipv6hdr *iph = ipv6_hdr(skb);
- int err = -ENOSYS;
-
- iph->payload_len = htons(skb->len - skb_network_offset(skb) -
- sizeof(*iph));
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]);
- if (WARN_ON(!ops || !ops->gro_complete))
- goto out_unlock;
-
- err = ops->gro_complete(skb);
-
-out_unlock:
- rcu_read_unlock();
-
- return err;
-}
-
static struct packet_type ipv6_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.func = ipv6_rcv,
- .gso_send_check = ipv6_gso_send_check,
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
};
static int __init ipv6_packet_init(void)
@@ -971,46 +710,52 @@ static void ipv6_packet_cleanup(void)
static int __net_init ipv6_init_mibs(struct net *net)
{
- if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ int i;
+
+ net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udp_stats_in6)
return -ENOMEM;
- if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udplite_stats_in6)
goto err_udplite_mib;
- if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+ if (!net->mib.ipv6_statistics)
goto err_ip_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
- sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+
+ for_each_possible_cpu(i) {
+ struct ipstats_mib *af_inet6_stats;
+ af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
+ u64_stats_init(&af_inet6_stats->syncp);
+ }
+
+
+ net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+ if (!net->mib.icmpv6_statistics)
goto err_icmp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
- sizeof(struct icmpv6msg_mib),
- __alignof__(struct icmpv6msg_mib)) < 0)
+ net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpv6msg_statistics)
goto err_icmpmsg_mib;
return 0;
err_icmpmsg_mib:
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
err_icmp_mib:
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+ free_percpu(net->mib.ipv6_statistics);
err_ip_mib:
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
err_udplite_mib:
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ free_percpu(net->mib.udp_stats_in6);
return -ENOMEM;
}
static void ipv6_cleanup_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics);
+ free_percpu(net->mib.udp_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
+ free_percpu(net->mib.ipv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
+ kfree(net->mib.icmpv6msg_statistics);
}
static int __net_init inet6_net_init(struct net *net)
@@ -1019,6 +764,8 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
+ net->ipv6.sysctl.flowlabel_consistency = 1;
+ atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
if (err)
@@ -1062,22 +809,28 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
+static const struct ipv6_stub ipv6_stub_impl = {
+ .ipv6_sock_mc_join = ipv6_sock_mc_join,
+ .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
+ .ipv6_dst_lookup = ip6_dst_lookup,
+ .udpv6_encap_enable = udpv6_encap_enable,
+ .ndisc_send_na = ndisc_send_na,
+ .nd_tbl = &nd_tbl,
+};
+
static int __init inet6_init(void)
{
- struct sk_buff *dummy_skb;
struct list_head *r;
int err = 0;
- BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+ BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
/* Register the socket-side information for inet6_create. */
- for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
INIT_LIST_HEAD(r);
if (disable_ipv6_mod) {
- printk(KERN_INFO
- "IPv6: Loaded, but administratively disabled, "
- "reboot required to enable\n");
+ pr_info("Loaded, but administratively disabled, reboot required to enable\n");
goto out;
}
@@ -1097,6 +850,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_udplite_proto;
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
@@ -1112,15 +868,10 @@ static int __init inet6_init(void)
if (err)
goto out_sock_register_fail;
-#ifdef CONFIG_SYSCTL
- err = ipv6_static_sysctl_register();
- if (err)
- goto static_sysctl_fail;
-#endif
/*
* ipngwg API draft makes clear that the correct semantics
* for TCP and UDP is to consider one TCP and UDP instance
- * in a host availiable by both INET and INET6 APIs and
+ * in a host available by both INET and INET6 APIs and
* able to communicate via both network protocols.
*/
@@ -1139,6 +890,9 @@ static int __init inet6_init(void)
err = igmp6_init();
if (err)
goto igmp_fail;
+
+ ipv6_stub = &ipv6_stub_impl;
+
err = ipv6_netfilter_init();
if (err)
goto netfilter_fail;
@@ -1157,6 +911,9 @@ static int __init inet6_init(void)
err = ip6_route_init();
if (err)
goto ip6_route_fail;
+ err = ndisc_late_init();
+ if (err)
+ goto ndisc_late_fail;
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
@@ -1190,6 +947,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_packet_fail;
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -1200,8 +961,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- ipv6_packet_cleanup();
+ pingv6_exit();
#endif
+pingv6_fail:
+ ipv6_packet_cleanup();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -1217,6 +980,8 @@ ipv6_exthdrs_fail:
addrconf_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
+ ndisc_late_cleanup();
+ndisc_late_fail:
ip6_route_cleanup();
ip6_route_fail:
#ifdef CONFIG_PROC_FS
@@ -1241,14 +1006,12 @@ ipmr_fail:
icmp_fail:
unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
-#ifdef CONFIG_SYSCTL
- ipv6_static_sysctl_unregister();
-static_sysctl_fail:
-#endif
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
@@ -1261,56 +1024,4 @@ out_unregister_tcp_proto:
}
module_init(inet6_init);
-static void __exit inet6_exit(void)
-{
- if (disable_ipv6_mod)
- return;
-
- /* First of all disallow new sockets creation. */
- sock_unregister(PF_INET6);
- /* Disallow any further netlink messages */
- rtnl_unregister_all(PF_INET6);
-
-#ifdef CONFIG_SYSCTL
- ipv6_sysctl_unregister();
-#endif
- udpv6_exit();
- udplitev6_exit();
- tcpv6_exit();
-
- /* Cleanup code parts. */
- ipv6_packet_cleanup();
- ipv6_frag_exit();
- ipv6_exthdrs_exit();
- addrconf_cleanup();
- ip6_flowlabel_cleanup();
- ip6_route_cleanup();
-#ifdef CONFIG_PROC_FS
-
- /* Cleanup code parts. */
- if6_proc_exit();
- ipv6_misc_proc_exit();
- udplite6_proc_exit();
- raw6_proc_exit();
-#endif
- ipv6_netfilter_fini();
- igmp6_cleanup();
- ndisc_cleanup();
- ip6_mr_cleanup();
- icmpv6_cleanup();
- rawv6_exit();
-
- unregister_pernet_subsys(&inet6_net_ops);
-#ifdef CONFIG_SYSCTL
- ipv6_static_sysctl_unregister();
-#endif
- proto_unregister(&rawv6_prot);
- proto_unregister(&udplitev6_prot);
- proto_unregister(&udpv6_prot);
- proto_unregister(&tcpv6_prot);
-
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
-}
-module_exit(inet6_exit);
-
MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ee82d4ef26c..72a4930bdc0 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors
*
@@ -24,6 +23,8 @@
* This file is derived from net/ipv4/ah.c.
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <crypto/hash.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -33,6 +34,7 @@
#include <linux/pfkeyv2.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -41,7 +43,7 @@
#define IPV6HDR_BASELEN 8
struct tmp_ext {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
struct in6_addr saddr;
#endif
struct in6_addr daddr;
@@ -111,7 +113,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
__alignof__(struct scatterlist));
}
-static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
{
u8 *opt = (u8 *)opthdr;
int len = ipv6_optlen(opthdr);
@@ -125,7 +127,7 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
switch (opt[off]) {
- case IPV6_TLV_PAD0:
+ case IPV6_TLV_PAD1:
optlen = 1;
break;
default:
@@ -143,13 +145,13 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
len -= optlen;
}
if (len == 0)
- return 1;
+ return true;
bad:
- return 0;
+ return false;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/**
* ipv6_rearrange_destopt - rearrange IPv6 destination options header
* @iph: IPv6 header
@@ -169,7 +171,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
switch (opt[off]) {
- case IPV6_TLV_PAD0:
+ case IPV6_TLV_PAD1:
optlen = 1;
break;
default:
@@ -189,13 +191,13 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
hao = (struct ipv6_destopt_hao *)&opt[off];
if (hao->length != sizeof(hao->addr)) {
- if (net_ratelimit())
- printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
+ net_warn_ratelimited("destopt hao: invalid header length: %u\n",
+ hao->length);
goto bad;
}
- ipv6_addr_copy(&final_addr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &iph->saddr);
- ipv6_addr_copy(&iph->saddr, &final_addr);
+ final_addr = hao->addr;
+ hao->addr = iph->saddr;
+ iph->saddr = final_addr;
}
break;
}
@@ -241,13 +243,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
segments = rthdr->hdrlen >> 1;
addrs = ((struct rt0_hdr *)rthdr)->addr;
- ipv6_addr_copy(&final_addr, addrs + segments - 1);
+ final_addr = addrs[segments - 1];
addrs += segments - segments_left;
memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
- ipv6_addr_copy(addrs, &iph->daddr);
- ipv6_addr_copy(&iph->daddr, &final_addr);
+ addrs[0] = iph->daddr;
+ iph->daddr = final_addr;
}
static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
@@ -317,15 +319,13 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
#endif
}
- err = ah->nexthdr;
-
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
}
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -384,7 +395,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(iph_ext, &top_iph->saddr, extlen);
#else
memcpy(iph_ext, &top_iph->daddr, extlen);
@@ -409,12 +420,17 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->reserved = 0;
ah->spi = x->id.spi;
- ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+ ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -433,7 +449,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -466,12 +482,15 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
if (err)
goto out;
+ err = ah->nexthdr;
+
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, hdr_len);
__skb_pull(skb, ah_hlen + hdr_len);
- skb_set_transport_header(skb, -hdr_len);
-
- err = ah->nexthdr;
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -511,14 +530,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
/* We are going to _remove_ AH header to keep sockets happy,
* so... Later this can change. */
- if (skb_cloned(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
goto out;
skb->ip_summed = CHECKSUM_NONE;
@@ -538,22 +560,32 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- ip6h = ipv6_hdr(skb);
-
- skb_push(skb, hdr_len);
if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
goto out;
nfrags = err;
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ ah = (struct ip_auth_hdr *)skb->data;
+ ip6h = ipv6_hdr(skb);
+
+ skb_push(skb, hdr_len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -568,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -581,8 +619,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
- err = NET_XMIT_DROP;
goto out_free;
}
@@ -592,9 +628,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, hdr_len);
- skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + hdr_len);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
+
err = nexthdr;
out_free:
@@ -603,26 +643,29 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
- return;
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
-
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
- ntohl(ah->spi), &iph->daddr);
+ return 0;
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -661,9 +704,9 @@ static int ah6_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
- x->aalg->alg_name, crypto_ahash_digestsize(ahash),
- aalg_desc->uinfo.auth.icv_fullbits/8);
+ pr_info("AH: %s digestsize %u != %hu\n",
+ x->aalg->alg_name, crypto_ahash_digestsize(ahash),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
@@ -707,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -720,21 +768,22 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
{
if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
- printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+ pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
- printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
}
@@ -744,11 +793,11 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
- printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
+ pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
- printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+ pr_info("%s: can't remove xfrm type\n", __func__);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0e5e943446f..21018324468 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,17 +44,17 @@
#include <net/checksum.h>
-static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
/* Big ac list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
/*
* socket join an anycast group
*/
-int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev = NULL;
@@ -64,7 +64,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
int ishost = !net->ipv6.devconf_all->forwarding;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (pac == NULL)
return -ENOMEM;
pac->acl_next = NULL;
- ipv6_addr_copy(&pac->acl_addr, addr);
+ pac->acl_addr = *addr;
rcu_read_lock();
if (ifindex == 0) {
@@ -83,8 +83,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
- dst_release(&rt->dst);
+ dev = rt->dst.dev;
+ ip6_rt_put(rt);
} else if (ishost) {
err = -EADDRNOTAVAIL;
goto error;
@@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
err = ipv6_dev_ac_inc(dev, addr);
if (!err) {
- write_lock_bh(&ipv6_sk_ac_lock);
+ spin_lock_bh(&ipv6_sk_ac_lock);
pac->acl_next = np->ipv6_ac_list;
np->ipv6_ac_list = pac;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
pac = NULL;
}
@@ -145,14 +145,14 @@ error:
/*
* socket leave an anycast group
*/
-int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev;
struct ipv6_ac_socklist *pac, *prev_pac;
struct net *net = sock_net(sk);
- write_lock_bh(&ipv6_sk_ac_lock);
+ spin_lock_bh(&ipv6_sk_ac_lock);
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
prev_pac = pac;
}
if (!pac) {
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
return -ENOENT;
}
if (prev_pac)
@@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
@@ -189,10 +189,13 @@ void ipv6_sock_ac_close(struct sock *sk)
struct net *net = sock_net(sk);
int prev_index;
- write_lock_bh(&ipv6_sk_ac_lock);
+ if (!np->ipv6_ac_list)
+ return;
+
+ spin_lock_bh(&ipv6_sk_ac_lock);
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
- write_unlock_bh(&ipv6_sk_ac_lock);
+ spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
rcu_read_lock();
@@ -211,35 +214,6 @@ void ipv6_sock_ac_close(struct sock *sk)
rcu_read_unlock();
}
-#if 0
-/* The function is not used, which is funny. Apparently, author
- * supposed to use it to filter out datagrams inside udp/raw but forgot.
- *
- * It is OK, anycasts are not special comparing to delivery to unicasts.
- */
-
-int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
-{
- struct ipv6_ac_socklist *pac;
- struct ipv6_pinfo *np = inet6_sk(sk);
- int found;
-
- found = 0;
- read_lock(&ipv6_sk_ac_lock);
- for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
- if (ifindex && pac->acl_ifindex != ifindex)
- continue;
- found = ipv6_addr_equal(&pac->acl_addr, addr);
- if (found)
- break;
- }
- read_unlock(&ipv6_sk_ac_lock);
-
- return found;
-}
-
-#endif
-
static void aca_put(struct ifacaddr6 *ac)
{
if (atomic_dec_and_test(&ac->aca_refcnt)) {
@@ -252,7 +226,7 @@ static void aca_put(struct ifacaddr6 *ac)
/*
* device anycast group inc (add if not found)
*/
-int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
struct inet6_dev *idev;
@@ -289,14 +263,14 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
goto out;
}
- rt = addrconf_dst_alloc(idev, addr, 1);
+ rt = addrconf_dst_alloc(idev, addr, true);
if (IS_ERR(rt)) {
kfree(aca);
err = PTR_ERR(rt);
goto out;
}
- ipv6_addr_copy(&aca->aca_addr, addr);
+ aca->aca_addr = *addr;
aca->aca_idev = idev;
aca->aca_rt = rt;
aca->aca_users = 1;
@@ -324,7 +298,7 @@ out:
/*
* device anycast group decrement
*/
-int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca, *prev_aca;
@@ -358,7 +332,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
}
/* called with rcu_read_lock() */
-static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -371,7 +345,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
* check if the interface has this anycast address
* called with rcu_read_lock()
*/
-static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
+static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev;
struct ifacaddr6 *aca;
@@ -385,16 +359,16 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
read_unlock_bh(&idev->lock);
return aca != NULL;
}
- return 0;
+ return false;
}
/*
* check if given interface (or any, if dev==0) has this anycast address
*/
-int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
- struct in6_addr *addr)
+bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
{
- int found = 0;
+ bool found = false;
rcu_read_lock();
if (dev)
@@ -402,13 +376,24 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
else
for_each_netdev_rcu(net, dev)
if (ipv6_chk_acast_dev(dev, addr)) {
- found = 1;
+ found = true;
break;
}
rcu_read_unlock();
return found;
}
+/* check if this anycast address is link-local on given interface or
+ * is global
+ */
+bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
+{
+ return ipv6_chk_acast_addr(net,
+ (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
+ dev : NULL),
+ addr);
+}
#ifdef CONFIG_PROC_FS
struct ac6_iter_state {
@@ -535,7 +520,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -543,7 +528,7 @@ int __net_init ac6_proc_init(struct net *net)
void ac6_proc_exit(struct net *net)
{
- proc_net_remove(net, "anycast6");
+ remove_proc_entry("anycast6", net->proc_net);
}
#endif
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 320bdb877ee..c3bf2d2e519 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -22,6 +22,7 @@
#include <linux/ipv6.h>
#include <linux/route.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
@@ -29,10 +30,16 @@
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/tcp_states.h>
+#include <net/dsfield.h>
#include <linux/errqueue.h>
#include <asm/uaccess.h>
+static bool ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+ return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
+}
+
int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
@@ -40,7 +47,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *daddr, *final_p, final;
struct dst_entry *dst;
- struct flowi fl;
+ struct flowi6 fl6;
struct ip6_flowlabel *flowlabel = NULL;
struct ipv6_txoptions *opt;
int addr_type;
@@ -59,14 +66,13 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
}
}
@@ -93,21 +99,23 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sin.sin_port = usin->sin6_port;
err = ip4_datagram_connect(sk,
- (struct sockaddr*) &sin,
+ (struct sockaddr *) &sin,
sizeof(sin));
ipv4_connected:
if (err)
goto out;
- ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+ ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
- if (ipv6_addr_any(&np->saddr))
+ if (ipv6_addr_any(&np->saddr) ||
+ ipv6_mapped_addr_any(&np->saddr))
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- if (ipv6_addr_any(&np->rcv_saddr)) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+ ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
@@ -115,7 +123,7 @@ ipv4_connected:
goto out;
}
- if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
if (sk->sk_bound_dev_if &&
@@ -136,8 +144,8 @@ ipv4_connected:
}
}
- ipv6_addr_copy(&np->daddr, daddr);
- np->flow_label = fl.fl6_flowlabel;
+ sk->sk_v6_daddr = *daddr;
+ np->flow_label = fl6.flowlabel;
inet->inet_dport = usin->sin6_port;
@@ -146,53 +154,46 @@ ipv4_connected:
* destination cache for it.
*/
- fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet->inet_dport;
- fl.fl_ip_sport = inet->inet_sport;
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
- if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
- fl.oif = np->mcast_oif;
+ if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+ fl6.flowi6_oif = np->mcast_oif;
- security_sk_classify_flow(sk, &fl);
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
opt = flowlabel ? flowlabel->opt : np->opt;
- final_p = fl6_update_dst(&fl, opt, &final);
+ final_p = fl6_update_dst(&fl6, opt, &final);
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ err = 0;
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
goto out;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
- if (err < 0) {
- if (err == -EREMOTE)
- err = ip6_dst_blackhole(sk, &dst, &fl);
- if (err < 0)
- goto out;
}
/* source address lookup done in ip6_dst_lookup */
if (ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+ np->saddr = fl6.saddr;
- if (ipv6_addr_any(&np->rcv_saddr)) {
- ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ sk->sk_v6_rcv_saddr = fl6.saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+ ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
#endif
NULL);
@@ -202,6 +203,17 @@ out:
fl6_sock_release(flowlabel);
return err;
}
+EXPORT_SYMBOL_GPL(ip6_datagram_connect);
+
+int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr);
+ if (sin6->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+ return ip6_datagram_connect(sk, uaddr, addr_len);
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
@@ -238,7 +250,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
kfree_skb(skb);
}
-void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
@@ -257,7 +269,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
skb_put(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
- ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+ iph->daddr = fl6->daddr;
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
@@ -268,7 +280,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
- serr->port = fl->fl_ip_dport;
+ serr->port = fl6->fl6_dport;
__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
skb_reset_transport_header(skb);
@@ -277,7 +289,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
kfree_skb(skb);
}
-void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6hdr *iph;
@@ -294,20 +306,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
skb_put(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
- ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+ iph->daddr = fl6->daddr;
mtu_info = IP6CBMTU(skb);
- if (!mtu_info) {
- kfree_skb(skb);
- return;
- }
mtu_info->ip6m_mtu = mtu;
mtu_info->ip6m_addr.sin6_family = AF_INET6;
mtu_info->ip6m_addr.sin6_port = 0;
mtu_info->ip6m_addr.sin6_flowinfo = 0;
- mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
- ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+ mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
+ mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
skb_reset_transport_header(skb);
@@ -319,12 +327,12 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
/*
* Handle MSG_ERRQUEUE
*/
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
- struct sockaddr_in6 *sin;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
struct sockaddr_in6 offender;
@@ -350,26 +358,26 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
serr = SKB_EXT_ERR(skb);
- sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = serr->port;
- sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
- ipv6_addr_copy(&sin->sin6_addr,
- (struct in6_addr *)(nh + serr->addr_offset));
+ const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
+ struct ipv6hdr, daddr);
+ sin->sin6_addr = ip6h->daddr;
if (np->sndflow)
- sin->sin6_flowinfo =
- (*(__be32 *)(nh + serr->addr_offset - 24) &
- IPV6_FLOWINFO_MASK);
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = IP6CB(skb)->iif;
+ sin->sin6_flowinfo = ip6_flowinfo(ip6h);
+ sin->sin6_scope_id =
+ ipv6_iface_scope_id(&sin->sin6_addr,
+ IP6CB(skb)->iif);
} else {
ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
&sin->sin6_addr);
+ sin->sin6_scope_id = 0;
}
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -378,18 +386,22 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
- sin->sin6_scope_id = 0;
+ sin->sin6_port = 0;
+ if (np->rxopt.all)
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
if (skb->protocol == htons(ETH_P_IPV6)) {
- ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
+ sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = IP6CB(skb)->iif;
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
+ sin->sin6_scope_id =
+ ipv6_iface_scope_id(&sin->sin6_addr,
+ IP6CB(skb)->iif);
} else {
struct inet_sock *inet = inet_sk(sk);
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
+ sin->sin6_scope_id = 0;
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
}
@@ -418,16 +430,18 @@ out_free_skb:
out:
return err;
}
+EXPORT_SYMBOL_GPL(ipv6_recv_error);
/*
* Handle IPV6_RECVPATHMTU
*/
-int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
- struct sockaddr_in6 *sin;
struct ip6_mtuinfo mtu_info;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
int err;
int copied;
@@ -449,13 +463,13 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
- sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
- ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+ sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+ *addr_len = sizeof(*sin);
}
put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -469,19 +483,34 @@ out:
}
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_skb_parm *opt = IP6CB(skb);
- unsigned char *nh = skb_network_header(skb);
+ bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6);
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+ if (is_ipv6) {
+ src_info.ipi6_ifindex = IP6CB(skb)->iif;
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+ } else {
+ src_info.ipi6_ifindex =
+ PKTINFO_SKB_CB(skb)->ipi_ifindex;
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
+ &src_info.ipi6_addr);
+ }
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
+}
+
+void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ unsigned char *nh = skb_network_header(skb);
if (np->rxopt.bits.rxhlim) {
int hlim = ipv6_hdr(skb)->hop_limit;
@@ -489,13 +518,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
}
if (np->rxopt.bits.rxtclass) {
- int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
+ int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
- if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
- __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
- put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+ if (np->rxopt.bits.rxflow) {
+ __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh);
+ if (flowinfo)
+ put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
}
/* HbH is allowed only once */
@@ -519,10 +549,10 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
while (off <= opt->lastopt) {
- unsigned len;
+ unsigned int len;
u8 *ptr = nh + off;
- switch(nexthdr) {
+ switch (nexthdr) {
case IPPROTO_DSTOPTS:
nexthdr = ptr[0];
len = (ptr[1] + 1) << 3;
@@ -554,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
@@ -579,7 +609,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
}
if (np->rxopt.bits.rxorigdstaddr) {
struct sockaddr_in6 sin6;
- u16 *ports = (u16 *) skb_transport_header(skb);
+ __be16 *ports = (__be16 *) skb_transport_header(skb);
if (skb_transport_offset(skb) + 4 <= skb->len) {
/* All current transport protocols have the port numbers in the
@@ -588,21 +618,30 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
*/
sin6.sin6_family = AF_INET6;
- ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+ sin6.sin6_addr = ipv6_hdr(skb)->daddr;
sin6.sin6_port = ports[1];
sin6.sin6_flowinfo = 0;
- sin6.sin6_scope_id = 0;
+ sin6.sin6_scope_id =
+ ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
+ opt->iif);
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
}
- return 0;
}
-int datagram_send_ctl(struct net *net,
- struct msghdr *msg, struct flowi *fl,
- struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag)
+void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
+
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+ struct msghdr *msg, struct flowi6 *fl6,
+ struct ipv6_txoptions *opt,
+ int *hlimit, int *tclass, int *dontfrag)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
@@ -636,16 +675,17 @@ int datagram_send_ctl(struct net *net,
src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
if (src_info->ipi6_ifindex) {
- if (fl->oif && src_info->ipi6_ifindex != fl->oif)
+ if (fl6->flowi6_oif &&
+ src_info->ipi6_ifindex != fl6->flowi6_oif)
return -EINVAL;
- fl->oif = src_info->ipi6_ifindex;
+ fl6->flowi6_oif = src_info->ipi6_ifindex;
}
addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
rcu_read_lock();
- if (fl->oif) {
- dev = dev_get_by_index_rcu(net, fl->oif);
+ if (fl6->flowi6_oif) {
+ dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
if (!dev) {
rcu_read_unlock();
return -ENODEV;
@@ -657,11 +697,14 @@ int datagram_send_ctl(struct net *net,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
- if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0))
+ if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+ !ipv6_chk_addr(net, &src_info->ipi6_addr,
+ strict ? dev : NULL, 0) &&
+ !ipv6_chk_acast_addr_src(net, dev,
+ &src_info->ipi6_addr))
err = -EINVAL;
else
- ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+ fl6->saddr = src_info->ipi6_addr;
}
rcu_read_unlock();
@@ -678,13 +721,13 @@ int datagram_send_ctl(struct net *net,
goto exit_f;
}
- if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
- if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+ if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
+ if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
err = -EINVAL;
goto exit_f;
}
}
- fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
+ fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
break;
case IPV6_2292HOPOPTS:
@@ -700,7 +743,7 @@ int datagram_send_ctl(struct net *net,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -720,7 +763,7 @@ int datagram_send_ctl(struct net *net,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -745,7 +788,7 @@ int datagram_send_ctl(struct net *net,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -768,7 +811,7 @@ int datagram_send_ctl(struct net *net,
rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1) {
@@ -829,9 +872,8 @@ int datagram_send_ctl(struct net *net,
int tc;
err = -EINVAL;
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
goto exit_f;
- }
tc = *(int *)CMSG_DATA(cmsg);
if (tc < -1 || tc > 0xff)
@@ -848,9 +890,8 @@ int datagram_send_ctl(struct net *net,
int df;
err = -EINVAL;
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
goto exit_f;
- }
df = *(int *)CMSG_DATA(cmsg);
if (df < 0 || df > 1)
@@ -872,3 +913,30 @@ int datagram_send_ctl(struct net *net,
exit_f:
return err;
}
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
+
+void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+ __u16 srcp, __u16 destp, int bucket)
+{
+ const struct in6_addr *dest, *src;
+
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
+ seq_printf(seq,
+ "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
+ bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->sk_state,
+ sk_wmem_alloc_get(sp),
+ sk_rmem_alloc_get(sp),
+ 0, 0L, 0,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ 0,
+ sock_i_ino(sp),
+ atomic_read(&sp->sk_refcnt), sp,
+ atomic_read(&sp->sk_drops));
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ee9b93bdd6a..d15da137714 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors
*
@@ -24,6 +23,8 @@
* This file is derived from net/ipv4/esp.c
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <crypto/aead.h>
#include <crypto/authenc.h>
#include <linux/err.h>
@@ -37,6 +38,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -49,19 +51,25 @@ struct esp_skb_cb {
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
+
/*
* Allocate an AEAD request structure with extra space for SG and IV.
*
- * For alignment considerations the IV is placed at the front, followed
- * by the request and finally the SG list.
+ * For alignment considerations the upper 32 bits of the sequence number are
+ * placed at the front, if present. Followed by the IV, the request and finally
+ * the SG list.
*
* TODO: Use spare space in skb for this where possible.
*/
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
{
unsigned int len;
- len = crypto_aead_ivsize(aead);
+ len = seqihlen;
+
+ len += crypto_aead_ivsize(aead);
+
if (len) {
len += crypto_aead_alignmask(aead) &
~(crypto_tfm_ctx_alignment() - 1);
@@ -76,10 +84,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
return kmalloc(len, GFP_ATOMIC);
}
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
+static inline __be32 *esp_tmp_seqhi(void *tmp)
+{
+ return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
{
return crypto_aead_ivsize(aead) ?
- PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
+ PTR_ALIGN((u8 *)tmp + seqhilen,
+ crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
}
static inline struct aead_givcrypt_request *esp_tmp_givreq(
@@ -140,47 +154,73 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
int blksize;
int clen;
int alen;
+ int plen;
+ int tfclen;
int nfrags;
+ int assoclen;
+ int sglists;
+ int seqhilen;
u8 *iv;
u8 *tail;
- struct esp_data *esp = x->data;
+ __be32 *seqhi;
/* skb is pure payload to encrypt */
- err = -ENOMEM;
-
- /* Round to block size */
- clen = skb->len;
-
- aead = esp->aead;
+ aead = x->data;
alen = crypto_aead_authsize(aead);
+ tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ tfclen = padto - skb->len;
+ }
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(clen + 2, blksize);
- if (esp->padlen)
- clen = ALIGN(clen, esp->padlen);
+ clen = ALIGN(skb->len + 2 + tfclen, blksize);
+ plen = clen - skb->len - tfclen;
- if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+ err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+ if (err < 0)
goto error;
nfrags = err;
- tmp = esp_alloc_tmp(aead, nfrags + 1);
- if (!tmp)
+ assoclen = sizeof(*esph);
+ sglists = 1;
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists += 2;
+ seqhilen += sizeof(__be32);
+ assoclen += seqhilen;
+ }
+
+ tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ if (!tmp) {
+ err = -ENOMEM;
goto error;
+ }
- iv = esp_tmp_iv(aead, tmp);
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_givreq(aead, iv);
asg = esp_givreq_sg(aead, req);
- sg = asg + 1;
+ sg = asg + sglists;
/* Fill padding... */
tail = skb_tail_pointer(trailer);
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
do {
int i;
- for (i=0; i<clen-skb->len - 2; i++)
+ for (i = 0; i < plen - 2; i++)
tail[i] = i + 1;
} while (0);
- tail[clen-skb->len - 2] = (clen - skb->len) - 2;
- tail[clen - skb->len - 1] = *skb_mac_header(skb);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = *skb_mac_header(skb);
pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
@@ -188,19 +228,27 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
*skb_mac_header(skb) = IPPROTO_ESP;
esph->spi = x->id.spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
clen + alen);
- sg_init_one(asg, esph, sizeof(*esph));
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ sg_init_table(asg, 3);
+ sg_set_buf(asg, &esph->spi, sizeof(__be32));
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(asg + 1, seqhi, seqhilen);
+ sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+ } else
+ sg_init_one(asg, esph, sizeof(*esph));
aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
- aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
+ aead_givcrypt_set_assoc(req, asg, assoclen);
aead_givcrypt_set_giv(req, esph->enc_data,
- XFRM_SKB_CB(skb)->seq.output);
+ XFRM_SKB_CB(skb)->seq.output.low);
ESP_SKB_CB(skb)->tmp = tmp;
err = crypto_aead_givencrypt(req);
@@ -219,8 +267,7 @@ error:
static int esp_input_done2(struct sk_buff *skb, int err)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int elen = skb->len - hlen;
@@ -248,7 +295,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
pskb_trim(skb, skb->len - alen - padlen - 2);
__skb_pull(skb, hlen);
- skb_set_transport_header(skb, -hdr_len);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -hdr_len);
err = nexthdr[1];
@@ -270,14 +320,17 @@ static void esp_input_done(struct crypto_async_request *base, int err)
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
int nfrags;
+ int assoclen;
+ int sglists;
+ int seqhilen;
int ret = 0;
void *tmp;
+ __be32 *seqhi;
u8 *iv;
struct scatterlist *sg;
struct scatterlist *asg;
@@ -298,15 +351,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
}
ret = -ENOMEM;
- tmp = esp_alloc_tmp(aead, nfrags + 1);
+
+ assoclen = sizeof(*esph);
+ sglists = 1;
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists += 2;
+ seqhilen += sizeof(__be32);
+ assoclen += seqhilen;
+ }
+
+ tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
if (!tmp)
goto out;
ESP_SKB_CB(skb)->tmp = tmp;
- iv = esp_tmp_iv(aead, tmp);
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
asg = esp_req_sg(aead, req);
- sg = asg + 1;
+ sg = asg + sglists;
skb->ip_summed = CHECKSUM_NONE;
@@ -317,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
- sg_init_one(asg, esph, sizeof(*esph));
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ sg_init_table(asg, 3);
+ sg_set_buf(asg, &esph->spi, sizeof(__be32));
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(asg + 1, seqhi, seqhilen);
+ sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+ } else
+ sg_init_one(asg, esph, sizeof(*esph));
aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen, iv);
- aead_request_set_assoc(req, asg, sizeof(*esph));
+ aead_request_set_assoc(req, asg, assoclen);
ret = crypto_aead_decrypt(req);
if (ret == -EINPROGRESS)
@@ -335,58 +408,57 @@ out:
static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
- struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
- u32 align = max_t(u32, blksize, esp->padlen);
- u32 rem;
-
- mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
- rem = mtu & (align - 1);
- mtu &= ~(align - 1);
-
- if (x->props.mode != XFRM_MODE_TUNNEL) {
- u32 padsize = ((blksize - 1) & 7) + 1;
- mtu -= blksize - padsize;
- mtu += min_t(u32, blksize - padsize, rem);
- }
+ struct crypto_aead *aead = x->data;
+ u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ unsigned int net_adj;
+
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ net_adj = sizeof(struct ipv6hdr);
+ else
+ net_adj = 0;
- return mtu - 2;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
- return;
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
- printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
- ntohl(esph->spi), &iph->daddr);
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
+ struct crypto_aead *aead = x->data;
- if (!esp)
+ if (!aead)
return;
- crypto_free_aead(esp->aead);
- kfree(esp);
+ crypto_free_aead(aead);
}
static int esp_init_aead(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
int err;
@@ -395,7 +467,7 @@ static int esp_init_aead(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
err = crypto_aead_setkey(aead, x->aead->alg_key,
(x->aead->alg_key_len + 7) / 8);
@@ -412,7 +484,6 @@ error:
static int esp_init_authenc(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
struct rtattr *rta;
@@ -427,17 +498,27 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
err = -ENAMETOOLONG;
- if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
- x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
- goto error;
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+ "authencesn(%s,%s)",
+ x->aalg ? x->aalg->alg_name : "digest_null",
+ x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+ } else {
+ if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+ "authenc(%s,%s)",
+ x->aalg ? x->aalg->alg_name : "digest_null",
+ x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+ }
aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
(x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
@@ -492,7 +573,6 @@ error:
static int esp6_init_state(struct xfrm_state *x)
{
- struct esp_data *esp;
struct crypto_aead *aead;
u32 align;
int err;
@@ -500,11 +580,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (x->encap)
return -EINVAL;
- esp = kzalloc(sizeof(*esp), GFP_KERNEL);
- if (esp == NULL)
- return -ENOMEM;
-
- x->data = esp;
+ x->data = NULL;
if (x->aead)
err = esp_init_aead(x);
@@ -514,9 +590,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (err)
goto error;
- aead = esp->aead;
-
- esp->padlen = 0;
+ aead = x->data;
x->props.header_len = sizeof(struct ip_esp_hdr) +
crypto_aead_ivsize(aead);
@@ -536,14 +610,17 @@ static int esp6_init_state(struct xfrm_state *x)
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
- if (esp->padlen)
- align = max_t(u32, align, esp->padlen);
- x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
error:
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -558,20 +635,21 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
{
if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
- printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+ pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
- printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
}
@@ -581,10 +659,10 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
- printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
+ pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
- printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+ pr_info("%s: can't remove xfrm type\n", __func__);
}
module_init(esp6_init);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 262f105d23b..8d67900aa00 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -30,6 +30,7 @@
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <net/dst.h>
#include <net/sock.h>
@@ -42,67 +43,23 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/xfrm.h>
#endif
#include <asm/uaccess.h>
-int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
-{
- const unsigned char *nh = skb_network_header(skb);
- int packet_len = skb->tail - skb->network_header;
- struct ipv6_opt_hdr *hdr;
- int len;
-
- if (offset + 2 > packet_len)
- goto bad;
- hdr = (struct ipv6_opt_hdr *)(nh + offset);
- len = ((hdr->hdrlen + 1) << 3);
-
- if (offset + len > packet_len)
- goto bad;
-
- offset += 2;
- len -= 2;
-
- while (len > 0) {
- int opttype = nh[offset];
- int optlen;
-
- if (opttype == type)
- return offset;
-
- switch (opttype) {
- case IPV6_TLV_PAD0:
- optlen = 1;
- break;
- default:
- optlen = nh[offset + 1] + 2;
- if (optlen > len)
- goto bad;
- break;
- }
- offset += optlen;
- len -= optlen;
- }
- /* not_found */
- bad:
- return -1;
-}
-EXPORT_SYMBOL_GPL(ipv6_find_tlv);
-
/*
* Parsing tlv encoded headers.
*
- * Parsing function "func" returns 1, if parsing succeed
- * and 0, if it failed.
+ * Parsing function "func" returns true, if parsing succeed
+ * and false, if it failed.
* It MUST NOT touch skb->h.
*/
struct tlvtype_proc {
int type;
- int (*func)(struct sk_buff *skb, int offset);
+ bool (*func)(struct sk_buff *skb, int offset);
};
/*********************
@@ -111,11 +68,11 @@ struct tlvtype_proc {
/* An unknown option is detected, decide what to do */
-static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
{
switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
- return 1;
+ return true;
case 1: /* drop packet */
break;
@@ -128,21 +85,22 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
break;
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
- return 0;
+ return false;
}
kfree_skb(skb);
- return 0;
+ return false;
}
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
{
- struct tlvtype_proc *curr;
+ const struct tlvtype_proc *curr;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
int len = (skb_transport_header(skb)[1] + 1) << 3;
+ int padlen = 0;
if (skb_transport_offset(skb) + len > skb_headlen(skb))
goto bad;
@@ -152,13 +110,33 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
while (len > 0) {
int optlen = nh[off + 1] + 2;
+ int i;
switch (nh[off]) {
- case IPV6_TLV_PAD0:
+ case IPV6_TLV_PAD1:
optlen = 1;
+ padlen++;
+ if (padlen > 7)
+ goto bad;
break;
case IPV6_TLV_PADN:
+ /* RFC 2460 states that the purpose of PadN is
+ * to align the containing header to multiples
+ * of 8. 7 is therefore the highest valid value.
+ * See also RFC 4942, Section 2.1.9.5.
+ */
+ padlen += optlen;
+ if (padlen > 7)
+ goto bad;
+ /* RFC 4942 recommends receiving hosts to
+ * actively check PadN payload to contain
+ * only zeroes.
+ */
+ for (i = 2; i < optlen; i++) {
+ if (nh[off + i] != 0)
+ goto bad;
+ }
break;
default: /* Other TLV code so scan list */
@@ -169,33 +147,35 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
/* type specific length/alignment
checks will be performed in the
func(). */
- if (curr->func(skb, off) == 0)
- return 0;
+ if (curr->func(skb, off) == false)
+ return false;
break;
}
}
if (curr->type < 0) {
if (ip6_tlvopt_unknown(skb, off) == 0)
- return 0;
+ return false;
}
+ padlen = 0;
break;
}
off += optlen;
len -= optlen;
}
+
if (len == 0)
- return 1;
+ return true;
bad:
kfree_skb(skb);
- return 0;
+ return false;
}
/*****************************
Destination options header.
*****************************/
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -242,23 +222,23 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
- ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
- ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &tmp_addr);
+ tmp_addr = ipv6h->saddr;
+ ipv6h->saddr = hao->addr;
+ hao->addr = tmp_addr;
if (skb->tstamp.tv64 == 0)
__net_timestamp(skb);
- return 1;
+ return true;
discard:
kfree_skb(skb);
- return 0;
+ return false;
}
#endif
-static struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+static const struct tlvtype_proc tlvprocdestopt_lst[] = {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
{
.type = IPV6_TLV_HAO,
.func = ipv6_dest_hao,
@@ -270,31 +250,29 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
__u16 dstbuf;
#endif
- struct dst_entry *dst;
+ struct dst_entry *dst = skb_dst(skb);
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
opt->lastopt = opt->dst1 = skb_network_header_len(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
dstbuf = opt->dst1;
#endif
- dst = dst_clone(skb_dst(skb));
if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
- dst_release(dst);
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
opt->nhoff = dstbuf;
#else
opt->nhoff = opt->dst1;
@@ -304,7 +282,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
- dst_release(dst);
return -1;
}
@@ -351,7 +328,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
looped_back:
if (hdr->segments_left == 0) {
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
/* Silently discard type 2 header unless it was
* processed by own
@@ -377,7 +354,7 @@ looped_back:
}
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (accept_source_route < 0)
goto unknown_rh;
@@ -434,7 +411,7 @@ looped_back:
addr += i - 1;
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
@@ -463,9 +440,9 @@ looped_back:
return -1;
}
- ipv6_addr_copy(&daddr, addr);
- ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
- ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
+ daddr = *addr;
+ *addr = ipv6_hdr(skb)->daddr;
+ ipv6_hdr(skb)->daddr = daddr;
skb_dst_drop(skb);
ip6_route_input(skb);
@@ -501,12 +478,12 @@ unknown_rh:
static const struct inet6_protocol rthdr_protocol = {
.handler = ipv6_rthdr_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol destopt_protocol = {
.handler = ipv6_destopt_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol nodata_protocol = {
@@ -532,10 +509,10 @@ int __init ipv6_exthdrs_init(void)
out:
return ret;
-out_rthdr:
- inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
out_destopt:
inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+out_rthdr:
+ inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
goto out;
};
@@ -565,23 +542,24 @@ static inline struct net *ipv6_skb_net(struct sk_buff *skb)
/* Router Alert as of RFC 2711 */
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
if (nh[optoff + 1] == 2) {
- IP6CB(skb)->ra = optoff;
- return 1;
+ IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+ memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
+ return true;
}
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
nh[optoff + 1]);
kfree_skb(skb);
- return 0;
+ return false;
}
/* Jumbo payload */
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
struct net *net = ipv6_skb_net(skb);
@@ -600,13 +578,13 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
- return 0;
+ return false;
}
if (ipv6_hdr(skb)->payload_len) {
IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
- return 0;
+ return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
@@ -618,14 +596,14 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
goto drop;
- return 1;
+ return true;
drop:
kfree_skb(skb);
- return 0;
+ return false;
}
-static struct tlvtype_proc tlvprochopopt_lst[] = {
+static const struct tlvtype_proc tlvprochopopt_lst[] = {
{
.type = IPV6_TLV_ROUTERALERT,
.func = ipv6_hop_ra,
@@ -692,7 +670,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
memcpy(phdr->addr, ihdr->addr + 1,
(hops - 1) * sizeof(struct in6_addr));
- ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+ phdr->addr[hops - 1] = **addr_p;
*addr_p = ihdr->addr;
phdr->rt_hdr.nexthdr = *proto;
@@ -724,7 +702,6 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
if (opt->hopopt)
ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
}
-
EXPORT_SYMBOL(ipv6_push_nfrag_opts);
void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
@@ -740,20 +717,19 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
if (opt2) {
- long dif = (char*)opt2 - (char*)opt;
+ long dif = (char *)opt2 - (char *)opt;
memcpy(opt2, opt, opt->tot_len);
if (opt2->hopopt)
- *((char**)&opt2->hopopt) += dif;
+ *((char **)&opt2->hopopt) += dif;
if (opt2->dst0opt)
- *((char**)&opt2->dst0opt) += dif;
+ *((char **)&opt2->dst0opt) += dif;
if (opt2->dst1opt)
- *((char**)&opt2->dst1opt) += dif;
+ *((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
- *((char**)&opt2->srcrt) += dif;
+ *((char **)&opt2->srcrt) += dif;
}
return opt2;
}
-
EXPORT_SYMBOL_GPL(ipv6_dup_options);
static int ipv6_renew_option(void *ohdr,
@@ -766,14 +742,14 @@ static int ipv6_renew_option(void *ohdr,
if (ohdr) {
memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
*hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
+ *p += CMSG_ALIGN(ipv6_optlen(*hdr));
}
} else {
if (newopt) {
if (copy_from_user(*p, newopt, newoptlen))
return -EFAULT;
*hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
+ if (ipv6_optlen(*hdr) > newoptlen)
return -EINVAL;
*p += CMSG_ALIGN(newoptlen);
}
@@ -871,28 +847,28 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
return opt;
}
+EXPORT_SYMBOL_GPL(ipv6_fixup_options);
/**
* fl6_update_dst - update flowi destination address with info given
* by srcrt option, if any.
*
- * @fl: flowi for which fl6_dst is to be updated
+ * @fl6: flowi6 for which daddr is to be updated
* @opt: struct ipv6_txoptions in which to look for srcrt opt
- * @orig: copy of original fl6_dst address if modified
+ * @orig: copy of original daddr address if modified
*
* Returns NULL if no txoptions or no srcrt, otherwise returns orig
- * and initial value of fl->fl6_dst set in orig
+ * and initial value of fl6->daddr set in orig
*/
-struct in6_addr *fl6_update_dst(struct flowi *fl,
+struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
const struct ipv6_txoptions *opt,
struct in6_addr *orig)
{
if (!opt || !opt->srcrt)
return NULL;
- ipv6_addr_copy(orig, &fl->fl6_dst);
- ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
+ *orig = fl6->daddr;
+ fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
return orig;
}
-
EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 14ed0a955b5..8af3eb57f43 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -2,13 +2,14 @@
* IPv6 library code, needed by static components when full IPv6 support is
* not configured or static.
*/
+#include <linux/export.h>
#include <net/ipv6.h>
/*
* find out if nexthdr is a well-known extension header or a protocol
*/
-int ipv6_ext_hdr(u8 nexthdr)
+bool ipv6_ext_hdr(u8 nexthdr)
{
/*
* find out if nexthdr is an extension header or a protocol
@@ -20,6 +21,7 @@ int ipv6_ext_hdr(u8 nexthdr)
(nexthdr == NEXTHDR_NONE) ||
(nexthdr == NEXTHDR_DEST);
}
+EXPORT_SYMBOL(ipv6_ext_hdr);
/*
* Skip any extension headers. This is used by the ICMP module.
@@ -56,6 +58,9 @@ int ipv6_ext_hdr(u8 nexthdr)
* it returns NULL.
* - First fragment header is skipped, not-first ones
* are considered as unparsable.
+ * - Reports the offset field of the final fragment header so it is
+ * possible to tell whether this is a first fragment, later fragment,
+ * or not fragmented.
* - ESP is unparsable for now and considered like
* normal payload protocol.
* - Note also special handling of AUTH header. Thanks to IPsec wizards.
@@ -63,10 +68,13 @@ int ipv6_ext_hdr(u8 nexthdr)
* --ANK (980726)
*/
-int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+ __be16 *frag_offp)
{
u8 nexthdr = *nexthdrp;
+ *frag_offp = 0;
+
while (ipv6_ext_hdr(nexthdr)) {
struct ipv6_opt_hdr _hdr, *hp;
int hdrlen;
@@ -86,7 +94,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
if (fp == NULL)
return -1;
- if (ntohs(*fp) & ~0x7)
+ *frag_offp = *fp;
+ if (ntohs(*frag_offp) & ~0x7)
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
@@ -101,6 +110,172 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
*nexthdrp = nexthdr;
return start;
}
-
-EXPORT_SYMBOL(ipv6_ext_hdr);
EXPORT_SYMBOL(ipv6_skip_exthdr);
+
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
+ struct ipv6_opt_hdr *hdr;
+ int len;
+
+ if (offset + 2 > packet_len)
+ goto bad;
+ hdr = (struct ipv6_opt_hdr *)(nh + offset);
+ len = ((hdr->hdrlen + 1) << 3);
+
+ if (offset + len > packet_len)
+ goto bad;
+
+ offset += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int opttype = nh[offset];
+ int optlen;
+
+ if (opttype == type)
+ return offset;
+
+ switch (opttype) {
+ case IPV6_TLV_PAD1:
+ optlen = 1;
+ break;
+ default:
+ optlen = nh[offset + 1] + 2;
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ offset += optlen;
+ len -= optlen;
+ }
+ /* not_found */
+ bad:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * Note that *offset is used as input/output parameter. an if it is not zero,
+ * then it must be a valid offset to an inner IPv6 header. This can be used
+ * to explore inner IPv6 header, eg. ICMPv6 error messages.
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
+ *
+ * if flags is not NULL and it's a fragment, then the frag flag
+ * IP6_FH_F_FRAG will be set. If it's an AH header, the
+ * IP6_FH_F_AUTH flag is set and target < 0, then this function will
+ * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
+ * function will skip all those routing headers, where segements_left was 0.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+ int target, unsigned short *fragoff, int *flags)
+{
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ unsigned int len;
+ bool found;
+
+ if (fragoff)
+ *fragoff = 0;
+
+ if (*offset) {
+ struct ipv6hdr _ip6, *ip6;
+
+ ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
+ if (!ip6 || (ip6->version != 6)) {
+ printk(KERN_ERR "IPv6 header not found\n");
+ return -EBADMSG;
+ }
+ start = *offset + sizeof(struct ipv6hdr);
+ nexthdr = ip6->nexthdr;
+ }
+ len = skb->len - start;
+
+ do {
+ struct ipv6_opt_hdr _hdr, *hp;
+ unsigned int hdrlen;
+ found = (nexthdr == target);
+
+ if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+ if (target < 0 || found)
+ break;
+ return -ENOENT;
+ }
+
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return -EBADMSG;
+
+ if (nexthdr == NEXTHDR_ROUTING) {
+ struct ipv6_rt_hdr _rh, *rh;
+
+ rh = skb_header_pointer(skb, start, sizeof(_rh),
+ &_rh);
+ if (rh == NULL)
+ return -EBADMSG;
+
+ if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
+ rh->segments_left == 0)
+ found = false;
+ }
+
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ unsigned short _frag_off;
+ __be16 *fp;
+
+ if (flags) /* Indicate that this is a fragment */
+ *flags |= IP6_FH_F_FRAG;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
+ return -EBADMSG;
+
+ _frag_off = ntohs(*fp) & ~0x7;
+ if (_frag_off) {
+ if (target < 0 &&
+ ((!ipv6_ext_hdr(hp->nexthdr)) ||
+ hp->nexthdr == NEXTHDR_NONE)) {
+ if (fragoff)
+ *fragoff = _frag_off;
+ return hp->nexthdr;
+ }
+ return -ENOENT;
+ }
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH) {
+ if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
+ break;
+ hdrlen = (hp->hdrlen + 2) << 2;
+ } else
+ hdrlen = ipv6_optlen(hp);
+
+ if (!found) {
+ nexthdr = hp->nexthdr;
+ len -= hdrlen;
+ start += hdrlen;
+ }
+ } while (!found);
+
+ *offset = start;
+ return nexthdr;
+}
+EXPORT_SYMBOL(ipv6_find_hdr);
+
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
new file mode 100644
index 00000000000..447a7fbd1bb
--- /dev/null
+++ b/net/ipv6/exthdrs_offload.c
@@ -0,0 +1,41 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * IPV6 Extension Header GSO/GRO support
+ */
+#include <net/protocol.h>
+#include "ip6_offload.h"
+
+static const struct net_offload rthdr_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+static const struct net_offload dstopt_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+int __init ipv6_exthdrs_offload_init(void)
+{
+ int ret;
+
+ ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+ if (ret)
+ goto out;
+
+ ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+ if (ret)
+ goto out_rt;
+
+out:
+ return ret;
+
+out_rt:
+ inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+ goto out;
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d829874d894..b4d5e1d97c1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/export.h>
#include <net/fib_rules.h>
#include <net/ipv6.h>
@@ -21,15 +22,14 @@
#include <net/ip6_route.h>
#include <net/netlink.h>
-struct fib6_rule
-{
+struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
u8 tclass;
};
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
struct fib_lookup_arg arg = {
@@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
.flags = FIB_LOOKUP_NOREF,
};
- fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg);
+ fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
if (arg.result)
return arg.result;
@@ -49,30 +50,38 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
struct fib6_table *table;
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
+ int err = 0;
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
rt = net->ipv6.ip6_null_entry;
goto discard_pkt;
default:
case FR_ACT_BLACKHOLE:
+ err = -EINVAL;
rt = net->ipv6.ip6_blk_hole_entry;
goto discard_pkt;
case FR_ACT_PROHIBIT:
+ err = -EACCES;
rt = net->ipv6.ip6_prohibit_entry;
goto discard_pkt;
}
table = fib6_get_table(net, rule->table);
- if (table)
- rt = lookup(net, table, flp, flags);
+ if (!table) {
+ err = -EAGAIN;
+ goto out;
+ }
+ rt = lookup(net, table, flp6, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -86,19 +95,20 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if (ipv6_dev_get_saddr(net,
ip6_dst_idev(&rt->dst)->dev,
- &flp->fl6_dst,
+ &flp6->daddr,
rt6_flags2srcprefs(flags),
&saddr))
goto again;
if (!ipv6_prefix_equal(&saddr, &r->src.addr,
r->src.plen))
goto again;
- ipv6_addr_copy(&flp->fl6_src, &saddr);
+ flp6->saddr = saddr;
}
goto out;
}
again:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
+ err = -EAGAIN;
rt = NULL;
goto out;
@@ -106,16 +116,43 @@ discard_pkt:
dst_hold(&rt->dst);
out:
arg->result = rt;
- return rt == NULL ? -EAGAIN : 0;
+ return err;
}
+static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+{
+ struct rt6_info *rt = (struct rt6_info *) arg->result;
+ struct net_device *dev = NULL;
+
+ if (rt->rt6i_idev)
+ dev = rt->rt6i_idev->dev;
+
+ /* do not accept result if the route does
+ * not meet the required prefix length
+ */
+ if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
+ goto suppress_route;
+
+ /* do not accept result if the route uses a device
+ * belonging to a forbidden interface group
+ */
+ if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
+ goto suppress_route;
+
+ return false;
+
+suppress_route:
+ ip6_rt_put(rt);
+ return true;
+}
static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
struct fib6_rule *r = (struct fib6_rule *) rule;
+ struct flowi6 *fl6 = &fl->u.ip6;
if (r->dst.plen &&
- !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+ !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
return 0;
/*
@@ -125,14 +162,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
*/
if (r->src.plen) {
if (flags & RT6_LOOKUP_F_HAS_SADDR) {
- if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+ if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr,
r->src.plen))
return 0;
} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
return 0;
}
- if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
+ if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
return 1;
@@ -211,14 +248,13 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->src_len = rule6->src.plen;
frh->tos = rule6->tclass;
- if (rule6->dst.plen)
- NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
- &rule6->dst.addr);
-
- if (rule6->src.plen)
- NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
- &rule6->src.addr);
-
+ if ((rule6->dst.plen &&
+ nla_put(skb, FRA_DST, sizeof(struct in6_addr),
+ &rule6->dst.addr)) ||
+ (rule6->src.plen &&
+ nla_put(skb, FRA_SRC, sizeof(struct in6_addr),
+ &rule6->src.addr)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -236,12 +272,13 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(16); /* src */
}
-static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
.addr_size = sizeof(struct in6_addr),
.action = fib6_rule_action,
.match = fib6_rule_match,
+ .suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f94ff8..f6c84a6eb23 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -29,6 +29,8 @@
* Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -55,6 +57,7 @@
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -64,9 +67,9 @@
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
+#include <net/dsfield.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
/*
* The ICMP socket(s). This is the most convenient way to flow control
@@ -80,10 +83,28 @@ static inline struct sock *icmpv6_sk(struct net *net)
return net->ipv6.icmp_sk[smp_processor_id()];
}
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
+ struct net *net = dev_net(skb->dev);
+
+ if (type == ICMPV6_PKT_TOOBIG)
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ else if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
+}
+
static int icmpv6_rcv(struct sk_buff *skb);
static const struct inet6_protocol icmpv6_protocol = {
.handler = icmpv6_rcv,
+ .err_handler = icmpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -111,15 +132,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
}
/*
- * Slightly more convenient version of icmpv6_send.
- */
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
-{
- icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
- kfree_skb(skb);
-}
-
-/*
* Figure out, may we reply to this packet with icmp error.
*
* We do not reply, if:
@@ -130,18 +142,19 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
* --ANK (980726)
*/
-static int is_ineligible(struct sk_buff *skb)
+static bool is_ineligible(const struct sk_buff *skb)
{
int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
int len = skb->len - ptr;
__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
if (len < 0)
- return 1;
+ return true;
- ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
+ ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
if (ptr < 0)
- return 0;
+ return false;
if (nexthdr == IPPROTO_ICMPV6) {
u8 _type, *tp;
tp = skb_header_pointer(skb,
@@ -149,49 +162,53 @@ static int is_ineligible(struct sk_buff *skb)
sizeof(_type), &_type);
if (tp == NULL ||
!(*tp & ICMPV6_INFOMSG_MASK))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
* Check the ICMP output rate limit
*/
-static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi *fl)
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+ struct flowi6 *fl6)
{
struct dst_entry *dst;
struct net *net = sock_net(sk);
- int res = 0;
+ bool res = false;
/* Informational messages are not limited. */
if (type & ICMPV6_INFOMSG_MASK)
- return 1;
+ return true;
/* Do not limit pmtu discovery, it would break it. */
if (type == ICMPV6_PKT_TOOBIG)
- return 1;
+ return true;
/*
* Look up the output route.
* XXX: perhaps the expire for routing entries cloned by
* this lookup should be more aggressive (not longer than timeout).
*/
- dst = ip6_route_output(net, sk, fl);
+ dst = ip6_route_output(net, sk, fl6);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
- res = 1;
+ res = true;
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
int tmo = net->ipv6.sysctl.icmpv6_time;
+ struct inet_peer *peer;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- res = xrlim_allow(dst, tmo);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ res = inet_peer_xrlim_allow(peer, tmo);
+ if (peer)
+ inet_putpeer(peer);
}
dst_release(dst);
return res;
@@ -204,18 +221,19 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
* highest-order two bits set to 10
*/
-static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
u8 _optval, *op;
offset += skb_network_offset(skb);
op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
if (op == NULL)
- return 1;
+ return true;
return (*op & 0xC0) == 0x80;
}
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
@@ -231,9 +249,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
if (skb_queue_len(&sk->sk_write_queue) == 1) {
skb->csum = csum_partial(icmp6h,
sizeof(struct icmp6hdr), skb->csum);
- icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- len, fl->proto,
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+ &fl6->daddr,
+ len, fl6->flowi6_proto,
skb->csum);
} else {
__wsum tmp_csum = 0;
@@ -244,9 +262,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
tmp_csum = csum_partial(icmp6h,
sizeof(struct icmp6hdr), tmp_csum);
- icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- len, fl->proto,
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+ &fl6->daddr,
+ len, fl6->flowi6_proto,
tmp_csum);
}
ip6_push_pending_frames(sk);
@@ -274,7 +292,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
return 0;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -288,9 +306,9 @@ static void mip6_addr_swap(struct sk_buff *skb)
if (likely(off >= 0)) {
hao = (struct ipv6_destopt_hao *)
(skb_network_header(skb) + off);
- ipv6_addr_copy(&tmp, &iph->saddr);
- ipv6_addr_copy(&iph->saddr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &tmp);
+ tmp = iph->saddr;
+ iph->saddr = hao->addr;
+ hao->addr = tmp;
}
}
}
@@ -298,42 +316,106 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
+static struct dst_entry *icmpv6_route_lookup(struct net *net,
+ struct sk_buff *skb,
+ struct sock *sk,
+ struct flowi6 *fl6)
+{
+ struct dst_entry *dst, *dst2;
+ struct flowi6 fl2;
+ int err;
+
+ err = ip6_dst_lookup(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+
+ /*
+ * We won't send icmp if the destination is known
+ * anycast.
+ */
+ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
+ dst_release(dst);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* No need to clone since we're just using its address. */
+ dst2 = dst;
+
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
+ if (!IS_ERR(dst)) {
+ if (dst != dst2)
+ return dst;
+ } else {
+ if (PTR_ERR(dst) == -EPERM)
+ dst = NULL;
+ else
+ return dst;
+ }
+
+ err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+ if (err)
+ goto relookup_failed;
+
+ err = ip6_dst_lookup(sk, &dst2, &fl2);
+ if (err)
+ goto relookup_failed;
+
+ dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
+ if (!IS_ERR(dst2)) {
+ dst_release(dst);
+ dst = dst2;
+ } else {
+ err = PTR_ERR(dst2);
+ if (err == -EPERM) {
+ dst_release(dst);
+ return dst2;
+ } else
+ goto relookup_failed;
+ }
+
+relookup_failed:
+ if (dst)
+ return dst;
+ return ERR_PTR(err);
+}
+
/*
* Send an ICMP message in response to a packet in error
*/
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
{
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
struct ipv6_pinfo *np;
- struct in6_addr *saddr = NULL;
+ const struct in6_addr *saddr = NULL;
struct dst_entry *dst;
- struct dst_entry *dst2;
struct icmp6hdr tmp_hdr;
- struct flowi fl;
- struct flowi fl2;
+ struct flowi6 fl6;
struct icmpv6_msg msg;
int iif = 0;
int addr_type = 0;
int len;
int hlimit;
int err = 0;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
- (skb->network_header + sizeof(*hdr)) > skb->tail)
+ (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
return;
/*
* Make sure we respect the rules
* i.e. RFC 1885 2.4(e)
- * Rule (e.1) is enforced by not using icmpv6_send
+ * Rule (e.1) is enforced by not using icmp6_send
* in any code that processes icmp errors.
*/
addr_type = ipv6_addr_type(&hdr->daddr);
- if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
+ if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
+ ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
saddr = &hdr->daddr;
/*
@@ -356,7 +438,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Source addr check
*/
- if (addr_type & IPV6_ADDR_LINKLOCAL)
+ if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
/*
@@ -366,7 +448,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
return;
}
@@ -374,28 +456,30 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Never answer to a ICMP packet.
*/
if (is_ineligible(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
return;
}
mip6_addr_swap(skb);
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.daddr = hdr->saddr;
if (saddr)
- ipv6_addr_copy(&fl.fl6_src, saddr);
- fl.oif = iif;
- fl.fl_icmp_type = type;
- fl.fl_icmp_code = code;
- security_skb_classify_flow(skb, &fl);
+ fl6.saddr = *saddr;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_oif = iif;
+ fl6.fl6_icmp_type = type;
+ fl6.fl6_icmp_code = code;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
- if (!icmpv6_xrlim_allow(sk, type, &fl))
+ if (!icmpv6_xrlim_allow(sk, type, &fl6))
goto out;
tmp_hdr.icmp6_type = type;
@@ -403,66 +487,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
tmp_hdr.icmp6_cksum = 0;
tmp_hdr.icmp6_pointer = htonl(info);
- if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
- fl.oif = np->mcast_oif;
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto out;
-
- /*
- * We won't send icmp if the destination is known
- * anycast.
- */
- if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
- goto out_dst_release;
- }
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
- /* No need to clone since we're just using its address. */
- dst2 = dst;
-
- err = xfrm_lookup(net, &dst, &fl, sk, 0);
- switch (err) {
- case 0:
- if (dst != dst2)
- goto route_done;
- break;
- case -EPERM:
- dst = NULL;
- break;
- default:
+ dst = icmpv6_route_lookup(net, skb, sk, &fl6);
+ if (IS_ERR(dst))
goto out;
- }
-
- if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
- goto relookup_failed;
-
- if (ip6_dst_lookup(sk, &dst2, &fl2))
- goto relookup_failed;
-
- err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
- switch (err) {
- case 0:
- dst_release(dst);
- dst = dst2;
- break;
- case -EPERM:
- goto out_dst_release;
- default:
-relookup_failed:
- if (!dst)
- goto out;
- break;
- }
-route_done:
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -475,30 +509,35 @@ route_done:
goto out_dst_release;
}
- idev = in6_dev_get(skb->dev);
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit,
- np->tclass, NULL, &fl, (struct rt6_info*)dst,
+ np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
- goto out_put;
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
}
- err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
-
-out_put:
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ rcu_read_unlock();
out_dst_release:
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
}
-EXPORT_SYMBOL(icmpv6_send);
+/* Slightly more convenient version of icmp6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+ kfree_skb(skb);
+}
static void icmpv6_echo_reply(struct sk_buff *skb)
{
@@ -506,84 +545,86 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
- struct in6_addr *saddr = NULL;
+ const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
struct icmp6hdr tmp_hdr;
- struct flowi fl;
+ struct flowi6 fl6;
struct icmpv6_msg msg;
struct dst_entry *dst;
int err = 0;
int hlimit;
+ u8 tclass;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr;
- if (!ipv6_unicast_destination(skb))
+ if (!ipv6_unicast_destination(skb) &&
+ !(net->ipv6.sysctl.anycast_src_echo_reply &&
+ ipv6_anycast_destination(skb)))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
- ipv6_addr_copy(&fl.fl6_src, saddr);
- fl.oif = skb->dev->ifindex;
- fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
- security_skb_classify_flow(skb, &fl);
+ fl6.saddr = *saddr;
+ fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.flowi6_mark = mark;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
- if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
- fl.oif = np->mcast_oif;
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(sk, &dst, &fl);
+ err = ip6_dst_lookup(sk, &dst, &fl6);
if (err)
goto out;
- if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
+ if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- idev = in6_dev_get(skb->dev);
+ idev = __in6_dev_get(skb->dev);
msg.skb = skb;
msg.offset = 0;
msg.type = ICMPV6_ECHO_REPLY;
+ tclass = ipv6_get_dsfield(ipv6_hdr(skb));
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
- (struct rt6_info*)dst, MSG_DONTWAIT,
+ sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
+ (struct rt6_info *)dst, MSG_DONTWAIT,
np->dontfrag);
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
- goto out_put;
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
}
- err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
-
-out_put:
- if (likely(idev != NULL))
- in6_dev_put(idev);
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
}
-static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
const struct inet6_protocol *ipprot;
int inner_offset;
- int hash;
+ __be16 frag_off;
u8 nexthdr;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -592,7 +633,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
if (ipv6_ext_hdr(nexthdr)) {
/* now skip over extension headers */
- inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
if (inner_offset<0)
return;
} else {
@@ -610,10 +652,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- hash = nexthdr & (MAX_INET_PROTOS - 1);
-
rcu_read_lock();
- ipprot = rcu_dereference(inet6_protos[hash]);
+ ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
rcu_read_unlock();
@@ -629,8 +669,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
- struct in6_addr *saddr, *daddr;
- struct ipv6hdr *orig_hdr;
+ const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
u8 type;
@@ -642,7 +681,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
XFRM_STATE_ICMP))
goto drop_no_count;
- if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+ if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
nh = skb_network_offset(skb);
@@ -659,21 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
- /* Perform checksum. */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
- skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
- saddr, daddr);
- goto discard_it;
- }
+ if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
+ goto csum_error;
}
if (!pskb_pull(skb, sizeof(*hdr)))
@@ -691,7 +720,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- /* we couldn't care less */
+ ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -703,9 +732,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto discard_it;
hdr = icmp6_hdr(skb);
- orig_hdr = (struct ipv6hdr *) (hdr + 1);
- rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
- ntohl(hdr->icmp6_mtu));
/*
* Drop through to notify
@@ -761,6 +787,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
kfree_skb(skb);
return 0;
+csum_error:
+ ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
@@ -768,20 +796,20 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
+void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
u8 type,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
int oif)
{
- memset(fl, 0, sizeof(*fl));
- ipv6_addr_copy(&fl->fl6_src, saddr);
- ipv6_addr_copy(&fl->fl6_dst, daddr);
- fl->proto = IPPROTO_ICMPV6;
- fl->fl_icmp_type = type;
- fl->fl_icmp_code = 0;
- fl->oif = oif;
- security_sk_classify_flow(sk, fl);
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->saddr = *saddr;
+ fl6->daddr = *daddr;
+ fl6->flowi6_proto = IPPROTO_ICMPV6;
+ fl6->fl6_icmp_type = type;
+ fl6->fl6_icmp_code = 0;
+ fl6->flowi6_oif = oif;
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
/*
@@ -803,9 +831,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
err = inet_ctl_sock_create(&sk, PF_INET6,
SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
- printk(KERN_ERR
- "Failed to initialize the ICMP6 control socket "
- "(err %d).\n",
+ pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
err);
goto fail;
}
@@ -824,8 +850,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
return 0;
@@ -862,16 +887,23 @@ int __init icmpv6_init(void)
err = -EAGAIN;
if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
goto fail;
+
+ err = inet6_register_icmp_sender(icmp6_send);
+ if (err)
+ goto sender_reg_err;
return 0;
+sender_reg_err:
+ inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
- printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+ pr_err("Failed to register ICMP6 protocol\n");
unregister_pernet_subsys(&icmpv6_sk_ops);
return err;
}
void icmpv6_cleanup(void)
{
+ inet6_unregister_icmp_sender(icmp6_send);
unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
@@ -901,6 +933,14 @@ static const struct icmp6_err {
.err = ECONNREFUSED,
.fatal = 1,
},
+ { /* POLICY_FAIL */
+ .err = EACCES,
+ .fatal = 1,
+ },
+ { /* REJECT_ROUTE */
+ .err = EACCES,
+ .fatal = 1,
+ },
};
int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -912,7 +952,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
switch (type) {
case ICMPV6_DEST_UNREACH:
fatal = 1;
- if (code <= ICMPV6_PORT_UNREACH) {
+ if (code < ARRAY_SIZE(tab_unreach)) {
*err = tab_unreach[code].err;
fatal = tab_unreach[code].fatal;
}
@@ -934,11 +974,10 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
return fatal;
}
-
EXPORT_SYMBOL(icmpv6_err_convert);
#ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table_template[] = {
+static struct ctl_table ipv6_icmp_table_template[] = {
{
.procname = "ratelimit",
.data = &init_net.ipv6.sysctl.icmpv6_time,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e46305d1815..a245e5ddffb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -28,59 +28,66 @@
#include <net/inet6_connection_sock.h>
int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb)
+ const struct inet_bind_bucket *tb, bool relax)
{
const struct sock *sk2;
- const struct hlist_node *node;
+ int reuse = sk->sk_reuse;
+ int reuseport = sk->sk_reuseport;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
/* We must walk the whole port owner list in this case. -DaveM */
/*
* See comment in inet_csk_bind_conflict about sock lookup
* vs net namespaces issues.
*/
- sk_for_each_bound(sk2, node, &tb->owners) {
+ sk_for_each_bound(sk2, &tb->owners) {
if (sk != sk2 &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
- (!sk->sk_reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- ipv6_rcv_saddr_equal(sk, sk2))
- break;
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ if ((!reuse || !sk2->sk_reuse ||
+ sk2->sk_state == TCP_LISTEN) &&
+ (!reuseport || !sk2->sk_reuseport ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid,
+ sock_i_uid((struct sock *)sk2))))) {
+ if (ipv6_rcv_saddr_equal(sk, sk2))
+ break;
+ }
+ if (!relax && reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN &&
+ ipv6_rcv_saddr_equal(sk, sk2))
+ break;
+ }
}
- return node != NULL;
+ return sk2 != NULL;
}
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
struct dst_entry *inet6_csk_route_req(struct sock *sk,
+ struct flowi6 *fl6,
const struct request_sock *req)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
- final_p = fl6_update_dst(&fl, np->opt, &final);
- ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, &fl);
-
- if (ip6_dst_lookup(sk, &dst, &fl))
- return NULL;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->daddr = ireq->ir_v6_rmt_addr;
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ fl6->saddr = ireq->ir_v6_loc_addr;
+ fl6->flowi6_oif = ireq->ir_iif;
+ fl6->flowi6_mark = ireq->ir_mark;
+ fl6->fl6_dport = ireq->ir_rmt_port;
+ fl6->fl6_sport = htons(ireq->ir_num);
+ security_req_classify_flow(req, flowi6_to_flowi(fl6));
+
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ if (IS_ERR(dst))
return NULL;
return dst;
@@ -90,7 +97,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
* request_sock (formerly open request) hash tables.
*/
static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
- const u32 rnd, const u16 synq_hsize)
+ const u32 rnd, const u32 synq_hsize)
{
u32 c;
@@ -122,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
lopt->nr_table_entries)];
(req = *prev) != NULL;
prev = &req->dl_next) {
- const struct inet6_request_sock *treq = inet6_rsk(req);
+ const struct inet_request_sock *ireq = inet_rsk(req);
- if (inet_rsk(req)->rmt_port == rport &&
+ if (ireq->ir_rmt_port == rport &&
req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&treq->rmt_addr, raddr) &&
- ipv6_addr_equal(&treq->loc_addr, laddr) &&
- (!treq->iif || treq->iif == iif)) {
+ ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+ ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+ (!ireq->ir_iif || ireq->ir_iif == iif)) {
WARN_ON(req->sk != NULL);
*prevp = prev;
return req;
@@ -146,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
- inet_rsk(req)->rmt_port,
+ const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+ inet_rsk(req)->ir_rmt_port,
lopt->hash_rnd, lopt->nr_table_entries);
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -158,108 +165,102 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
sin6->sin6_family = AF_INET6;
- ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+ sin6->sin6_addr = sk->sk_v6_daddr;
sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (sk->sk_bound_dev_if &&
- ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = sk->sk_bound_dev_if;
+ sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+ sk->sk_bound_dev_if);
}
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
__ip6_dst_store(sk, dst, daddr, saddr);
-
-#ifdef CONFIG_XFRM
- {
- struct rt6_info *rt = (struct rt6_info *)dst;
- rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
- }
-#endif
}
static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
- struct dst_entry *dst;
-
- dst = __sk_dst_check(sk, cookie);
-
-#ifdef CONFIG_XFRM
- if (dst) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
- __sk_dst_reset(sk);
- dst = NULL;
- }
- }
-#endif
-
- return dst;
+ return __sk_dst_check(sk, cookie);
}
-int inet6_csk_xmit(struct sk_buff *skb)
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+ struct flowi6 *fl6)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi fl;
- struct dst_entry *dst;
struct in6_addr *final_p, final;
+ struct dst_entry *dst;
- memset(&fl, 0, sizeof(fl));
- fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl6_flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_sport = inet->inet_sport;
- fl.fl_ip_dport = inet->inet_dport;
- security_sk_classify_flow(sk, &fl);
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->daddr = sk->sk_v6_daddr;
+ fl6->saddr = np->saddr;
+ fl6->flowlabel = np->flow_label;
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_sport = inet->inet_sport;
+ fl6->fl6_dport = inet->inet_dport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(&fl, np->opt, &final);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+ if (!dst) {
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p);
- if (dst == NULL) {
- int err = ip6_dst_lookup(sk, &dst, &fl);
-
- if (err) {
- sk->sk_err_soft = -err;
- kfree_skb(skb);
- return err;
- }
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
- sk->sk_route_caps = 0;
- kfree_skb(skb);
- return err;
- }
+ if (!IS_ERR(dst))
+ __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ }
+ return dst;
+}
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int res;
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
}
- skb_dst_set(skb, dst_clone(dst));
+ rcu_read_lock();
+ skb_dst_set_noref(skb, dst);
/* Restore final destination back after routing done */
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ fl6.daddr = sk->sk_v6_daddr;
- return ip6_xmit(sk, skb, &fl, np->opt);
+ res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_unlock();
+ return res;
}
-
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
+
+ if (IS_ERR(dst))
+ return NULL;
+ dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 633a6c26613..262e13c02ec 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -20,8 +20,42 @@
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
#include <net/ip.h>
+static unsigned int inet6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 inet6_ehash_secret __read_mostly;
+ static u32 ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+ net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ inet6_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *faddr = &sk->sk_v6_daddr;
+ const __u16 lport = inet->inet_num;
+ const __be16 fport = inet->inet_dport;
+ struct net *net = sock_net(sk);
+
+ return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
@@ -86,45 +120,30 @@ struct sock *__inet6_lookup_established(struct net *net,
rcu_read_lock();
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- /* For IPV6 do the cheaper port and family tests first. */
- if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
- goto begintw;
- if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
- sock_put(sk);
- goto begin;
- }
- goto out;
- }
- }
- if (get_nulls_value(node) != slot)
- goto begin;
-
-begintw:
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
- sk = NULL;
- goto out;
- }
- if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
- sock_put(sk);
- goto begintw;
- }
+ if (sk->sk_hash != hash)
+ continue;
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ continue;
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto out;
+
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ sock_gen_put(sk);
+ goto begin;
}
+ goto found;
}
if (get_nulls_value(node) != slot)
- goto begintw;
- sk = NULL;
+ goto begin;
out:
+ sk = NULL;
+found:
rcu_read_unlock();
return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);
-static int inline compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
const int dif)
@@ -133,11 +152,10 @@ static int inline compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
score = 1;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
@@ -151,25 +169,38 @@ static int inline compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
+ struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
struct sock *sk;
const struct hlist_nulls_node *node;
struct sock *result;
- int score, hiscore;
+ int score, hiscore, matches = 0, reuseport = 0;
+ u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
rcu_read_lock();
begin:
result = NULL;
- hiscore = -1;
+ hiscore = 0;
sk_nulls_for_each(sk, node, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif);
if (score > hiscore) {
hiscore = score;
result = sk;
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ }
+ } else if (score == hiscore && reuseport) {
+ matches++;
+ if (((u64)phash * matches) >> 32 == 0)
+ result = sk;
+ phash = next_pseudo_random32(phash);
}
}
/*
@@ -216,9 +247,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct in6_addr *daddr = &np->rcv_saddr;
- const struct in6_addr *saddr = &np->daddr;
+ const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
@@ -228,33 +258,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
+ struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock);
- /* Check TIME-WAIT sockets first. */
- sk_nulls_for_each(sk2, node, &head->twchain) {
- tw = inet_twsk(sk2);
-
- if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
- }
- }
- tw = NULL;
-
- /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) {
- if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+ if (sk2->sk_hash != hash)
+ continue;
+
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ break;
+ }
goto not_unique;
+ }
}
-unique:
/* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
+ * in hash table socket with a funny identity.
+ */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
sk->sk_hash = hash;
@@ -287,9 +312,9 @@ not_unique:
static inline u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
- np->daddr.s6_addr32,
+
+ return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_dport);
}
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
new file mode 100644
index 00000000000..9a4d7322fb2
--- /dev/null
+++ b/net/ipv6/ip6_checksum.c
@@ -0,0 +1,124 @@
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <asm/checksum.h>
+
+#ifndef _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, unsigned short proto,
+ __wsum csum)
+{
+
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+
+ sum += (__force u32)saddr->s6_addr32[0];
+ carry = (sum < (__force u32)saddr->s6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[1];
+ carry = (sum < (__force u32)saddr->s6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[2];
+ carry = (sum < (__force u32)saddr->s6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->s6_addr32[3];
+ carry = (sum < (__force u32)saddr->s6_addr32[3]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[0];
+ carry = (sum < (__force u32)daddr->s6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[1];
+ carry = (sum < (__force u32)daddr->s6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[2];
+ carry = (sum < (__force u32)daddr->s6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->s6_addr32[3];
+ carry = (sum < (__force u32)daddr->s6_addr32[3]);
+ sum += carry;
+
+ ulen = (__force u32)htonl((__u32) len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+
+ uproto = (__force u32)htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+
+ return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif
+
+int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
+{
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
+ /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+ * we accept a checksum of zero here. When we find the socket
+ * for the UDP packet we'll check if that socket allows zero checksum
+ * for IPv6 (set by socket option).
+ */
+ return skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+}
+EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v6_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+}
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index de382114609..cb4459bd1d2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -9,15 +9,16 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
-/*
- * Changes:
- * Yuji SEKIYA @USAGI: Support default route on router node;
- * remove ip6_null_entry from the top of
- * routing table.
- * Ville Nuorvala: Fixed routing subtrees.
- */
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
@@ -28,10 +29,6 @@
#include <linux/list.h>
#include <linux/slab.h>
-#ifdef CONFIG_PROC_FS
-#include <linux/proc_fs.h>
-#endif
-
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
@@ -42,15 +39,14 @@
#define RT6_DEBUG 2
#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#define RT6_TRACE(x...) pr_debug(x)
#else
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-static struct kmem_cache * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
#endif
@@ -60,8 +56,7 @@ enum fib_walk_state_t
FWS_U
};
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
struct fib6_walker_t w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
@@ -76,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker_t *w);
@@ -134,12 +128,12 @@ static __inline__ u32 fib6_new_sernum(void)
# define BITOP_BE32_SWIZZLE 0
#endif
-static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
+static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
{
- __be32 *addr = token;
+ const __be32 *addr = token;
/*
* Here,
- * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
* is optimized version of
* htonl(1 << ((~fn_bit)&0x1F))
* See include/asm-generic/bitops/le.h.
@@ -148,7 +142,7 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node * node_alloc(void)
+static __inline__ struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -157,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node * fn)
+static __inline__ void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
@@ -194,10 +188,11 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
struct fib6_table *table;
table = kzalloc(sizeof(*table), GFP_ATOMIC);
- if (table != NULL) {
+ if (table) {
table->tb6_id = id;
table->tb6_root.leaf = net->ipv6.ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&table->tb6_peers);
}
return table;
@@ -214,7 +209,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
return tb;
tb = fib6_alloc_table(net, id);
- if (tb != NULL)
+ if (tb)
fib6_link_table(net, tb);
return tb;
@@ -224,7 +219,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
struct fib6_table *tb;
struct hlist_head *head;
- struct hlist_node *node;
unsigned int h;
if (id == 0)
@@ -232,7 +226,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
h = id & (FIB6_TABLE_HASHSZ - 1);
rcu_read_lock();
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (tb->tb6_id == id) {
rcu_read_unlock();
return tb;
@@ -260,10 +254,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
return net->ipv6.fib6_main_tbl;
}
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags);
+ return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
}
static void __net_init fib6_tables_init(struct net *net)
@@ -293,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void*)cb->args[2];
+ struct fib6_walker_t *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -303,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
cb->args[2] = 0;
kfree(w);
}
- cb->done = (void*)cb->args[3];
+ cb->done = (void *)cb->args[3];
cb->args[1] = 3;
}
@@ -363,7 +357,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct rt6_rtnl_dump_arg arg;
struct fib6_walker_t *w;
struct fib6_table *tb;
- struct hlist_node *node;
struct hlist_head *head;
int res = 0;
@@ -371,7 +364,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
s_e = cb->args[1];
w = (void *)cb->args[2];
- if (w == NULL) {
+ if (!w) {
/* New dump:
*
* 1. hook callback destructor.
@@ -383,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* 2. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (w == NULL)
+ if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
@@ -394,10 +387,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
arg.net = net;
w->args = &arg;
+ rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry(tb, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
res = fib6_dump_table(tb, skb, cb);
@@ -408,6 +402,7 @@ next:
}
}
out:
+ rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
@@ -425,9 +420,10 @@ out:
* node.
*/
-static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
- int addrlen, int plen,
- int offset)
+static struct fib6_node *fib6_add_1(struct fib6_node *root,
+ struct in6_addr *addr, int plen,
+ int offset, int allow_create,
+ int replace_required)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -449,8 +445,16 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
* Prefix match
*/
if (plen < fn->fn_bit ||
- !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+ !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
+ if (!allow_create) {
+ if (replace_required) {
+ pr_warn("Can't replace route, no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("NLM_F_CREATE should be set when creating new route\n");
+ }
goto insert_above;
+ }
/*
* Exact match ?
@@ -458,7 +462,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
- if ((fn->fn_flags & RTN_RTINFO) == 0) {
+ if (!(fn->fn_flags & RTN_RTINFO)) {
rt6_release(fn->leaf);
fn->leaf = NULL;
}
@@ -476,9 +480,25 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right: fn->left;
+ fn = dir ? fn->right : fn->left;
} while (fn);
+ if (!allow_create) {
+ /* We should not create new node because
+ * NLM_F_REPLACE was specified without NLM_F_CREATE
+ * I assume it is safe to require NLM_F_CREATE when
+ * REPLACE flag is used! Later we may want to remove the
+ * check for replace_required, because according
+ * to netlink specification, NLM_F_CREATE
+ * MUST be specified if new route is created.
+ * That would keep IPv6 consistent with IPv4
+ */
+ if (replace_required) {
+ pr_warn("Can't replace route, no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("NLM_F_CREATE should be set when creating new route\n");
+ }
/*
* We walked to the bottom of tree.
* Create new leaf node without children.
@@ -486,8 +506,8 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
ln = node_alloc();
- if (ln == NULL)
- return NULL;
+ if (!ln)
+ return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
ln->parent = pn;
@@ -518,7 +538,7 @@ insert_above:
but if it is >= plen, the value is ignored in any case.
*/
- bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
+ bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
/*
* (intermediate)[in]
@@ -529,12 +549,12 @@ insert_above:
in = node_alloc();
ln = node_alloc();
- if (in == NULL || ln == NULL) {
+ if (!in || !ln) {
if (in)
node_free(in);
if (ln)
node_free(ln);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
/*
@@ -583,8 +603,8 @@ insert_above:
ln = node_alloc();
- if (ln == NULL)
- return NULL;
+ if (!ln)
+ return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
@@ -607,19 +627,61 @@ insert_above:
return ln;
}
+static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ RTF_GATEWAY;
+}
+
+static int fib6_commit_metrics(struct dst_entry *dst,
+ struct nlattr *mx, int mx_len)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ if (dst->flags & DST_HOST) {
+ mp = dst_metrics_write_ptr(dst);
+ } else {
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
+ dst_init_metrics(dst, mp, 0);
+ }
+
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ mp[type - 1] = nla_get_u32(nla);
+ }
+ }
+ return 0;
+}
+
/*
* Insert routing information in a node.
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info)
+ struct nl_info *info, struct nlattr *mx, int mx_len)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
+ int replace = (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_REPLACE));
+ int add = (!info->nlh ||
+ (info->nlh->nlmsg_flags & NLM_F_CREATE));
+ int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ int err;
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
+ for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
/*
* Search for duplicates
*/
@@ -628,20 +690,42 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
/*
* Same priority level
*/
+ if (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_EXCL))
+ return -EEXIST;
+ if (replace) {
+ found++;
+ break;
+ }
- if (iter->rt6i_dev == rt->rt6i_dev &&
+ if (iter->dst.dev == rt->dst.dev &&
iter->rt6i_idev == rt->rt6i_idev &&
ipv6_addr_equal(&iter->rt6i_gateway,
&rt->rt6i_gateway)) {
- if (!(iter->rt6i_flags&RTF_EXPIRES))
+ if (rt->rt6i_nsiblings)
+ rt->rt6i_nsiblings = 0;
+ if (!(iter->rt6i_flags & RTF_EXPIRES))
return -EEXIST;
- iter->rt6i_expires = rt->rt6i_expires;
- if (!(rt->rt6i_flags&RTF_EXPIRES)) {
- iter->rt6i_flags &= ~RTF_EXPIRES;
- iter->rt6i_expires = 0;
- }
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ rt6_clean_expires(iter);
+ else
+ rt6_set_expires(iter, rt->dst.expires);
return -EEXIST;
}
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid long list, we only had siblings if the
+ * route have a gateway.
+ */
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
+ rt->rt6i_nsiblings++;
}
if (iter->rt6i_metric > rt->rt6i_metric)
@@ -654,20 +738,83 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
+ /* Link this route to others same route. */
+ if (rt->rt6i_nsiblings) {
+ unsigned int rt6i_nsiblings;
+ struct rt6_info *sibling, *temp_sibling;
+
+ /* Find the first route that have the same metric */
+ sibling = fn->leaf;
+ while (sibling) {
+ if (sibling->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
+ list_add_tail(&rt->rt6i_siblings,
+ &sibling->rt6i_siblings);
+ break;
+ }
+ sibling = sibling->dst.rt6_next;
+ }
+ /* For each sibling in the list, increment the counter of
+ * siblings. BUG() if counters does not match, list of siblings
+ * is broken!
+ */
+ rt6i_nsiblings = 0;
+ list_for_each_entry_safe(sibling, temp_sibling,
+ &rt->rt6i_siblings, rt6i_siblings) {
+ sibling->rt6i_nsiblings++;
+ BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6i_nsiblings++;
+ }
+ BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ }
+
/*
* insert node
*/
+ if (!replace) {
+ if (!add)
+ pr_warn("NLM_F_CREATE should be set when creating new route\n");
+
+add:
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
+ rt->dst.rt6_next = iter;
+ *ins = rt;
+ rt->rt6i_node = fn;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
+
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
- rt->dst.rt6_next = iter;
- *ins = rt;
- rt->rt6i_node = fn;
- atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
- info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
-
- if ((fn->fn_flags & RTN_RTINFO) == 0) {
- info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
- fn->fn_flags |= RTN_RTINFO;
+ } else {
+ if (!found) {
+ if (add)
+ goto add;
+ pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
+ return -ENOENT;
+ }
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
+ *ins = rt;
+ rt->rt6i_node = fn;
+ rt->dst.rt6_next = iter->dst.rt6_next;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ rt6_release(iter);
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
}
return 0;
@@ -676,7 +823,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
- (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
+ (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
mod_timer(&net->ipv6.ip6_fib_timer,
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
@@ -694,16 +841,31 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
-
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
-
- if (fn == NULL)
+ int allow_create = 1;
+ int replace_required = 0;
+
+ if (info->nlh) {
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ allow_create = 0;
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ replace_required = 1;
+ }
+ if (!allow_create && !replace_required)
+ pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
+
+ fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ offsetof(struct rt6_info, rt6i_dst), allow_create,
+ replace_required);
+ if (IS_ERR(fn)) {
+ err = PTR_ERR(fn);
+ fn = NULL;
goto out;
+ }
pn = fn;
@@ -711,7 +873,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (fn->subtree == NULL) {
+ if (!fn->subtree) {
struct fib6_node *sfn;
/*
@@ -726,7 +888,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
/* Create subtree root node */
sfn = node_alloc();
- if (sfn == NULL)
+ if (!sfn)
goto st_failure;
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
@@ -737,15 +899,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src));
+ rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
- if (sn == NULL) {
+ if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
root, and then (in st_failure) stale node
in main tree.
*/
node_free(sfn);
+ err = PTR_ERR(sn);
goto st_failure;
}
@@ -754,14 +918,17 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
fn->subtree = sfn;
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src));
+ rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
- if (sn == NULL)
+ if (IS_ERR(sn)) {
+ err = PTR_ERR(sn);
goto st_failure;
+ }
}
- if (fn->leaf == NULL) {
+ if (!fn->leaf) {
fn->leaf = rt;
atomic_inc(&rt->rt6i_ref);
}
@@ -769,12 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
}
#endif
- err = fib6_add_rt2node(fn, rt, info);
-
- if (err == 0) {
+ err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
+ if (!err) {
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags&RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn, rt);
+ if (!(rt->rt6i_flags & RTF_CACHE))
+ fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -821,12 +987,12 @@ st_failure:
*/
struct lookup_args {
- int offset; /* key offset on rt6_info */
- struct in6_addr *addr; /* search key */
+ int offset; /* key offset on rt6_info */
+ const struct in6_addr *addr; /* search key */
};
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -851,11 +1017,10 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
fn = next;
continue;
}
-
break;
}
- while(fn) {
+ while (fn) {
if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
struct rt6key *key;
@@ -864,14 +1029,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree)
- fn = fib6_lookup_1(fn->subtree, args + 1);
+ if (fn->subtree) {
+ struct fib6_node *sfn;
+ sfn = fib6_lookup_1(fn->subtree,
+ args + 1);
+ if (!sfn)
+ goto backtrack;
+ fn = sfn;
+ }
#endif
- if (!fn || fn->fn_flags & RTN_RTINFO)
+ if (fn->fn_flags & RTN_RTINFO)
return fn;
}
}
-
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
if (fn->fn_flags & RTN_ROOT)
break;
@@ -881,8 +1054,8 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
- struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -902,8 +1075,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
};
fn = fib6_lookup_1(root, daddr ? args : args + 1);
-
- if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
+ if (!fn || fn->fn_flags & RTN_TL_ROOT)
fn = root;
return fn;
@@ -915,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
*/
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
- struct in6_addr *addr,
- int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
{
struct fib6_node *fn;
@@ -945,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_locate(struct fib6_node *root,
- struct in6_addr *daddr, int dst_len,
- struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
{
struct fib6_node *fn;
@@ -963,7 +1135,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
}
#endif
- if (fn && fn->fn_flags&RTN_RTINFO)
+ if (fn && fn->fn_flags & RTN_RTINFO)
return fn;
return NULL;
@@ -977,14 +1149,13 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
{
- if (fn->fn_flags&RTN_ROOT)
+ if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
- while(fn) {
- if(fn->left)
+ while (fn) {
+ if (fn->left)
return fn->left->leaf;
-
- if(fn->right)
+ if (fn->right)
return fn->right->leaf;
fn = FIB6_SUBTREE(fn);
@@ -1016,18 +1187,20 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn->right) child = fn->right, children |= 1;
- if (fn->left) child = fn->left, children |= 2;
+ if (fn->right)
+ child = fn->right, children |= 1;
+ if (fn->left)
+ child = fn->left, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
/* Subtree root (i.e. fn) may have one child */
- || (children && fn->fn_flags&RTN_ROOT)
+ || (children && fn->fn_flags & RTN_ROOT)
#endif
) {
fn->leaf = fib6_find_prefix(net, fn);
#if RT6_DEBUG >= 2
- if (fn->leaf==NULL) {
+ if (!fn->leaf) {
WARN_ON(!fn->leaf);
fn->leaf = net->ipv6.ip6_null_entry;
}
@@ -1045,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn) pn->right = child;
- else if (pn->left == fn) pn->left = child;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
#if RT6_DEBUG >= 2
else
WARN_ON(1);
@@ -1060,7 +1235,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
- if (child == NULL) {
+ if (!child) {
if (w->root == fn) {
w->root = w->node = NULL;
RT6_TRACE("W %p adjusted by delroot 1\n", w);
@@ -1078,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
w->node = child;
if (children&2) {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
}
@@ -1089,7 +1264,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_unlock(&fib6_walker_lock);
node_free(fn);
- if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
+ if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
rt6_release(pn->leaf);
@@ -1117,13 +1292,24 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (fn->rr_ptr == rt)
fn->rr_ptr = NULL;
+ /* Remove this entry from other siblings */
+ if (rt->rt6i_nsiblings) {
+ struct rt6_info *sibling, *next_sibling;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->rt6i_siblings, rt6i_siblings)
+ sibling->rt6i_nsiblings--;
+ rt->rt6i_nsiblings = 0;
+ list_del_init(&rt->rt6i_siblings);
+ }
+
/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
w->leaf = rt->dst.rt6_next;
- if (w->leaf == NULL)
+ if (!w->leaf)
w->state = FWS_U;
}
}
@@ -1132,7 +1318,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt->dst.rt6_next = NULL;
/* If it was last route, expunge its radix tree node */
- if (fn->leaf == NULL) {
+ if (!fn->leaf) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
fn = fib6_repair_tree(net, fn);
@@ -1146,7 +1332,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
+ if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
fn->leaf = fib6_find_prefix(net, fn);
atomic_inc(&fn->leaf->rt6i_ref);
rt6_release(rt);
@@ -1168,27 +1354,27 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
- if (rt->dst.obsolete>0) {
+ if (rt->dst.obsolete > 0) {
WARN_ON(fn != NULL);
return -ENOENT;
}
#endif
- if (fn == NULL || rt == net->ipv6.ip6_null_entry)
+ if (!fn || rt == net->ipv6.ip6_null_entry)
return -ENOENT;
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags&RTF_CACHE)) {
+ if (!(rt->rt6i_flags & RTF_CACHE)) {
struct fib6_node *pn = fn;
#ifdef CONFIG_IPV6_SUBTREES
/* clones of this route might be in another subtree */
if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags&RTN_ROOT))
+ while (!(pn->fn_flags & RTN_ROOT))
pn = pn->parent;
pn = pn->parent;
}
#endif
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
/*
@@ -1234,11 +1420,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
for (;;) {
fn = w->node;
- if (fn == NULL)
+ if (!fn)
return 0;
if (w->prune && fn != w->root &&
- fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
+ fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
w->state = FWS_C;
w->leaf = fn->leaf;
}
@@ -1267,12 +1453,12 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->state = FWS_C;
w->leaf = fn->leaf;
case FWS_C:
- if (w->leaf && fn->fn_flags&RTN_RTINFO) {
+ if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
- if (w->count < w->skip) {
- w->count++;
- continue;
+ if (w->skip) {
+ w->skip--;
+ goto skip;
}
err = w->func(w);
@@ -1282,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->count++;
continue;
}
+skip:
w->state = FWS_U;
case FWS_U:
if (fn == w->root)
@@ -1341,7 +1528,8 @@ static int fib6_clean_node(struct fib6_walker_t *w)
res = fib6_del(rt, &info);
if (res) {
#if RT6_DEBUG >= 2
- printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+ pr_debug("%s: del failed: rt=%p@%p err=%d\n",
+ __func__, rt, rt->rt6i_node, res);
#endif
continue;
}
@@ -1383,20 +1571,19 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
}
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
+ void *arg)
{
struct fib6_table *table;
- struct hlist_node *node;
struct hlist_head *head;
unsigned int h;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, prune, arg);
+ func, 0, arg);
write_unlock_bh(&table->tb6_lock);
}
}
@@ -1413,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
return 0;
}
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
{
- fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
}
/*
@@ -1441,8 +1627,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
* only if they are not in use now.
*/
- if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
- if (time_after(now, rt->rt6i_expires)) {
+ if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
+ if (time_after(now, rt->dst.expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
}
@@ -1452,11 +1638,20 @@ static int fib6_age(struct rt6_info *rt, void *arg)
time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
- } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
- (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
- return -1;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ return -1;
+ }
}
gc_args.more++;
}
@@ -1466,27 +1661,28 @@ static int fib6_age(struct rt6_info *rt, void *arg)
static DEFINE_SPINLOCK(fib6_gc_lock);
-void fib6_run_gc(unsigned long expires, struct net *net)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
- if (expires != ~0UL) {
+ unsigned long now;
+
+ if (force) {
spin_lock_bh(&fib6_gc_lock);
- gc_args.timeout = expires ? (int)expires :
- net->ipv6.sysctl.ip6_rt_gc_interval;
- } else {
- if (!spin_trylock_bh(&fib6_gc_lock)) {
- mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
- return;
- }
- gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+ } else if (!spin_trylock_bh(&fib6_gc_lock)) {
+ mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+ return;
}
+ gc_args.timeout = expires ? (int)expires :
+ net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = icmp6_dst_gc();
- fib6_clean_all(net, fib6_age, 0, NULL);
+ fib6_clean_all(net, fib6_age, NULL);
+ now = jiffies;
+ net->ipv6.ip6_rt_last_gc = now;
if (gc_args.more)
mod_timer(&net->ipv6.ip6_fib_timer,
- round_jiffies(jiffies
+ round_jiffies(now
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
del_timer(&net->ipv6.ip6_fib_timer);
@@ -1495,7 +1691,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
static void fib6_gc_timer_cb(unsigned long arg)
{
- fib6_run_gc(0, (struct net *)arg);
+ fib6_run_gc(0, (struct net *)arg, true);
}
static int __net_init fib6_net_init(struct net *net)
@@ -1524,6 +1720,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -1534,6 +1731,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
#endif
fib6_tables_init(net);
@@ -1549,7 +1747,7 @@ out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
return -ENOMEM;
- }
+}
static void fib6_net_exit(struct net *net)
{
@@ -1557,8 +1755,10 @@ static void fib6_net_exit(struct net *net)
del_timer_sync(&net->ipv6.ip6_fib_timer);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
kfree(net->ipv6.fib6_local_tbl);
#endif
+ inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
@@ -1584,7 +1784,8 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
+ ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
+ NULL);
if (ret)
goto out_unregister_subsys;
out:
@@ -1602,3 +1803,189 @@ void fib6_gc_cleanup(void)
unregister_pernet_subsys(&fib6_net_ops);
kmem_cache_destroy(fib6_node_kmem);
}
+
+#ifdef CONFIG_PROC_FS
+
+struct ipv6_route_iter {
+ struct seq_net_private p;
+ struct fib6_walker_t w;
+ loff_t skip;
+ struct fib6_table *tbl;
+ __u32 sernum;
+};
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+ struct rt6_info *rt = v;
+ struct ipv6_route_iter *iter = seq->private;
+
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+#else
+ seq_puts(seq, "00000000000000000000000000000000 00 ");
+#endif
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+ else
+ seq_puts(seq, "00000000000000000000000000000000");
+
+ seq_printf(seq, " %08x %08x %08x %08x %8s\n",
+ rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+ rt->dst.__use, rt->rt6i_flags,
+ rt->dst.dev ? rt->dst.dev->name : "");
+ iter->w.leaf = NULL;
+ return 0;
+}
+
+static int ipv6_route_yield(struct fib6_walker_t *w)
+{
+ struct ipv6_route_iter *iter = w->args;
+
+ if (!iter->skip)
+ return 1;
+
+ do {
+ iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->skip--;
+ if (!iter->skip && iter->w.leaf)
+ return 1;
+ } while (iter->w.leaf);
+
+ return 0;
+}
+
+static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
+{
+ memset(&iter->w, 0, sizeof(iter->w));
+ iter->w.func = ipv6_route_yield;
+ iter->w.root = &iter->tbl->tb6_root;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+ iter->sernum = iter->w.root->fn_sernum;
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(&iter->w);
+}
+
+static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+ struct net *net)
+{
+ unsigned int h;
+ struct hlist_node *node;
+
+ if (tbl) {
+ h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
+ node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ } else {
+ h = 0;
+ node = NULL;
+ }
+
+ while (!node && h < FIB6_TABLE_HASHSZ) {
+ node = rcu_dereference_bh(
+ hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
+ }
+ return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
+}
+
+static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+{
+ if (iter->sernum != iter->w.root->fn_sernum) {
+ iter->sernum = iter->w.root->fn_sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+ iter->w.skip = iter->w.count;
+ }
+}
+
+static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int r;
+ struct rt6_info *n;
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (!v)
+ goto iter_table;
+
+ n = ((struct rt6_info *)v)->dst.rt6_next;
+ if (n) {
+ ++*pos;
+ return n;
+ }
+
+iter_table:
+ ipv6_route_check_sernum(iter);
+ read_lock(&iter->tbl->tb6_lock);
+ r = fib6_walk_continue(&iter->w);
+ read_unlock(&iter->tbl->tb6_lock);
+ if (r > 0) {
+ if (v)
+ ++*pos;
+ return iter->w.leaf;
+ } else if (r < 0) {
+ fib6_walker_unlink(&iter->w);
+ return NULL;
+ }
+ fib6_walker_unlink(&iter->w);
+
+ iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
+ if (!iter->tbl)
+ return NULL;
+
+ ipv6_route_seq_setup_walk(iter);
+ goto iter_table;
+}
+
+static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU_BH)
+{
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ rcu_read_lock_bh();
+ iter->tbl = ipv6_route_seq_next_table(NULL, net);
+ iter->skip = *pos;
+
+ if (iter->tbl) {
+ ipv6_route_seq_setup_walk(iter);
+ return ipv6_route_seq_next(seq, NULL, pos);
+ } else {
+ return NULL;
+ }
+}
+
+static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
+{
+ struct fib6_walker_t *w = &iter->w;
+ return w->node && !(w->state == FWS_U && w->node == w->root);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU_BH)
+{
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (ipv6_route_iter_active(iter))
+ fib6_walker_unlink(&iter->w);
+
+ rcu_read_unlock_bh();
+}
+
+static const struct seq_operations ipv6_route_seq_ops = {
+ .start = ipv6_route_seq_start,
+ .next = ipv6_route_seq_next,
+ .stop = ipv6_route_seq_stop,
+ .show = ipv6_route_seq_show
+};
+
+int ipv6_route_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter));
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 13654686aea..4052694c6f2 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,23 +15,18 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
@@ -39,7 +34,7 @@
#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
in old IPv6 RFC. Well, it was reasonable value.
*/
-#define FL_MAX_LINGER 60 /* Maximal linger timeout */
+#define FL_MAX_LINGER 150 /* Maximal linger timeout */
/* FL hash table */
@@ -49,25 +44,38 @@
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
static atomic_t fl_size = ATOMIC_INIT(0);
-static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(unsigned long dummy);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
/* FL hash table lock: it protects only of GC */
-static DEFINE_RWLOCK(ip6_fl_lock);
+static DEFINE_SPINLOCK(ip6_fl_lock);
/* Big socket sock */
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
+#define for_each_fl_rcu(hash, fl) \
+ for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
+ fl != NULL; \
+ fl = rcu_dereference_bh(fl->next))
+#define for_each_fl_continue_rcu(fl) \
+ for (fl = rcu_dereference_bh(fl->next); \
+ fl != NULL; \
+ fl = rcu_dereference_bh(fl->next))
+
+#define for_each_sk_fl_rcu(np, sfl) \
+ for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \
+ sfl != NULL; \
+ sfl = rcu_dereference_bh(sfl->next))
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
- for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
+ for_each_fl_rcu(FL_HASH(label), fl) {
if (fl->label == label && net_eq(fl->fl_net, net))
return fl;
}
@@ -78,11 +86,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
- read_lock_bh(&ip6_fl_lock);
+ rcu_read_lock_bh();
fl = __fl_lookup(net, label);
- if (fl)
- atomic_inc(&fl->users);
- read_unlock_bh(&ip6_fl_lock);
+ if (fl && !atomic_inc_not_zero(&fl->users))
+ fl = NULL;
+ rcu_read_unlock_bh();
return fl;
}
@@ -90,15 +98,17 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
static void fl_free(struct ip6_flowlabel *fl)
{
if (fl) {
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
release_net(fl->fl_net);
kfree(fl->opt);
+ kfree_rcu(fl, rcu);
}
- kfree(fl);
}
static void fl_release(struct ip6_flowlabel *fl)
{
- write_lock_bh(&ip6_fl_lock);
+ spin_lock_bh(&ip6_fl_lock);
fl->lastuse = jiffies;
if (atomic_dec_and_test(&fl->users)) {
@@ -115,7 +125,7 @@ static void fl_release(struct ip6_flowlabel *fl)
time_after(ip6_fl_gc_timer.expires, ttd))
mod_timer(&ip6_fl_gc_timer, ttd);
}
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
}
static void ip6_fl_gc(unsigned long dummy)
@@ -124,12 +134,15 @@ static void ip6_fl_gc(unsigned long dummy)
unsigned long now = jiffies;
unsigned long sched = 0;
- write_lock(&ip6_fl_lock);
+ spin_lock(&ip6_fl_lock);
for (i=0; i<=FL_HASH_MASK; i++) {
- struct ip6_flowlabel *fl, **flp;
+ struct ip6_flowlabel *fl;
+ struct ip6_flowlabel __rcu **flp;
+
flp = &fl_ht[i];
- while ((fl=*flp) != NULL) {
+ while ((fl = rcu_dereference_protected(*flp,
+ lockdep_is_held(&ip6_fl_lock))) != NULL) {
if (atomic_read(&fl->users) == 0) {
unsigned long ttd = fl->lastuse + fl->linger;
if (time_after(ttd, fl->expires))
@@ -152,18 +165,21 @@ static void ip6_fl_gc(unsigned long dummy)
if (sched) {
mod_timer(&ip6_fl_gc_timer, sched);
}
- write_unlock(&ip6_fl_lock);
+ spin_unlock(&ip6_fl_lock);
}
static void __net_exit ip6_fl_purge(struct net *net)
{
int i;
- write_lock(&ip6_fl_lock);
+ spin_lock(&ip6_fl_lock);
for (i = 0; i <= FL_HASH_MASK; i++) {
- struct ip6_flowlabel *fl, **flp;
+ struct ip6_flowlabel *fl;
+ struct ip6_flowlabel __rcu **flp;
+
flp = &fl_ht[i];
- while ((fl = *flp) != NULL) {
+ while ((fl = rcu_dereference_protected(*flp,
+ lockdep_is_held(&ip6_fl_lock))) != NULL) {
if (net_eq(fl->fl_net, net) &&
atomic_read(&fl->users) == 0) {
*flp = fl->next;
@@ -174,7 +190,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
flp = &fl->next;
}
}
- write_unlock(&ip6_fl_lock);
+ spin_unlock(&ip6_fl_lock);
}
static struct ip6_flowlabel *fl_intern(struct net *net,
@@ -184,10 +200,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
fl->label = label & IPV6_FLOWLABEL_MASK;
- write_lock_bh(&ip6_fl_lock);
+ spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
- fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+ fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
if (fl->label) {
lfl = __fl_lookup(net, fl->label);
if (lfl == NULL)
@@ -206,16 +222,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
lfl = __fl_lookup(net, fl->label);
if (lfl != NULL) {
atomic_inc(&lfl->users);
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
return lfl;
}
}
fl->lastuse = jiffies;
fl->next = fl_ht[FL_HASH(fl->label)];
- fl_ht[FL_HASH(fl->label)] = fl;
+ rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
atomic_inc(&fl_size);
- write_unlock_bh(&ip6_fl_lock);
+ spin_unlock_bh(&ip6_fl_lock);
return NULL;
}
@@ -230,17 +246,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
label &= IPV6_FLOWLABEL_MASK;
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
struct ip6_flowlabel *fl = sfl->fl;
if (fl->label == label) {
fl->lastuse = jiffies;
atomic_inc(&fl->users);
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return fl;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -251,11 +267,21 @@ void fl6_free_socklist(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_fl_socklist *sfl;
- while ((sfl = np->ipv6_fl_list) != NULL) {
+ if (!rcu_access_pointer(np->ipv6_fl_list))
+ return;
+
+ spin_lock_bh(&ip6_sk_fl_lock);
+ while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
np->ipv6_fl_list = sfl->next;
+ spin_unlock_bh(&ip6_sk_fl_lock);
+
fl_release(sfl->fl);
- kfree(sfl);
+ kfree_rcu(sfl, rcu);
+
+ spin_lock_bh(&ip6_sk_fl_lock);
}
+ spin_unlock_bh(&ip6_sk_fl_lock);
}
/* Service routines */
@@ -293,6 +319,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
opt_space->opt_flen = fopt->opt_flen;
return opt_space;
}
+EXPORT_SYMBOL_GPL(fl6_merge_options);
static unsigned long check_linger(unsigned long ttl)
{
@@ -311,6 +338,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
expires = check_linger(expires);
if (!expires)
return -EPERM;
+
+ spin_lock_bh(&ip6_fl_lock);
fl->lastuse = jiffies;
if (time_before(fl->linger, linger))
fl->linger = linger;
@@ -318,12 +347,14 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
expires = fl->linger;
if (time_before(fl->expires, fl->lastuse + expires))
fl->expires = fl->lastuse + expires;
+ spin_unlock_bh(&ip6_fl_lock);
+
return 0;
}
static struct ip6_flowlabel *
-fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
- int optlen, int *err_p)
+fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+ char __user *optval, int optlen, int *err_p)
{
struct ip6_flowlabel *fl = NULL;
int olen;
@@ -342,7 +373,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
if (olen > 0) {
struct msghdr msg;
- struct flowi flowi;
+ struct flowi6 flowi6;
int junk;
err = -ENOMEM;
@@ -358,10 +389,10 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
msg.msg_controllen = olen;
msg.msg_control = (void*)(fl->opt+1);
- flowi.oif = 0;
+ memset(&flowi6, 0, sizeof(flowi6));
- err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk,
- &junk, &junk);
+ err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
+ &junk, &junk, &junk);
if (err)
goto done;
err = -EINVAL;
@@ -385,17 +416,17 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
err = -EINVAL;
goto done;
}
- ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+ fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1);
switch (fl->share) {
case IPV6_FL_S_EXCL:
case IPV6_FL_S_ANY:
break;
case IPV6_FL_S_PROCESS:
- fl->owner = current->pid;
+ fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
break;
case IPV6_FL_S_USER:
- fl->owner = current_euid();
+ fl->owner.uid = current_euid();
break;
default:
err = -EINVAL;
@@ -419,8 +450,10 @@ static int mem_check(struct sock *sk)
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl)
count++;
+ rcu_read_unlock_bh();
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
@@ -431,42 +464,51 @@ static int mem_check(struct sock *sk)
return 0;
}
-static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
+static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
+ struct ip6_flowlabel *fl)
{
- if (h1 == h2)
- return 0;
- if (h1 == NULL || h2 == NULL)
- return 1;
- if (h1->hdrlen != h2->hdrlen)
- return 1;
- return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
+ spin_lock_bh(&ip6_sk_fl_lock);
+ sfl->fl = fl;
+ sfl->next = np->ipv6_fl_list;
+ rcu_assign_pointer(np->ipv6_fl_list, sfl);
+ spin_unlock_bh(&ip6_sk_fl_lock);
}
-static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
+int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
+ int flags)
{
- if (o1 == o2)
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_fl_socklist *sfl;
+
+ if (flags & IPV6_FL_F_REMOTE) {
+ freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
return 0;
- if (o1 == NULL || o2 == NULL)
- return 1;
- if (o1->opt_nflen != o2->opt_nflen)
- return 1;
- if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
- return 1;
- if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
- return 1;
- if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
- return 1;
- return 0;
-}
+ }
-static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
- struct ip6_flowlabel *fl)
-{
- write_lock_bh(&ip6_sk_fl_lock);
- sfl->fl = fl;
- sfl->next = np->ipv6_fl_list;
- np->ipv6_fl_list = sfl;
- write_unlock_bh(&ip6_sk_fl_lock);
+ if (np->repflow) {
+ freq->flr_label = np->flow_label;
+ return 0;
+ }
+
+ rcu_read_lock_bh();
+
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ spin_lock_bh(&ip6_fl_lock);
+ freq->flr_label = sfl->fl->label;
+ freq->flr_dst = sfl->fl->dst;
+ freq->flr_share = sfl->fl->share;
+ freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
+ freq->flr_linger = sfl->fl->linger / HZ;
+
+ spin_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock_bh();
+ return 0;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return -ENOENT;
}
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
@@ -476,7 +518,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
struct ipv6_fl_socklist *sfl1=NULL;
- struct ipv6_fl_socklist *sfl, **sflp;
+ struct ipv6_fl_socklist *sfl;
+ struct ipv6_fl_socklist __rcu **sflp;
struct ip6_flowlabel *fl, *fl1 = NULL;
@@ -488,33 +531,45 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
switch (freq.flr_action) {
case IPV6_FL_A_PUT:
- write_lock_bh(&ip6_sk_fl_lock);
- for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+ if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ if (!np->repflow)
+ return -ESRCH;
+ np->flow_label = 0;
+ np->repflow = 0;
+ return 0;
+ }
+ spin_lock_bh(&ip6_sk_fl_lock);
+ for (sflp = &np->ipv6_fl_list;
+ (sfl = rcu_dereference(*sflp))!=NULL;
+ sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = sfl->next;
- write_unlock_bh(&ip6_sk_fl_lock);
+ *sflp = rcu_dereference(sfl->next);
+ spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
- kfree(sfl);
+ kfree_rcu(sfl, rcu);
return 0;
}
}
- write_unlock_bh(&ip6_sk_fl_lock);
+ spin_unlock_bh(&ip6_sk_fl_lock);
return -ESRCH;
case IPV6_FL_A_RENEW:
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
if (sfl->fl->label == freq.flr_label) {
err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
return err;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
- if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+ if (freq.flr_share == IPV6_FL_S_NONE &&
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
fl = fl_lookup(net, freq.flr_label);
if (fl) {
err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
@@ -525,21 +580,35 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
return -ESRCH;
case IPV6_FL_A_GET:
+ if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+ struct net *net = sock_net(sk);
+ if (net->ipv6.sysctl.flowlabel_consistency) {
+ net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
+ return -EPERM;
+ }
+
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+
+ np->repflow = 1;
+ return 0;
+ }
+
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
- fl = fl_create(net, &freq, optval, optlen, &err);
+ fl = fl_create(net, sk, &freq, optval, optlen, &err);
if (fl == NULL)
return err;
sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
if (freq.flr_label) {
err = -EEXIST;
- read_lock_bh(&ip6_sk_fl_lock);
- for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_flags&IPV6_FL_F_EXCL) {
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
goto done;
}
fl1 = sfl->fl;
@@ -547,7 +616,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
break;
}
}
- read_unlock_bh(&ip6_sk_fl_lock);
+ rcu_read_unlock_bh();
if (fl1 == NULL)
fl1 = fl_lookup(net, freq.flr_label);
@@ -559,12 +628,10 @@ recheck:
err = -EPERM;
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
- fl1->owner != fl->owner)
- goto release;
-
- err = -EINVAL;
- if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
- ipv6_opt_cmp(fl1->opt, fl->opt))
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
+ (fl1->owner.pid == fl->owner.pid)) ||
+ ((fl1->share == IPV6_FL_S_USER) &&
+ uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
@@ -619,6 +686,7 @@ done:
struct ip6fl_iter_state {
struct seq_net_private p;
+ struct pid_namespace *pid_ns;
int bucket;
};
@@ -631,13 +699,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
- fl = fl_ht[state->bucket];
-
- while (fl && !net_eq(fl->fl_net, net))
- fl = fl->next;
- if (fl)
- break;
+ for_each_fl_rcu(state->bucket, fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
}
+ fl = NULL;
+out:
return fl;
}
@@ -646,18 +714,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
struct net *net = seq_file_net(seq);
- fl = fl->next;
+ for_each_fl_continue_rcu(fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
+
try_again:
- while (fl && !net_eq(fl->fl_net, net))
- fl = fl->next;
-
- while (!fl) {
- if (++state->bucket <= FL_HASH_MASK) {
- fl = fl_ht[state->bucket];
- goto try_again;
- } else
- break;
+ if (++state->bucket <= FL_HASH_MASK) {
+ for_each_fl_rcu(state->bucket, fl) {
+ if (net_eq(fl->fl_net, net))
+ goto out;
+ }
+ goto try_again;
}
+ fl = NULL;
+
+out:
return fl;
}
@@ -671,9 +743,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
}
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(ip6_fl_lock)
+ __acquires(RCU)
{
- read_lock_bh(&ip6_fl_lock);
+ rcu_read_lock_bh();
return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
@@ -690,13 +762,14 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void ip6fl_seq_stop(struct seq_file *seq, void *v)
- __releases(ip6_fl_lock)
+ __releases(RCU)
{
- read_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock_bh();
}
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
"Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
@@ -704,9 +777,13 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
struct ip6_flowlabel *fl = v;
seq_printf(seq,
"%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
- (unsigned)ntohl(fl->label),
+ (unsigned int)ntohl(fl->label),
fl->share,
- (unsigned)fl->owner,
+ ((fl->share == IPV6_FL_S_PROCESS) ?
+ pid_nr_ns(fl->owner.pid, state->pid_ns) :
+ ((fl->share == IPV6_FL_S_USER) ?
+ from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
+ 0)),
atomic_read(&fl->users),
fl->linger/HZ,
(long)(fl->expires - jiffies)/HZ,
@@ -725,8 +802,29 @@ static const struct seq_operations ip6fl_seq_ops = {
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_net(inode, file, &ip6fl_seq_ops,
- sizeof(struct ip6fl_iter_state));
+ struct seq_file *seq;
+ struct ip6fl_iter_state *state;
+ int err;
+
+ err = seq_open_net(inode, file, &ip6fl_seq_ops,
+ sizeof(struct ip6fl_iter_state));
+
+ if (!err) {
+ seq = file->private_data;
+ state = ip6fl_seq_private(seq);
+ rcu_read_lock();
+ state->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ rcu_read_unlock();
+ }
+ return err;
+}
+
+static int ip6fl_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ put_pid_ns(state->pid_ns);
+ return seq_release_net(inode, file);
}
static const struct file_operations ip6fl_seq_fops = {
@@ -734,20 +832,20 @@ static const struct file_operations ip6fl_seq_fops = {
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_net,
+ .release = ip6fl_seq_release,
};
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_net_fops_create(net, "ip6_flowlabel",
- S_IRUGO, &ip6fl_seq_fops))
+ if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ &ip6fl_seq_fops))
return -ENOMEM;
return 0;
}
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
- proc_net_remove(net, "ip6_flowlabel");
+ remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 00000000000..3873181ed85
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1722 @@
+/*
+ * GRE over IPv6 protocol decoder.
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net {
+ struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+
+ struct net_device *fb_tunnel_dev;
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. if a key is present in packet
+ it will match only tunnel with the same key; if it is not present,
+ it will match only keyless tunnel.
+
+ All keysless packets, if not matched configured keyless tunnels
+ will match fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr)
+{
+ u32 hash = ipv6_addr_hash(addr);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+#define tunnels_r_l tunnels[3]
+#define tunnels_r tunnels[2]
+#define tunnels_l tunnels[1]
+#define tunnels_wc tunnels[0]
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+ const struct in6_addr *remote, const struct in6_addr *local,
+ __be32 key, __be16 gre_proto)
+{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
+ unsigned int h0 = HASH_ADDR(remote);
+ unsigned int h1 = HASH_KEY(key);
+ struct ip6_tnl *t, *cand = NULL;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ ARPHRD_ETHER : ARPHRD_IP6GRE;
+ int score, cand_score = 4;
+
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
+ if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+ (!ipv6_addr_equal(local, &t->parms.raddr) ||
+ !ipv6_addr_is_multicast(local))) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ if (cand != NULL)
+ return cand;
+
+ dev = ign->fb_tunnel_dev;
+ if (dev->flags & IFF_UP)
+ return netdev_priv(dev);
+
+ return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+ const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = HASH_KEY(p->i_key);
+ int prio = 0;
+
+ if (!ipv6_addr_any(local))
+ prio |= 1;
+ if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
+ prio |= 2;
+ h ^= HASH_ADDR(remote);
+ }
+
+ return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+ const struct ip6_tnl *t)
+{
+ return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = ip6gre_bucket(ign, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+ const struct __ip6_tnl_parm *parms,
+ int type)
+{
+ const struct in6_addr *remote = &parms->raddr;
+ const struct in6_addr *local = &parms->laddr;
+ __be32 key = parms->i_key;
+ int link = parms->link;
+ struct ip6_tnl *t;
+ struct ip6_tnl __rcu **tp;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ for (tp = __ip6gre_bucket(ign, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next)
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ key == t->parms.i_key &&
+ link == t->parms.link &&
+ type == t->dev->type)
+ break;
+
+ return t;
+}
+
+static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
+ const struct __ip6_tnl_parm *parms, int create)
+{
+ struct ip6_tnl *t, *nt;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t || !create)
+ return t;
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else
+ strcpy(name, "ip6gre%d");
+
+ dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ if (!dev)
+ return NULL;
+
+ dev_net_set(dev, net);
+
+ nt = netdev_priv(dev);
+ nt->parms = *parms;
+ dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ nt->dev = dev;
+ nt->net = dev_net(dev);
+ ip6gre_tnl_link_config(nt, 1);
+
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+ return nt;
+
+failed_free:
+ free_netdev(dev);
+ return NULL;
+}
+
+static void ip6gre_tunnel_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+ ip6gre_tunnel_unlink(ign, t);
+ dev_put(dev);
+}
+
+
+static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ __be16 *p = (__be16 *)(skb->data + offset);
+ int grehlen = offset + 4;
+ struct ip6_tnl *t;
+ __be16 flags;
+
+ flags = p[0];
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ return;
+ if (flags&GRE_KEY) {
+ grehlen += 4;
+ if (flags&GRE_CSUM)
+ grehlen += 4;
+ }
+ }
+
+ /* If only 8 bytes returned, keyed message will be dropped here */
+ if (!pskb_may_pull(skb, grehlen))
+ return;
+ ipv6h = (const struct ipv6hdr *)skb->data;
+ p = (__be16 *)(skb->data + offset);
+
+ t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
+ flags & GRE_KEY ?
+ *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+ p[1]);
+ if (t == NULL)
+ return;
+
+ switch (type) {
+ __u32 teli;
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 mtu;
+ case ICMPV6_DEST_UNREACH:
+ net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
+ break;
+ case ICMPV6_TIME_EXCEED:
+ if (code == ICMPV6_EXC_HOPLIMIT) {
+ net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PARAMPROB:
+ teli = 0;
+ if (code == ICMPV6_HDR_FIELD)
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
+
+ if (teli && teli == info - 2) {
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+ if (tel->encap_limit == 0) {
+ net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ } else {
+ net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ mtu = info - offset;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
+ break;
+ }
+
+ if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+}
+
+static int ip6gre_rcv(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ u8 *h;
+ __be16 flags;
+ __sum16 csum = 0;
+ __be32 key = 0;
+ u32 seqno = 0;
+ struct ip6_tnl *tunnel;
+ int offset = 4;
+ __be16 gre_proto;
+ int err;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ goto drop;
+
+ ipv6h = ipv6_hdr(skb);
+ h = skb->data;
+ flags = *(__be16 *)h;
+
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+ /* - Version must be 0.
+ - We do not support routing headers.
+ */
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ goto drop;
+
+ if (flags&GRE_CSUM) {
+ csum = skb_checksum_simple_validate(skb);
+ offset += 4;
+ }
+ if (flags&GRE_KEY) {
+ key = *(__be32 *)(h + offset);
+ offset += 4;
+ }
+ if (flags&GRE_SEQ) {
+ seqno = ntohl(*(__be32 *)(h + offset));
+ offset += 4;
+ }
+ }
+
+ gre_proto = *(__be16 *)(h + 2);
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, key,
+ gre_proto);
+ if (tunnel) {
+ struct pcpu_sw_netstats *tstats;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
+ tunnel->dev->stats.rx_dropped++;
+ goto drop;
+ }
+
+ skb->protocol = gre_proto;
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
+ skb->protocol = htons(ETH_P_IP);
+ if ((*(h + offset) & 0xF0) != 0x40)
+ offset += 4;
+ }
+
+ skb->mac_header = skb->network_header;
+ __pskb_pull(skb, offset);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+
+ if (((flags&GRE_CSUM) && csum) ||
+ (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ if (tunnel->parms.i_flags&GRE_SEQ) {
+ if (!(flags&GRE_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(seqno - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ tunnel->i_seqno = seqno + 1;
+ }
+
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
+
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+ skb_reset_network_header(skb);
+
+ err = IP6_ECN_decapsulate(ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ netif_rx(skb);
+
+ return 0;
+ }
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+struct ipv6_tel_txoption {
+ struct ipv6_txoptions ops;
+ __u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+ memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+ opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+ opt->dst_opt[3] = 1;
+ opt->dst_opt[4] = encap_limit;
+ opt->dst_opt[5] = IPV6_TLV_PADN;
+ opt->dst_opt[6] = 1;
+
+ opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.opt_nflen = 8;
+}
+
+static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi6 *fl6,
+ int encap_limit,
+ __u32 *pmtu)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
+ struct net_device *tdev; /* Device to other host */
+ struct ipv6hdr *ipv6h; /* Our new IP header */
+ unsigned int max_headroom = 0; /* The extra header space needed */
+ int gre_hlen;
+ struct ipv6_tel_txoption opt;
+ int mtu;
+ struct dst_entry *dst = NULL, *ndst = NULL;
+ struct net_device_stats *stats = &tunnel->dev->stats;
+ int err = -1;
+ u8 proto;
+ struct sk_buff *new_skb;
+
+ if (dev->type == ARPHRD_ETHER)
+ IPCB(skb)->flags = 0;
+
+ if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
+ gre_hlen = 0;
+ ipv6h = (struct ipv6hdr *)skb->data;
+ fl6->daddr = ipv6h->daddr;
+ } else {
+ gre_hlen = tunnel->hlen;
+ fl6->daddr = tunnel->parms.raddr;
+ }
+
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(tunnel);
+
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
+
+ if (ndst->error)
+ goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
+ }
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ tunnel->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ mtu = dst_mtu(dst) - sizeof(*ipv6h);
+ if (encap_limit >= 0) {
+ max_headroom += 8;
+ mtu -= 8;
+ }
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->len > mtu) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
+ goto tx_err_dst_release;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+ if (!new_skb)
+ goto tx_err_dst_release;
+
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+
+ proto = NEXTHDR_GRE;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ skb_push(skb, gre_hlen);
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, sizeof(*ipv6h));
+
+ /*
+ * Push down and install the IP header.
+ */
+ ipv6h = ipv6_hdr(skb);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ipv6h->hop_limit = tunnel->parms.hop_limit;
+ ipv6h->nexthdr = proto;
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
+
+ ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
+ ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+
+ if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
+
+ if (tunnel->parms.o_flags&GRE_SEQ) {
+ ++tunnel->o_seqno;
+ *ptr = htonl(tunnel->o_seqno);
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_KEY) {
+ *ptr = tunnel->parms.o_key;
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_CSUM) {
+ *ptr = 0;
+ *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
+ skb->len - sizeof(struct ipv6hdr));
+ }
+ }
+
+ ip6tunnel_xmit(skb, dev);
+ if (ndst)
+ ip6_tnl_dst_store(tunnel, ndst);
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(ndst);
+ return err;
+}
+
+static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ const struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int encap_limit = -1;
+ __u16 offset;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ return -1;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ if (err == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+
+static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
+ const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = skb->protocol;
+
+ err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+
+ return err;
+}
+
+static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ int ret;
+
+ if (!ip6_tnl_xmit_ctl(t))
+ goto tx_err;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ret = ip6gre_xmit_ipv4(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ret = ip6gre_xmit_ipv6(skb, dev);
+ break;
+ default:
+ ret = ip6gre_xmit_other(skb, dev);
+ break;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+ int addend = sizeof(struct ipv6hdr) + 4;
+
+ if (dev->type != ARPHRD_ETHER) {
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+ }
+
+ /* Set up flowi template */
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
+
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags&IP6_TNL_F_CAP_XMIT &&
+ p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+
+ /* Precalculate GRE options length */
+ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+ if (t->parms.o_flags&GRE_CSUM)
+ addend += 4;
+ if (t->parms.o_flags&GRE_KEY)
+ addend += 4;
+ if (t->parms.o_flags&GRE_SEQ)
+ addend += 4;
+ }
+ t->hlen = addend;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+ struct rt6_info *rt = rt6_lookup(t->net,
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt == NULL)
+ return;
+
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+
+ if (set_mtu) {
+ dev->mtu = rt->dst.dev->mtu - addend;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ }
+ ip6_rt_put(rt);
+ }
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t,
+ const struct __ip6_tnl_parm *p, int set_mtu)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.flags = p->flags;
+ t->parms.hop_limit = p->hop_limit;
+ t->parms.encap_limit = p->encap_limit;
+ t->parms.flowinfo = p->flowinfo;
+ t->parms.link = p->link;
+ t->parms.proto = p->proto;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.i_flags = p->i_flags;
+ t->parms.o_flags = p->o_flags;
+ ip6_tnl_dst_reset(t);
+ ip6gre_tnl_link_config(t, set_mtu);
+ return 0;
+}
+
+static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
+ const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->i_flags = u->i_flags;
+ p->o_flags = u->o_flags;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
+ const struct __ip6_tnl_parm *p)
+{
+ u->proto = IPPROTO_GRE;
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->i_flags = p->i_flags;
+ u->o_flags = p->o_flags;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+static int ip6gre_tunnel_ioctl(struct net_device *dev,
+ struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ign->fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ }
+ memset(&p, 0, sizeof(p));
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
+ goto done;
+
+ if (!(p.i_flags&GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags&GRE_KEY))
+ p.o_key = 0;
+
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
+
+ if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else {
+ t = netdev_priv(dev);
+
+ ip6gre_tunnel_unlink(ign, t);
+ synchronize_net();
+ ip6gre_tnl_change(t, &p1, 1);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+ }
+
+ if (t) {
+ err = 0;
+
+ memset(&p, 0, sizeof(p));
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto done;
+
+ if (dev == ign->fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == netdev_priv(ign->fb_tunnel_dev))
+ goto done;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+
+static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < 68 ||
+ new_mtu > 0xFFF8 - dev->hard_header_len)
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ const void *daddr, const void *saddr, unsigned int len)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+ __be16 *p = (__be16 *)(ipv6h+1);
+
+ ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+ ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->nexthdr = NEXTHDR_GRE;
+ ipv6h->saddr = t->parms.laddr;
+ ipv6h->daddr = t->parms.raddr;
+
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if (saddr)
+ memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
+ if (daddr)
+ memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
+ if (!ipv6_addr_any(&ipv6h->daddr))
+ return t->hlen;
+
+ return -t->hlen;
+}
+
+static const struct header_ops ip6gre_header_ops = {
+ .create = ip6gre_header,
+};
+
+static const struct net_device_ops ip6gre_netdev_ops = {
+ .ndo_init = ip6gre_tunnel_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+static void ip6gre_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static void ip6gre_tunnel_setup(struct net_device *dev)
+{
+ struct ip6_tnl *t;
+
+ dev->netdev_ops = &ip6gre_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->type = ARPHRD_IP6GRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
+ dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+ dev->flags |= IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int i;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
+
+ if (ipv6_addr_any(&tunnel->parms.raddr))
+ dev->header_ops = &ip6gre_header_ops;
+
+ dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ struct pcpu_sw_netstats *ip6gre_tunnel_stats;
+ ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
+ u64_stats_init(&ip6gre_tunnel_stats->syncp);
+ }
+
+
+ return 0;
+}
+
+static void ip6gre_fb_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ tunnel->hlen = sizeof(struct ipv6hdr) + 4;
+
+ dev_hold(dev);
+}
+
+
+static struct inet6_protocol ip6gre_protocol __read_mostly = {
+ .handler = ip6gre_rcv,
+ .err_handler = ip6gre_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
+{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *dev, *aux;
+ int prio;
+
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+ dev->rtnl_link_ops == &ip6gre_tap_ops)
+ unregister_netdevice_queue(dev, head);
+
+ for (prio = 0; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip6_tnl *t;
+
+ t = rtnl_dereference(ign->tunnels[prio][h]);
+
+ while (t != NULL) {
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
+ t = rtnl_dereference(t->next);
+ }
+ }
+ }
+}
+
+static int __net_init ip6gre_init_net(struct net *net)
+{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+ ip6gre_tunnel_setup);
+ if (!ign->fb_tunnel_dev) {
+ err = -ENOMEM;
+ goto err_alloc_dev;
+ }
+ dev_net_set(ign->fb_tunnel_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
+
+ ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
+ ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ err = register_netdev(ign->fb_tunnel_dev);
+ if (err)
+ goto err_reg_dev;
+
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
+ return 0;
+
+err_reg_dev:
+ ip6gre_dev_free(ign->fb_tunnel_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit ip6gre_exit_net(struct net *net)
+{
+ LIST_HEAD(list);
+
+ rtnl_lock();
+ ip6gre_destroy_tunnels(net, &list);
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
+static struct pernet_operations ip6gre_net_ops = {
+ .init = ip6gre_init_net,
+ .exit = ip6gre_exit_net,
+ .id = &ip6gre_net_id,
+ .size = sizeof(struct ip6gre_net),
+};
+
+static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ __be16 flags;
+
+ if (!data)
+ return 0;
+
+ flags = 0;
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (flags & (GRE_VERSION|GRE_ROUTING))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ struct in6_addr daddr;
+
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ return -EADDRNOTAVAIL;
+ }
+
+ if (!data)
+ goto out;
+
+ if (data[IFLA_GRE_REMOTE]) {
+ nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+ if (ipv6_addr_any(&daddr))
+ return -EINVAL;
+ }
+
+out:
+ return ip6gre_tunnel_validate(tb, data);
+}
+
+
+static void ip6gre_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_GRE_LINK])
+ parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+ if (data[IFLA_GRE_IFLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+ if (data[IFLA_GRE_OFLAGS])
+ parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+ if (data[IFLA_GRE_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+ if (data[IFLA_GRE_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+ if (data[IFLA_GRE_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
+
+ if (data[IFLA_GRE_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
+
+ if (data[IFLA_GRE_FLOWINFO])
+ parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+
+ if (data[IFLA_GRE_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+}
+
+static int ip6gre_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static const struct net_device_ops ip6gre_tap_netdev_ops = {
+ .ndo_init = ip6gre_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+static void ip6gre_tap_setup(struct net_device *dev)
+{
+
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6gre_tap_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->iflink = 0;
+ dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct ip6_tnl *nt;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ nt = netdev_priv(dev);
+ ip6gre_netlink_parms(data, &nt->parms);
+
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+
+ if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+ eth_hw_addr_random(dev);
+
+ nt->dev = dev;
+ nt->net = dev_net(dev);
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ err = register_netdevice(dev);
+ if (err)
+ goto out;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+
+out:
+ return err;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t, *nt = netdev_priv(dev);
+ struct net *net = nt->net;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct __ip6_tnl_parm p;
+
+ if (dev == ign->fb_tunnel_dev)
+ return -EINVAL;
+
+ ip6gre_netlink_parms(data, &p);
+
+ t = ip6gre_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else {
+ t = nt;
+
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+
+ return 0;
+}
+
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
+static size_t ip6gre_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_GRE_LINK */
+ nla_total_size(4) +
+ /* IFLA_GRE_IFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_OFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_IKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_OKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_GRE_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_GRE_TTL */
+ nla_total_size(1) +
+ /* IFLA_GRE_TOS */
+ nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_LIMIT */
+ nla_total_size(1) +
+ /* IFLA_GRE_FLOWINFO */
+ nla_total_size(4) +
+ /* IFLA_GRE_FLAGS */
+ nla_total_size(4) +
+ 0;
+}
+
+static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm *p = &t->parms;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
+ nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
+ /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
+ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
+ nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
+ nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
+ [IFLA_GRE_LINK] = { .type = NLA_U32 },
+ [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_IKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_OKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_TTL] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+ .kind = "ip6gre",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tunnel_setup,
+ .validate = ip6gre_tunnel_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
+ .kind = "ip6gretap",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tap_setup,
+ .validate = ip6gre_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+static int __init ip6gre_init(void)
+{
+ int err;
+
+ pr_info("GRE over IPv6 tunneling driver\n");
+
+ err = register_pernet_device(&ip6gre_net_ops);
+ if (err < 0)
+ return err;
+
+ err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ if (err < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
+ goto add_proto_failed;
+ }
+
+ err = rtnl_link_register(&ip6gre_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ err = rtnl_link_register(&ip6gre_tap_ops);
+ if (err < 0)
+ goto tap_ops_failed;
+
+out:
+ return err;
+
+tap_ops_failed:
+ rtnl_link_unregister(&ip6gre_link_ops);
+rtnl_link_failed:
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+add_proto_failed:
+ unregister_pernet_device(&ip6gre_net_ops);
+ goto out;
+}
+
+static void __exit ip6gre_fini(void)
+{
+ rtnl_link_unregister(&ip6gre_tap_ops);
+ rtnl_link_unregister(&ip6gre_link_ops);
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ unregister_pernet_device(&ip6gre_net_ops);
+}
+
+module_init(ip6gre_init);
+module_exit(ip6gre_fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
+MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
new file mode 100644
index 00000000000..4578e23834f
--- /dev/null
+++ b/net/ipv6/ip6_icmp.c
@@ -0,0 +1,47 @@
+#include <linux/export.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ipv6.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static ip6_icmp_send_t __rcu *ip6_icmp_send;
+
+int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
+{
+ return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
+ 0 : -EBUSY;
+}
+EXPORT_SYMBOL(inet6_register_icmp_sender);
+
+int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
+{
+ int ret;
+
+ ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ?
+ 0 : -EINVAL;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_unregister_icmp_sender);
+
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+ ip6_icmp_send_t *send;
+
+ rcu_read_lock();
+ send = rcu_dereference(ip6_icmp_send);
+
+ if (!send)
+ goto out;
+ send(skb, type, code, info);
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(icmpv6_send);
+#endif
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a83e9209cec..51d54dc376f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,12 +44,19 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
+#include <net/inet_ecn.h>
-
-inline int ip6_rcv_finish( struct sk_buff *skb)
+int ip6_rcv_finish(struct sk_buff *skb)
{
- if (skb_dst(skb) == NULL)
+ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
+ const struct inet6_protocol *ipprot;
+
+ ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
+ if (ipprot && ipprot->early_demux)
+ ipprot->early_demux(skb);
+ }
+ if (!skb_dst(skb))
ip6_route_input(skb);
return dst_input(skb);
@@ -57,7 +64,7 @@ inline int ip6_rcv_finish( struct sk_buff *skb)
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
- struct ipv6hdr *hdr;
+ const struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
struct net *net = dev_net(skb->dev);
@@ -102,6 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
+ IP6_ADD_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_NOECTPKTS +
+ (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* A packet received on an interface with a destination address
@@ -111,6 +122,35 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
ipv6_addr_loopback(&hdr->daddr))
goto err;
+ /* RFC4291 Errata ID: 3480
+ * Interface-Local scope spans only a single interface on a
+ * node and is useful only for loopback transmission of
+ * multicast. Packets with interface-local scope received
+ * from another node must be discarded.
+ */
+ if (!(skb->pkt_type == PACKET_LOOPBACK ||
+ dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_is_multicast(&hdr->daddr) &&
+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
+ goto err;
+
+ /* RFC4291 2.7
+ * Nodes must not originate a packet to a multicast address whose scope
+ * field contains the reserved value 0; if such a packet is received, it
+ * must be silently dropped.
+ */
+ if (ipv6_addr_is_multicast(&hdr->daddr) &&
+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
+ goto err;
+
+ /*
+ * RFC4291 2.7
+ * Multicast addresses must not be used as source addresses in IPv6
+ * packets or appear in any Routing header.
+ */
+ if (ipv6_addr_is_multicast(&hdr->saddr))
+ goto err;
+
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
@@ -160,12 +200,12 @@ drop:
static int ip6_input_finish(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
- unsigned int nhoff;
- int nexthdr, raw;
- u8 hash;
struct inet6_dev *idev;
- struct net *net = dev_net(skb_dst(skb)->dev);
+ unsigned int nhoff;
+ int nexthdr;
+ bool raw;
/*
* Parse extension headers
@@ -180,13 +220,11 @@ resubmit:
nexthdr = skb_network_header(skb)[nhoff];
raw = raw6_local_deliver(skb, nexthdr);
-
- hash = nexthdr & (MAX_INET_PROTOS - 1);
- if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+ if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
int ret;
if (ipprot->flags & INET6_PROTO_FINAL) {
- struct ipv6hdr *hdr;
+ const struct ipv6hdr *hdr;
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded
@@ -199,7 +237,7 @@ resubmit:
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
- !ipv6_is_mld(skb, nexthdr))
+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
goto discard;
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
@@ -219,9 +257,11 @@ resubmit:
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
}
- } else
+ kfree_skb(skb);
+ } else {
IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
- kfree_skb(skb);
+ consume_skb(skb);
+ }
}
rcu_read_unlock();
return 0;
@@ -242,8 +282,8 @@ int ip6_input(struct sk_buff *skb)
int ip6_mc_input(struct sk_buff *skb)
{
- struct ipv6hdr *hdr;
- int deliver;
+ const struct ipv6hdr *hdr;
+ bool deliver;
IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
@@ -257,7 +297,8 @@ int ip6_mc_input(struct sk_buff *skb)
* IPv6 multicast router mode is now supported ;)
*/
if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
- !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) &&
+ !(ipv6_addr_type(&hdr->daddr) &
+ (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
/*
* Okay, we try to forward - split and duplicate
@@ -267,46 +308,31 @@ int ip6_mc_input(struct sk_buff *skb)
struct inet6_skb_parm *opt = IP6CB(skb);
/* Check for MLD */
- if (unlikely(opt->ra)) {
+ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
/* Check if this is a mld message */
- u8 *ptr = skb_network_header(skb) + opt->ra;
- struct icmp6hdr *icmp6;
u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
int offset;
/* Check if the value of Router Alert
* is for MLD (0x0000).
*/
- if ((ptr[2] | ptr[3]) == 0) {
- deliver = 0;
+ if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
+ deliver = false;
if (!ipv6_ext_hdr(nexthdr)) {
/* BUG */
goto out;
}
offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
- &nexthdr);
+ &nexthdr, &frag_off);
if (offset < 0)
goto out;
- if (nexthdr != IPPROTO_ICMPV6)
- goto out;
-
- if (!pskb_may_pull(skb, (skb_network_header(skb) +
- offset + 1 - skb->data)))
+ if (!ipv6_is_mld(skb, nexthdr, offset))
goto out;
- icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
-
- switch (icmp6->icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- deliver = 1;
- break;
- }
- goto out;
+ deliver = true;
}
/* unknown RA - process it normally */
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
new file mode 100644
index 00000000000..65eda2a8af4
--- /dev/null
+++ b/net/ipv6/ip6_offload.c
@@ -0,0 +1,337 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/printk.h>
+
+#include <net/protocol.h>
+#include <net/ipv6.h>
+
+#include "ip6_offload.h"
+
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+ const struct net_offload *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = ipv6_optlen(opth);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ ops = rcu_dereference(inet6_offloads[
+ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+ if (likely(ops && ops->callbacks.gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->callbacks.gso_send_check(skb);
+ }
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int proto;
+ struct frag_hdr *fptr;
+ unsigned int unfrag_ip6hlen;
+ u8 *prevhdr;
+ int offset = 0;
+ bool encap, udpfrag;
+ int nhoff;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
+ SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_MPLS |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ skb_reset_network_header(skb);
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
+ features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->callbacks.gso_segment(skb, features);
+ }
+
+ if (IS_ERR(segs))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+ ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+ skb->network_header = (u8 *)ipv6h - skb->head;
+
+ if (udpfrag) {
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+ fptr->frag_off = htons(offset);
+ if (skb->next != NULL)
+ fptr->frag_off |= htons(IP6_MF);
+ offset += (ntohs(ipv6h->payload_len) -
+ sizeof(struct frag_hdr));
+ }
+ if (encap)
+ skb_reset_inner_headers(skb);
+ }
+
+out:
+ return segs;
+}
+
+/* Return the total length of all the extension hdrs, following the same
+ * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
+ */
+static int ipv6_exthdrs_len(struct ipv6hdr *iph,
+ const struct net_offload **opps)
+{
+ struct ipv6_opt_hdr *opth = (void *)iph;
+ int len = 0, proto, optlen = sizeof(*iph);
+
+ proto = iph->nexthdr;
+ for (;;) {
+ if (proto != NEXTHDR_HOP) {
+ *opps = rcu_dereference(inet6_offloads[proto]);
+ if (unlikely(!(*opps)))
+ break;
+ if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+ opth = (void *)opth + optlen;
+ optlen = ipv6_optlen(opth);
+ len += optlen;
+ proto = opth->nexthdr;
+ }
+ return len;
+}
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct ipv6hdr *iph;
+ unsigned int nlen;
+ unsigned int hlen;
+ unsigned int off;
+ u16 flush = 1;
+ int proto;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
+
+ skb_set_network_header(skb, off);
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
+ flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+ rcu_read_lock();
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) {
+ __pskb_pull(skb, skb_gro_offset(skb));
+ proto = ipv6_gso_pull_exthdrs(skb, proto);
+ skb_gro_pull(skb, -skb_transport_offset(skb));
+ skb_reset_transport_header(skb);
+ __skb_push(skb, skb_gro_offset(skb));
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ iph = ipv6_hdr(skb);
+ }
+
+ NAPI_GRO_CB(skb)->proto = proto;
+
+ flush--;
+ nlen = skb_network_header_len(skb);
+
+ for (p = *head; p; p = p->next) {
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ iph2 = (struct ipv6hdr *)(p->data + off);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+
+ /* All fields must match except length and Traffic Class.
+ * XXX skbs on the gro_list have all been parsed and pulled
+ * already so we don't need to compare nlen
+ * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
+ * memcmp() alone below is suffcient, right?
+ */
+ if ((first_word & htonl(0xF00FFFFF)) ||
+ memcmp(&iph->nexthdr, &iph2->nexthdr,
+ nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= flush;
+ }
+
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ skb_gro_postpull_rcsum(skb, iph, nlen);
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ const struct net_offload *ops;
+ struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+ int err = -ENOSYS;
+
+ iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
+
+ rcu_read_lock();
+
+ nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+};
+
+static const struct net_offload sit_offload = {
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ },
+};
+
+static int __init ipv6_offload_init(void)
+{
+
+ if (tcpv6_offload_init() < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (udp_offload_init() < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (ipv6_exthdrs_offload_init() < 0)
+ pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
+
+ dev_add_offload(&ipv6_packet_offload);
+
+ inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
+ return 0;
+}
+
+fs_initcall(ipv6_offload_init);
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
new file mode 100644
index 00000000000..2e155c651b3
--- /dev/null
+++ b/net/ipv6/ip6_offload.h
@@ -0,0 +1,18 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ip6_offload_h
+#define __ip6_offload_h
+
+int ipv6_exthdrs_offload_init(void);
+int udp_offload_init(void);
+int tcpv6_offload_init(void);
+
+#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 99157b4cd56..45702b8cd14 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,50 +56,13 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-
-int __ip6_local_out(struct sk_buff *skb)
-{
- int len;
-
- len = skb->len - sizeof(struct ipv6hdr);
- if (len > IPV6_MAXPLEN)
- len = 0;
- ipv6_hdr(skb)->payload_len = htons(len);
-
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
-}
-
-int ip6_local_out(struct sk_buff *skb)
-{
- int err;
-
- err = __ip6_local_out(skb);
- if (likely(err == 1))
- err = dst_output(skb);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(ip6_local_out);
-
-/* dev_loopback_xmit for use with netfilter. */
-static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
-{
- skb_reset_mac_header(newskb);
- __skb_pull(newskb, skb_network_offset(newskb));
- newskb->pkt_type = PACKET_LOOPBACK;
- newskb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(newskb));
-
- netif_rx_ni(newskb);
- return 0;
-}
-
static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ struct neighbour *neigh;
+ struct in6_addr *nexthop;
+ int ret;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -120,7 +83,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
newskb, NULL, newskb->dev,
- ip6_dev_loopback_xmit);
+ dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
IP6_INC_STATS(dev_net(dev), idev,
@@ -132,37 +95,44 @@ static int ip6_finish_output2(struct sk_buff *skb)
IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
skb->len);
+
+ if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
+ IPV6_ADDR_SCOPE_NODELOCAL &&
+ !(dev->flags & IFF_LOOPBACK)) {
+ kfree_skb(skb);
+ return 0;
+ }
}
- if (dst->hh)
- return neigh_hh_output(dst->hh, skb);
- else if (dst->neighbour)
- return dst->neighbour->output(skb);
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop((struct rt6_info *)dst);
+ neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ if (!IS_ERR(neigh)) {
+ ret = dst_neigh_output(dst, neigh, skb);
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
- IP6_INC_STATS_BH(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
-
- return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
-}
-
static int ip6_finish_output(struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))
+ dst_allfrag(skb_dst(skb)) ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(skb, ip6_finish_output2);
else
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -182,18 +152,17 @@ int ip6_output(struct sk_buff *skb)
* xmit an sk_buff (used by TCP, SCTP and DCCP)
*/
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
- struct ipv6_txoptions *opt)
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+ struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *first_hop = &fl->fl6_dst;
+ struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr;
- u8 proto = fl->proto;
+ u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
int hlimit = -1;
- int tclass = 0;
u32 mtu;
if (opt) {
@@ -214,7 +183,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
kfree_skb(skb);
return -ENOBUFS;
}
- kfree_skb(skb);
+ consume_skb(skb);
skb = skb2;
skb_set_owner_w(skb, sk);
}
@@ -231,37 +200,34 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
/*
* Fill in the IPv6 header
*/
- if (np) {
- tclass = np->tclass;
+ if (np)
hlimit = np->hop_limit;
- }
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
+ ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
hdr->hop_limit = hlimit;
- ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
- ipv6_addr_copy(&hdr->daddr, first_hop);
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *first_hop;
+ skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+ if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
dst->dev, dst_output);
}
- if (net_ratelimit())
- printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
skb->dev = dst->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
@@ -269,42 +235,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
EXPORT_SYMBOL(ip6_xmit);
-/*
- * To avoid extra problems ND packets are send through this
- * routine. It's code duplication but I really want to avoid
- * extra checks since ipv6_build_header is used by TCP (which
- * is for us performance critical)
- */
-
-int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- int proto, int len)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6hdr *hdr;
- int totlen;
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->dev = dev;
-
- totlen = len + sizeof(struct ipv6hdr);
-
- skb_reset_network_header(skb);
- skb_put(skb, sizeof(struct ipv6hdr));
- hdr = ipv6_hdr(skb);
-
- *(__be32*)hdr = htonl(0x60000000);
-
- hdr->payload_len = htons(len);
- hdr->nexthdr = proto;
- hdr->hop_limit = np->hop_limit;
-
- ipv6_addr_copy(&hdr->saddr, saddr);
- ipv6_addr_copy(&hdr->daddr, daddr);
-
- return 0;
-}
-
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
struct ip6_ra_chain *ra;
@@ -338,10 +268,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
{
struct ipv6hdr *hdr = ipv6_hdr(skb);
u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
int offset;
if (ipv6_ext_hdr(nexthdr)) {
- offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
if (offset < 0)
return 0;
} else
@@ -390,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
return dst_output(skb);
}
+static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+ unsigned int mtu;
+ struct inet6_dev *idev;
+
+ if (dst_metric_locked(dst, RTAX_MTU)) {
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ return mtu;
+ }
+
+ mtu = IPV6_MIN_MTU;
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb->ignore_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -401,11 +371,15 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -424,9 +398,8 @@ int ip6_forward(struct sk_buff *skb)
* cannot be fragmented, because there is no warranty
* that different fragments will go along one path. --ANK
*/
- if (opt->ra) {
- u8 *ptr = skb_network_header(skb) + opt->ra;
- if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
+ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
+ if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
return 0;
}
@@ -437,8 +410,8 @@ int ip6_forward(struct sk_buff *skb)
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -451,14 +424,15 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb_dst(skb);
@@ -467,11 +441,10 @@ int ip6_forward(struct sk_buff *skb)
send redirects to source routed frames.
We don't send redirects to frames decapsulated from IPsec.
*/
- if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
- !skb_sec_path(skb)) {
+ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
+ struct inet_peer *peer;
struct rt6_info *rt;
- struct neighbour *n = dst->neighbour;
/*
* incoming and outgoing devices are the same
@@ -479,16 +452,20 @@ int ip6_forward(struct sk_buff *skb)
*/
rt = (struct rt6_info *) dst;
- if ((rt->rt6i_flags & RTF_GATEWAY))
- target = (struct in6_addr*)&n->primary_key;
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ target = &rt->rt6i_gateway;
else
target = &hdr->daddr;
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
- if (xrlim_allow(dst, 1*HZ))
- ndisc_send_redirect(skb, n, target);
+ if (inet_peer_xrlim_allow(peer, 1*HZ))
+ ndisc_send_redirect(skb, target);
+ if (peer)
+ inet_putpeer(peer);
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -503,24 +480,25 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = dst_mtu(dst);
+ mtu = ip6_dst_mtu_forward(dst);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INTOOBIGERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
@@ -531,6 +509,7 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
ip6_forward_finish);
@@ -555,53 +534,24 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- to->nf_trace = from->nf_trace;
-#endif
skb_copy_secmark(to, from);
}
-int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- unsigned int packet_len = skb->tail - skb->network_header;
- int found_rhdr = 0;
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 hash, id;
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
-
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- break;
-#endif
- if (found_rhdr)
- return offset;
- break;
- default :
- return offset;
- }
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
- offset);
- }
+ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
- return offset;
+ id = ip_idents_reserve(hash, 1);
+ fhdr->identification = htonl(id);
}
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
@@ -609,6 +559,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
+ int hroom, troom;
__be32 frag_id = 0;
int ptr, offset = 0, err=0;
u8 *prevhdr, nexthdr = 0;
@@ -622,7 +573,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (!skb->local_df && skb->len > mtu) {
+ if (unlikely(!skb->ignore_df && skb->len > mtu) ||
+ (IP6CB(skb)->frag_max_size &&
+ IP6CB(skb)->frag_max_size > mtu)) {
+ if (skb->sk && dst_allfrag(skb_dst(skb)))
+ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
skb->dev = skb_dst(skb)->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
@@ -685,7 +641,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
- ipv6_select_ident(fh);
+ ipv6_select_ident(fh, rt);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
@@ -741,7 +697,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return 0;
}
@@ -753,7 +709,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return err;
slow_path_clean:
@@ -767,6 +723,10 @@ slow_path_clean:
}
slow_path:
+ if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
+ skb_checksum_help(skb))
+ goto fail;
+
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
@@ -775,6 +735,8 @@ slow_path:
*/
*prevhdr = NEXTHDR_FRAGMENT;
+ hroom = LL_RESERVED_SPACE(rt->dst.dev);
+ troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
@@ -784,7 +746,7 @@ slow_path:
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
len = mtu;
- /* IF: we are not sending upto and including the packet end
+ /* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
len &= ~7;
@@ -793,7 +755,8 @@ slow_path:
* Allocate buffer.
*/
- if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
+ if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
@@ -806,7 +769,7 @@ slow_path:
*/
ip6_copy_metadata(frag, skb);
- skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(frag, hroom);
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
skb_reset_network_header(frag);
fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -831,7 +794,7 @@ slow_path:
fh->nexthdr = nexthdr;
fh->reserved = 0;
if (!frag_id) {
- ipv6_select_ident(fh);
+ ipv6_select_ident(fh, rt);
frag_id = fh->identification;
} else
fh->identification = frag_id;
@@ -864,7 +827,7 @@ slow_path:
}
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGOKS);
- kfree_skb(skb);
+ consume_skb(skb);
return err;
fail:
@@ -874,9 +837,9 @@ fail:
return err;
}
-static inline int ip6_rt_check(struct rt6key *rt_key,
- struct in6_addr *fl_addr,
- struct in6_addr *addr_cache)
+static inline int ip6_rt_check(const struct rt6key *rt_key,
+ const struct in6_addr *fl_addr,
+ const struct in6_addr *addr_cache)
{
return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
@@ -884,14 +847,20 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
struct dst_entry *dst,
- struct flowi *fl)
+ const struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt;
if (!dst)
goto out;
+ if (dst->ops->family != AF_INET6) {
+ dst_release(dst);
+ return NULL;
+ }
+
+ rt = (struct rt6_info *)dst;
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -909,11 +878,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
* sockets.
* 2. oif also should be the same.
*/
- if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+ if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
- ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+ ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl->oif && fl->oif != dst->dev->ifindex)) {
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
dst_release(dst);
dst = NULL;
}
@@ -923,22 +892,26 @@ out:
}
static int ip6_dst_lookup_tail(struct sock *sk,
- struct dst_entry **dst, struct flowi *fl)
+ struct dst_entry **dst, struct flowi6 *fl6)
{
- int err;
struct net *net = sock_net(sk);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ struct neighbour *n;
+ struct rt6_info *rt;
+#endif
+ int err;
if (*dst == NULL)
- *dst = ip6_route_output(net, sk, fl);
+ *dst = ip6_route_output(net, sk, fl6);
if ((err = (*dst)->error))
goto out_err_release;
- if (ipv6_addr_any(&fl->fl6_src)) {
- err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
- &fl->fl6_dst,
- sk ? inet6_sk(sk)->srcprefs : 0,
- &fl->fl6_src);
+ if (ipv6_addr_any(&fl6->saddr)) {
+ struct rt6_info *rt = (struct rt6_info *) *dst;
+ err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+ sk ? inet6_sk(sk)->srcprefs : 0,
+ &fl6->saddr);
if (err)
goto out_err_release;
}
@@ -952,12 +925,18 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
+ rt = (struct rt6_info *) *dst;
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+ err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
+ rcu_read_unlock_bh();
+
+ if (err) {
struct inet6_ifaddr *ifp;
- struct flowi fl_gw;
+ struct flowi6 fl_gw6;
int redirect;
- ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
+ ifp = ipv6_get_ifaddr(net, &fl6->saddr,
(*dst)->dev, 1);
redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
@@ -970,9 +949,9 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* default router instead
*/
dst_release(*dst);
- memcpy(&fl_gw, fl, sizeof(struct flowi));
- memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
- *dst = ip6_route_output(net, sk, &fl_gw);
+ memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
+ memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
+ *dst = ip6_route_output(net, sk, &fl_gw6);
if ((err = (*dst)->error))
goto out_err_release;
}
@@ -983,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
out_err_release:
if (err == -ENETUNREACH)
- IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst);
*dst = NULL;
return err;
@@ -993,52 +972,88 @@ out_err_release:
* ip6_dst_lookup - perform route lookup on flow
* @sk: socket which provides route info
* @dst: pointer to dst_entry * for result
- * @fl: flow to lookup
+ * @fl6: flow to lookup
*
* This function performs a route lookup on the given flow.
*
* It returns zero on success, or a standard errno code on error.
*/
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
*dst = NULL;
- return ip6_dst_lookup_tail(sk, dst, fl);
+ return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
- * ip6_sk_dst_lookup - perform socket cached route lookup on flow
+ * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ * @sk: socket which provides route info
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
+ *
+ * This function performs a route lookup on the given flow.
+ *
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
+ */
+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
+{
+ struct dst_entry *dst = NULL;
+ int err;
+
+ err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+ if (final_dst)
+ fl6->daddr = *final_dst;
+
+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+
+/**
+ * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
* @sk: socket which provides the dst cache and route info
- * @dst: pointer to dst_entry * for result
- * @fl: flow to lookup
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
*
* This function performs a route lookup on the given flow with the
* possibility of using the cached route in the socket if it is valid.
* It will take the socket dst lock when operating on the dst cache.
* As a result, this function can only be used in process context.
*
- * It returns zero on success, or a standard errno code on error.
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
*/
-int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
{
- *dst = NULL;
- if (sk) {
- *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
- *dst = ip6_sk_dst_check(sk, *dst, fl);
- }
+ struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+ int err;
+
+ dst = ip6_sk_dst_check(sk, dst, fl6);
+
+ err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+ if (final_dst)
+ fl6->daddr = *final_dst;
- return ip6_dst_lookup_tail(sk, dst, fl);
+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
-EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline int ip6_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
- int transhdrlen, int mtu,unsigned int flags)
+ int transhdrlen, int mtu,unsigned int flags,
+ struct rt6_info *rt)
{
struct sk_buff *skb;
+ struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1050,7 +1065,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
if (skb == NULL)
- return -ENOMEM;
+ return err;
/* reserve space for Hardware header */
skb_reserve(skb, hh_len);
@@ -1064,34 +1079,27 @@ static inline int ip6_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->transport_header = skb->network_header + fragheaderlen;
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->protocol = htons(ETH_P_IPV6);
skb->csum = 0;
- sk->sk_sndmsg_off = 0;
- }
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
-
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(&fhdr);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* Specify the length of each IPv6 datagram fragment.
+ * It has to be a multiple of 8.
+ */
+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+ sizeof(struct frag_hdr)) & ~7;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ ipv6_select_ident(&fhdr, rt);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1106,26 +1114,52 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
+static void ip6_append_data_mtu(unsigned int *mtu,
+ int *maxfraglen,
+ unsigned int fragheaderlen,
+ struct sk_buff *skb,
+ struct rt6_info *rt,
+ unsigned int orig_mtu)
+{
+ if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
+ if (skb == NULL) {
+ /* first fragment, reserve header_len */
+ *mtu = orig_mtu - rt->dst.header_len;
+
+ } else {
+ /*
+ * this fragment is not first, the headers
+ * space is regarded as data space.
+ */
+ *mtu = orig_mtu;
+ }
+ *maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ + fragheaderlen - sizeof(struct frag_hdr);
+ }
+}
+
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
int offset, int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
+ int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags, int dontfrag)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sk_buff *skb;
- unsigned int maxfraglen, fragheaderlen;
+ struct inet_cork *cork;
+ struct sk_buff *skb, *skb_prev = NULL;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen;
+ int dst_exthdrlen;
int hh_len;
- int mtu;
int copy;
int err;
int offset = 0;
- int csummode = CHECKSUM_NONE;
+ __u8 tx_flags = 0;
if (flags&MSG_PROBE)
return 0;
+ cork = &inet->cork.base;
if (skb_queue_empty(&sk->sk_write_queue)) {
/*
* setup for corking
@@ -1134,7 +1168,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (WARN_ON(np->cork.opt))
return -EINVAL;
- np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
if (unlikely(np->cork.opt == NULL))
return -ENOBUFS;
@@ -1165,48 +1199,82 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
/* need source address above miyazawa*/
}
dst_hold(&rt->dst);
- inet->cork.dst = &rt->dst;
- inet->cork.fl = *fl;
+ cork->dst = &rt->dst;
+ inet->cork.fl.u.ip6 = *fl6;
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
- mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ if (rt->dst.flags & DST_XFRM_TUNNEL)
+ mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ else
+ mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ rt->dst.dev->mtu : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
- inet->cork.fragsize = mtu;
+ cork->fragsize = mtu;
if (dst_allfrag(rt->dst.path))
- inet->cork.flags |= IPCORK_ALLFRAG;
- inet->cork.length = 0;
- sk->sk_sndmsg_page = NULL;
- sk->sk_sndmsg_off = 0;
- exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
- rt->rt6i_nfheader_len;
+ cork->flags |= IPCORK_ALLFRAG;
+ cork->length = 0;
+ exthdrlen = (opt ? opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
+ dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
} else {
- rt = (struct rt6_info *)inet->cork.dst;
- fl = &inet->cork.fl;
+ rt = (struct rt6_info *)cork->dst;
+ fl6 = &inet->cork.fl.u.ip6;
opt = np->cork.opt;
transhdrlen = 0;
exthdrlen = 0;
- mtu = inet->cork.fragsize;
+ dst_exthdrlen = 0;
+ mtu = cork->fragsize;
}
+ orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
- if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
- ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+ unsigned int maxnonfragsize, headersize;
+
+ headersize = sizeof(struct ipv6hdr) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+ (dst_allfrag(&rt->dst) ?
+ sizeof(struct frag_hdr) : 0) +
+ rt->rt6i_nfheader_len;
+
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
+
+ /* dontfrag active */
+ if ((cork->length + length > mtu - headersize) && dontfrag &&
+ (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+ sizeof(struct ipv6hdr));
+ goto emsgsize;
+ }
+
+ if (cork->length + length > maxnonfragsize - headersize) {
+emsgsize:
+ ipv6_local_error(sk, EMSGSIZE, fl6,
+ mtu - headersize +
+ sizeof(struct ipv6hdr));
return -EMSGSIZE;
}
}
+ /* For UDP, check if TX timestamp is enabled */
+ if (sk->sk_type == SOCK_DGRAM)
+ sock_tx_timestamp(sk, &tx_flags);
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1223,32 +1291,26 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
- inet->cork.length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
-
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags);
- if (err)
- goto error;
- return 0;
- }
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ cork->length += length;
+ if (((length > mtu) ||
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
/* Check if the remaining data fits into current packet. */
- copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
if (copy < length)
copy = maxfraglen - skb->len;
@@ -1258,38 +1320,46 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- struct sk_buff *skb_prev;
alloc_new_skb:
- skb_prev = skb;
-
/* There's no room in the current skb */
- if (skb_prev)
- fraggap = skb_prev->len - maxfraglen;
+ if (skb)
+ fraggap = skb->len - maxfraglen;
else
fraggap = 0;
+ /* update mtu and maxfraglen if necessary */
+ if (skb == NULL || skb_prev == NULL)
+ ip6_append_data_mtu(&mtu, &maxfraglen,
+ fragheaderlen, skb, rt,
+ orig_mtu);
+
+ skb_prev = skb;
/*
* If remaining data exceeds the mtu,
* we know we need more fragment(s).
*/
datalen = length + fraggap;
- if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
- datalen = maxfraglen - fragheaderlen;
- fraglen = datalen + fragheaderlen;
+ if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+ datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else
alloclen = datalen + fragheaderlen;
- /*
- * The last fragment gets additional space at tail.
- * Note: we overallocate on fragments with MSG_MODE
- * because we have no idea if we're the last one.
- */
- if (datalen == length + fraggap)
- alloclen += rt->dst.trailer_len;
+ alloclen += dst_exthdrlen;
+
+ if (datalen != length + fraggap) {
+ /*
+ * this is not the last fragment, the trailer
+ * space is regarded as data space.
+ */
+ datalen += rt->dst.trailer_len;
+ }
+
+ alloclen += rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
/*
* We just reserve space for fragment header.
@@ -1311,16 +1381,27 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
+ else {
+ /* Only the initial fragment
+ * is time stamped.
+ */
+ tx_flags = 0;
+ }
}
if (skb == NULL)
goto error;
/*
* Fill in the control structures
*/
- skb->ip_summed = csummode;
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->ip_summed = CHECKSUM_NONE;
skb->csum = 0;
- /* reserve for fragmentation */
- skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+ /* reserve for fragmentation and ipsec header */
+ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
+ dst_exthdrlen);
+
+ if (sk->sk_type == SOCK_DGRAM)
+ skb_shinfo(skb)->tx_flags = tx_flags;
/*
* Find where to start putting bytes
@@ -1340,6 +1421,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
+
if (copy < 0) {
err = -EINVAL;
kfree_skb(skb);
@@ -1354,7 +1436,7 @@ alloc_new_skb:
length -= datalen - fraggap;
transhdrlen = 0;
exthdrlen = 0;
- csummode = CHECKSUM_NONE;
+ dst_exthdrlen = 0;
/*
* Put the packet on the pending queue
@@ -1378,46 +1460,31 @@ alloc_new_skb:
}
} else {
int i = skb_shinfo(skb)->nr_frags;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- struct page *page = sk->sk_sndmsg_page;
- int off = sk->sk_sndmsg_off;
- unsigned int left;
-
- if (page && (left = PAGE_SIZE - off) > 0) {
- if (copy >= left)
- copy = left;
- if (page != frag->page) {
- if (i == MAX_SKB_FRAGS) {
- err = -EMSGSIZE;
- goto error;
- }
- get_page(page);
- skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
- frag = &skb_shinfo(skb)->frags[i];
- }
- } else if(i < MAX_SKB_FRAGS) {
- if (copy > PAGE_SIZE)
- copy = PAGE_SIZE;
- page = alloc_pages(sk->sk_allocation, 0);
- if (page == NULL) {
- err = -ENOMEM;
- goto error;
- }
- sk->sk_sndmsg_page = page;
- sk->sk_sndmsg_off = 0;
+ struct page_frag *pfrag = sk_page_frag(sk);
- skb_fill_page_desc(skb, i, page, 0, 0);
- frag = &skb_shinfo(skb)->frags[i];
- } else {
- err = -EMSGSIZE;
- goto error;
- }
- if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
- err = -EFAULT;
+ err = -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag))
goto error;
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+ err = -EMSGSIZE;
+ if (i == MAX_SKB_FRAGS)
+ goto error;
+
+ __skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, 0);
+ skb_shinfo(skb)->nr_frags = ++i;
+ get_page(pfrag->page);
}
- sk->sk_sndmsg_off += copy;
- frag->size += copy;
+ copy = min_t(int, copy, pfrag->size - pfrag->offset);
+ if (getfrag(from,
+ page_address(pfrag->page) + pfrag->offset,
+ offset, copy, skb->len, skb) < 0)
+ goto error_efault;
+
+ pfrag->offset += copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
@@ -1426,12 +1493,17 @@ alloc_new_skb:
offset += copy;
length -= copy;
}
+
return 0;
+
+error_efault:
+ err = -EFAULT;
error:
- inet->cork.length -= length;
+ cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err;
}
+EXPORT_SYMBOL_GPL(ip6_append_data);
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
@@ -1444,10 +1516,10 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
np->cork.opt = NULL;
}
- if (inet->cork.dst) {
- dst_release(inet->cork.dst);
- inet->cork.dst = NULL;
- inet->cork.flags &= ~IPCORK_ALLFRAG;
+ if (inet->cork.base.dst) {
+ dst_release(inet->cork.base.dst);
+ inet->cork.base.dst = NULL;
+ inet->cork.base.flags &= ~IPCORK_ALLFRAG;
}
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
@@ -1462,9 +1534,9 @@ int ip6_push_pending_frames(struct sock *sk)
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = np->cork.opt;
- struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
- struct flowi *fl = &inet->cork.fl;
- unsigned char proto = fl->proto;
+ struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
+ struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+ unsigned char proto = fl6->flowi6_proto;
int err = 0;
if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
@@ -1486,10 +1558,9 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->ignore_df = ip6_sk_ignore_df(sk);
- ipv6_addr_copy(final_dst, &fl->fl6_dst);
+ *final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
@@ -1500,13 +1571,11 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- *(__be32*)hdr = fl->fl6_flowlabel |
- htonl(0x60000000 | ((int)np->cork.tclass << 20));
-
+ ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
- ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
- ipv6_addr_copy(&hdr->daddr, final_dst);
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -1516,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
@@ -1535,6 +1604,7 @@ error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
goto out;
}
+EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
void ip6_flush_pending_frames(struct sock *sk)
{
@@ -1549,3 +1619,4 @@ void ip6_flush_pending_frames(struct sock *sk)
ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
+EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b1155554bb1..afa08245836 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -18,6 +18,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
@@ -27,7 +29,6 @@
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
-#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
@@ -38,12 +39,15 @@
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/etherdevice.h>
#include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <net/icmp.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
@@ -57,24 +61,32 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
+MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
-#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
+#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif
-#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
-#define IPV6_TCLASS_SHIFT 20
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-#define HASH_SIZE 32
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
-#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
- (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
- (HASH_SIZE - 1))
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops ip6_link_ops __read_mostly;
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
@@ -86,26 +98,28 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu **tnls[2];
};
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-};
-
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
- struct pcpu_tstats sum = { 0 };
+ struct pcpu_sw_netstats tmp, sum = { 0 };
int i;
for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
- sum.rx_packets += tstats->rx_packets;
- sum.rx_bytes += tstats->rx_bytes;
- sum.tx_packets += tstats->tx_packets;
- sum.tx_bytes += tstats->tx_bytes;
+ unsigned int start;
+ const struct pcpu_sw_netstats *tstats =
+ per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ tmp.rx_packets = tstats->rx_packets;
+ tmp.rx_bytes = tstats->rx_bytes;
+ tmp.tx_packets = tstats->tx_packets;
+ tmp.tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ sum.rx_packets += tmp.rx_packets;
+ sum.rx_bytes += tmp.rx_bytes;
+ sum.tx_packets += tmp.tx_packets;
+ sum.tx_bytes += tmp.tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
@@ -118,7 +132,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* Locking : hash tables are protected by RCU and RTNL
*/
-static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
struct dst_entry *dst = t->dst_cache;
@@ -131,20 +145,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
return dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
-static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
dst_release(t->dst_cache);
t->dst_cache = NULL;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
dst_release(t->dst_cache);
t->dst_cache = dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -161,14 +178,13 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(local);
+ unsigned int hash = HASH(remote, local);
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
@@ -193,16 +209,16 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
**/
static struct ip6_tnl __rcu **
-ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
- struct in6_addr *remote = &p->raddr;
- struct in6_addr *local = &p->laddr;
- unsigned h = 0;
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = 0;
int prio = 0;
if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
prio = 1;
- h = HASH(remote) ^ HASH(local);
+ h = HASH(remote, local);
}
return &ip6n->tnls[prio][h];
}
@@ -248,8 +264,35 @@ static void ip6_dev_free(struct net_device *dev)
free_netdev(dev);
}
+static int ip6_tnl_create2(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ int err;
+
+ t = netdev_priv(dev);
+ err = ip6_tnl_dev_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &ip6_link_ops;
+
+ dev_hold(dev);
+ ip6_tnl_link(ip6n, t);
+ return 0;
+
+out:
+ return err;
+}
+
/**
- * ip6_tnl_create() - create a new tunnel
+ * ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
* @pt: pointer to new tunnel
*
@@ -260,13 +303,12 @@ static void ip6_dev_free(struct net_device *dev)
* created tunnel or NULL
**/
-static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
char name[IFNAMSIZ];
int err;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (p->name[0])
strlcpy(name, p->name, IFNAMSIZ);
@@ -279,22 +321,13 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
dev_net_set(dev, net);
- if (strchr(name, '%')) {
- if (dev_alloc_name(dev, name) < 0)
- goto failed_free;
- }
-
t = netdev_priv(dev);
t->parms = *p;
- err = ip6_tnl_dev_init(dev);
+ t->net = dev_net(dev);
+ err = ip6_tnl_create2(dev);
if (err < 0)
goto failed_free;
- if ((err = register_netdevice(dev)) < 0)
- goto failed_free;
-
- dev_hold(dev);
- ip6_tnl_link(ip6n, t);
return t;
failed_free:
@@ -318,10 +351,10 @@ failed:
**/
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
- struct ip6_tnl_parm *p, int create)
+ struct __ip6_tnl_parm *p, int create)
{
- struct in6_addr *remote = &p->raddr;
- struct in6_addr *local = &p->laddr;
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
struct ip6_tnl __rcu **tp;
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -350,11 +383,11 @@ static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
- rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
ip6_tnl_unlink(ip6n, t);
ip6_tnl_dst_reset(t);
@@ -370,10 +403,9 @@ ip6_tnl_dev_uninit(struct net_device *dev)
* else index to encapsulation limit
**/
-static __u16
-parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
- struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
__u8 nexthdr = ipv6h->nexthdr;
__u16 off = sizeof (*ipv6h);
@@ -421,6 +453,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
}
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
/**
* ip6_tnl_err - tunnel error handler
@@ -434,7 +467,7 @@ static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
- struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
struct ip6_tnl *t;
int rel_msg = 0;
u8 rel_type = ICMPV6_DEST_UNREACH;
@@ -462,41 +495,32 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
- if (net_ratelimit())
- printk(KERN_WARNING
- "%s: Path to destination invalid "
- "or inactive!\n", t->parms.name);
+ net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
rel_msg = 1;
break;
case ICMPV6_TIME_EXCEED:
if ((*code) == ICMPV6_EXC_HOPLIMIT) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "%s: Too small hop limit or "
- "routing loop in tunnel!\n",
- t->parms.name);
+ net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
break;
case ICMPV6_PARAMPROB:
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
- teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
if (teli && teli == *info - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "%s: Too small encapsulation "
- "limit or routing loop in "
- "tunnel!\n", t->parms.name);
+ net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
- } else if (net_ratelimit()) {
- printk(KERN_WARNING
- "%s: Recipient unable to parse tunneled "
- "packet!\n ", t->parms.name);
+ } else {
+ net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -534,9 +558,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u32 rel_info = ntohl(info);
int err;
struct sk_buff *skb2;
- struct iphdr *eiph;
- struct flowi fl;
+ const struct iphdr *eiph;
struct rtable *rt;
+ struct flowi4 fl4;
err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
@@ -559,6 +583,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
+ case NDISC_REDIRECT:
+ rel_type = ICMP_REDIRECT;
+ rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
@@ -577,11 +604,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
- fl.fl4_dst = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_IPIP;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+ eiph->saddr, 0,
+ 0, 0,
+ IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt))
goto out;
skb2->dev = rt->dst.dev;
@@ -590,15 +617,18 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
- fl.fl4_dst = eiph->daddr;
- fl.fl4_src = eiph->saddr;
- fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+ eiph->daddr, eiph->saddr,
+ 0, 0,
+ IPPROTO_IPIP,
+ RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt) ||
rt->dst.dev->type != ARPHRD_TUNNEL) {
- ip_rt_put(rt);
+ if (!IS_ERR(rt))
+ ip_rt_put(rt);
goto out;
}
- skb_dst_set(skb2, (struct dst_entry *)rt);
+ skb_dst_set(skb2, &rt->dst);
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
@@ -612,8 +642,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
}
+ if (rel_type == ICMP_REDIRECT)
+ skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -652,13 +684,12 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
NULL, 0, 0);
- if (rt && rt->rt6i_dev)
- skb2->dev = rt->rt6i_dev;
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
icmpv6_send(skb2, rel_type, rel_code, rel_info);
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
kfree_skb(skb2);
}
@@ -666,51 +697,77 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
-static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
- struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
+static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
- if (INET_ECN_is_ce(dsfield))
- IP_ECN_set_ce(ip_hdr(skb));
+ return IP6_ECN_decapsulate(ipv6h, skb);
}
-static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
- struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
+static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
- if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
- IP6_ECN_set_ce(ipv6_hdr(skb));
+ return IP6_ECN_decapsulate(ipv6h, skb);
}
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
+{
+ struct __ip6_tnl_parm *p = &t->parms;
+ int ltype = ipv6_addr_type(laddr);
+ int rtype = ipv6_addr_type(raddr);
+ __u32 flags = 0;
+
+ if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
+ flags = IP6_TNL_F_CAP_PER_PACKET;
+ } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+ (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+ if (ltype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_XMIT;
+ if (rtype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_RCV;
+ }
+ return flags;
+}
+EXPORT_SYMBOL(ip6_tnl_get_cap);
+
/* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
- if (p->flags & IP6_TNL_F_CAP_RCV) {
+ if ((p->flags & IP6_TNL_F_CAP_RCV) ||
+ ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+ (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
struct net_device *ldev = NULL;
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if ((ipv6_addr_is_multicast(&p->laddr) ||
- likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ if ((ipv6_addr_is_multicast(laddr) ||
+ likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
ret = 1;
-
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
/**
* ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -723,18 +780,19 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
__u8 ipproto,
- void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
- struct ipv6hdr *ipv6h,
- struct sk_buff *skb))
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
{
struct ip6_tnl *t;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int err;
rcu_read_lock();
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
if (t->parms.proto != ipproto && t->parms.proto != 0) {
rcu_read_unlock();
@@ -746,25 +804,37 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
goto discard;
}
- if (!ip6_tnl_rcv_ctl(t)) {
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
t->dev->stats.rx_dropped++;
rcu_read_unlock();
goto discard;
}
- secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
skb->protocol = htons(protocol);
- skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ __skb_tunnel_rx(skb, t->dev, t->net);
+
+ err = dscp_ecn_decapsulate(t, ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++t->dev->stats.rx_frame_errors;
+ ++t->dev->stats.rx_errors;
+ rcu_read_unlock();
+ goto discard;
+ }
+ }
+
tstats = this_cpu_ptr(t->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
-
- __skb_tunnel_rx(skb, t->dev);
-
- dscp_ecn_decapsulate(t, ipv6h, skb);
+ u64_stats_update_end(&tstats->syncp);
netif_rx(skb);
@@ -824,17 +894,17 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
* 0 else
**/
-static inline int
-ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+static inline bool
+ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
-static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
@@ -844,21 +914,20 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
ldev = dev_get_by_index_rcu(net, p->link);
if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
- printk(KERN_WARNING
- "%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!ipv6_addr_is_multicast(&p->raddr) &&
unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
- printk(KERN_WARNING
- "%s xmit: Routing loop! "
- "Remote address found on this node!\n",
- p->name);
+ pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
+
/**
* ip6_tnl_xmit2 - encapsulate packet and send
* @skb: the outgoing socket buffer
@@ -881,40 +950,44 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
static int ip6_tnl_xmit2(struct sk_buff *skb,
struct net_device *dev,
__u8 dsfield,
- struct flowi *fl,
+ struct flowi6 *fl6,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
- struct dst_entry *dst;
+ struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
unsigned int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
int err = -1;
- int pkt_len;
- if ((dst = ip6_tnl_dst_check(t)) != NULL)
- dst_hold(dst);
- else {
- dst = ip6_route_output(net, NULL, fl);
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(t);
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
- if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
+ if (ndst->error)
goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
}
tdev = dst->dev;
if (tdev == dev) {
stats->collisions++;
- if (net_ratelimit())
- printk(KERN_WARNING
- "%s: Local routing loop detected!\n",
- t->parms.name);
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ t->parms.name);
goto tx_err_dst_release;
}
mtu = dst_mtu(dst) - sizeof (*ipv6h);
@@ -925,13 +998,15 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
@@ -946,49 +1021,45 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
- kfree_skb(skb);
+ consume_skb(skb);
skb = new_skb;
}
- skb_dst_drop(skb);
- skb_dst_set(skb, dst_clone(dst));
-
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
skb->transport_header = skb->network_header;
- proto = fl->proto;
+ proto = fl6->flowi6_proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
- dsfield = INET_ECN_encapsulate(0, dsfield);
- ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
- ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
- ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
- nf_reset(skb);
- pkt_len = skb->len;
- err = ip6_local_out(skb);
-
- if (net_xmit_eval(err) == 0) {
- struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
-
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- } else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
- }
- ip6_tnl_dst_store(t, dst);
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
+ ip6tunnel_xmit(skb, dev);
+ if (ndst)
+ ip6_tnl_dst_store(t, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(dst);
+ dst_release(ndst);
return err;
}
@@ -996,9 +1067,9 @@ static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
int encap_limit = -1;
- struct flowi fl;
+ struct flowi6 fl6;
__u8 dsfield;
__u32 mtu;
int err;
@@ -1010,16 +1081,18 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl, &t->fl, sizeof (fl));
- fl.proto = IPPROTO_IPIP;
+ memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
dsfield = ipv4_get_dsfield(iph);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
- fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
& IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -1038,7 +1111,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1;
__u16 offset;
- struct flowi fl;
+ struct flowi6 fl6;
__u8 dsfield;
__u32 mtu;
int err;
@@ -1047,7 +1120,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
!ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1060,16 +1133,18 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl, &t->fl, sizeof (fl));
- fl.proto = IPPROTO_IPV6;
+ memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
dsfield = ipv6_get_dsfield(ipv6h);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
-
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1109,46 +1184,28 @@ tx_err:
return NETDEV_TX_OK;
}
-static void ip6_tnl_set_cap(struct ip6_tnl *t)
-{
- struct ip6_tnl_parm *p = &t->parms;
- int ltype = ipv6_addr_type(&p->laddr);
- int rtype = ipv6_addr_type(&p->raddr);
-
- p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
-
- if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
- (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
- if (ltype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_XMIT;
- if (rtype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_RCV;
- }
-}
-
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
- struct ip6_tnl_parm *p = &t->parms;
- struct flowi *fl = &t->fl;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
/* Set up flowi template */
- ipv6_addr_copy(&fl->fl6_src, &p->laddr);
- ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
- fl->oif = p->link;
- fl->fl6_flowlabel = 0;
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
- fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+ fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
- ip6_tnl_set_cap(t);
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
dev->flags |= IFF_POINTOPOINT;
@@ -1161,23 +1218,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
if (rt == NULL)
return;
- if (rt->rt6i_dev) {
- dev->hard_header_len = rt->rt6i_dev->hard_header_len +
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len +
sizeof (struct ipv6hdr);
- dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+ dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
}
@@ -1191,10 +1250,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
**/
static int
-ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
- ipv6_addr_copy(&t->parms.laddr, &p->laddr);
- ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
t->parms.flags = p->flags;
t->parms.hop_limit = p->hop_limit;
t->parms.encap_limit = p->encap_limit;
@@ -1206,6 +1265,48 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
return 0;
}
+static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ struct net *net = t->net;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ int err;
+
+ ip6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = ip6_tnl_change(t, p);
+ ip6_tnl_link(ip6n, t);
+ netdev_state_change(t->dev);
+ return err;
+}
+
+static void
+ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->proto = u->proto;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->proto = p->proto;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
/**
* ip6_tnl_ioctl - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
@@ -1239,8 +1340,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
- struct ip6_tnl *t = NULL;
- struct net *net = dev_net(dev);
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
@@ -1250,11 +1352,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
break;
}
- t = ip6_tnl_locate(net, &p, 0);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ } else {
+ memset(&p, 0, sizeof(p));
}
- if (t == NULL)
- t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof (p));
+ ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
}
@@ -1262,7 +1367,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
@@ -1271,7 +1376,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
p.proto != 0)
break;
- t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
@@ -1281,15 +1387,12 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6_tnl_unlink(ip6n, t);
- synchronize_net();
- err = ip6_tnl_change(t, &p);
- ip6_tnl_link(ip6n, t);
- netdev_state_change(dev);
+ err = ip6_tnl_update(t, &p1);
}
if (t) {
err = 0;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+ ip6_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -1297,7 +1400,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
if (dev == ip6n->fb_tnl_dev) {
@@ -1305,7 +1408,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
break;
err = -EPERM;
if (t->dev == ip6n->fb_tnl_dev)
@@ -1334,9 +1439,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU) {
- return -EINVAL;
+ struct ip6_tnl *tnl = netdev_priv(dev);
+
+ if (tnl->parms.proto == IPPROTO_IPIP) {
+ if (new_mtu < 68)
+ return -EINVAL;
+ } else {
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
}
+ if (new_mtu > 0xFFF8 - dev->hard_header_len)
+ return -EINVAL;
dev->mtu = new_mtu;
return 0;
}
@@ -1361,16 +1474,23 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
+ struct ip6_tnl *t;
+
dev->netdev_ops = &ip6_tnl_netdev_ops;
dev->destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ /* This perm addr will be used as interface identifier by IPv6 */
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ eth_random_addr(dev->perm_addr);
}
@@ -1385,8 +1505,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
- strcpy(t->parms.name, dev->name);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ t->net = dev_net(dev);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
return 0;
@@ -1427,10 +1547,181 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
+
+ ip6_tnl_link_config(t);
+
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
}
+static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPV6 &&
+ proto != IPPROTO_IPIP &&
+ proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void ip6_tnl_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
+
+ if (data[IFLA_IPTUN_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
+
+ if (data[IFLA_IPTUN_FLOWINFO])
+ parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+}
+
+static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *nt;
+
+ nt = netdev_priv(dev);
+ ip6_tnl_netlink_parms(data, &nt->parms);
+
+ if (ip6_tnl_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return ip6_tnl_create2(dev);
+}
+
+static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm p;
+ struct net *net = t->net;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ return -EINVAL;
+
+ ip6_tnl_netlink_parms(data, &p);
+
+ t = ip6_tnl_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ return ip6_tnl_update(t, &p);
+}
+
+static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev != ip6n->fb_tnl_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
+static size_t ip6_tnl_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_LIMIT */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_FLOWINFO */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_FLAGS */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
+ 0;
+}
+
+static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
+ &parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
+ nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
+ nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
+ nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ip6_link_ops __read_mostly = {
+ .kind = "ip6tnl",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ip6_tnl_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6_tnl_dev_setup,
+ .validate = ip6_tnl_validate,
+ .newlink = ip6_tnl_newlink,
+ .changelink = ip6_tnl_changelink,
+ .dellink = ip6_tnl_dellink,
+ .get_size = ip6_tnl_get_size,
+ .fill_info = ip6_tnl_fill_info,
+};
+
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
.handler = ip4ip6_rcv,
.err_handler = ip4ip6_err,
@@ -1443,28 +1734,37 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
{
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
LIST_HEAD(list);
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6_link_ops)
+ unregister_netdevice_queue(dev, &list);
+
for (h = 0; h < HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, &list);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, &list);
t = rtnl_dereference(t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
unregister_netdevice_many(&list);
}
static int __net_init ip6_tnl_init_net(struct net *net)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct ip6_tnl *t = NULL;
int err;
ip6n->tnls[0] = ip6n->tnls_wc;
@@ -1477,6 +1777,11 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
@@ -1485,6 +1790,10 @@ static int __net_init ip6_tnl_init_net(struct net *net)
err = register_netdev(ip6n->fb_tnl_dev);
if (err < 0)
goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
return 0;
err_register:
@@ -1495,10 +1804,8 @@ err_alloc_dev:
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
rtnl_lock();
- ip6_tnl_destroy_tunnels(ip6n);
+ ip6_tnl_destroy_tunnels(net);
rtnl_unlock();
}
@@ -1525,18 +1832,23 @@ static int __init ip6_tunnel_init(void)
err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
if (err < 0) {
- printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
+ pr_err("%s: can't register ip4ip6\n", __func__);
goto out_ip4ip6;
}
err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
if (err < 0) {
- printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
+ pr_err("%s: can't register ip6ip6\n", __func__);
goto out_ip6ip6;
}
+ err = rtnl_link_register(&ip6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
return 0;
+rtnl_link_failed:
+ xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
@@ -1551,11 +1863,12 @@ out_pernet:
static void __exit ip6_tunnel_cleanup(void)
{
+ rtnl_link_unregister(&ip6_link_ops);
if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
- printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
+ pr_info("%s: can't deregister ip4ip6\n", __func__);
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
- printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
+ pr_info("%s: can't deregister ip6ip6\n", __func__);
unregister_pernet_device(&ip6_tnl_net_ops);
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
new file mode 100644
index 00000000000..9aaa6bb229e
--- /dev/null
+++ b/net/ipv6/ip6_vti.c
@@ -0,0 +1,1160 @@
+/*
+ * IPv6 virtual tunneling interface
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv6/ip6_tunnel.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+static int vti6_dev_init(struct net_device *dev);
+static void vti6_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops vti6_link_ops __read_mostly;
+
+static int vti6_net_id __read_mostly;
+struct vti6_net {
+ /* the vti6 tunnel fallback device */
+ struct net_device *fb_tnl_dev;
+ /* lists for storing tunnels in use */
+ struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_wc[1];
+ struct ip6_tnl __rcu **tnls[2];
+};
+
+#define for_each_vti6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/**
+ * vti6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @net: network namespace
+ * @remote: the address of the tunnel exit-point
+ * @local: the address of the tunnel entry-point
+ *
+ * Return:
+ * tunnel matching given end-points if found,
+ * else fallback tunnel if its device is up,
+ * else %NULL
+ **/
+static struct ip6_tnl *
+vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
+ const struct in6_addr *local)
+{
+ unsigned int hash = HASH(remote, local);
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ t = rcu_dereference(ip6n->tnls_wc[0]);
+ if (t && (t->dev->flags & IFF_UP))
+ return t;
+
+ return NULL;
+}
+
+/**
+ * vti6_tnl_bucket - get head of list matching given tunnel parameters
+ * @p: parameters containing tunnel end-points
+ *
+ * Description:
+ * vti6_tnl_bucket() returns the head of the list matching the
+ * &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list
+ **/
+static struct ip6_tnl __rcu **
+vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = 0;
+ int prio = 0;
+
+ if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+ prio = 1;
+ h = HASH(remote, local);
+ }
+ return &ip6n->tnls[prio][h];
+}
+
+static void
+vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
+
+ rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void
+vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = vti6_tnl_bucket(ip6n, &t->parms);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static void vti6_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static int vti6_tnl_create2(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ err = vti6_dev_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &vti6_link_ops;
+
+ dev_hold(dev);
+ vti6_tnl_link(ip6n, t);
+
+ return 0;
+
+out:
+ return err;
+}
+
+static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
+{
+ struct net_device *dev;
+ struct ip6_tnl *t;
+ char name[IFNAMSIZ];
+ int err;
+
+ if (p->name[0])
+ strlcpy(name, p->name, IFNAMSIZ);
+ else
+ sprintf(name, "ip6_vti%%d");
+
+ dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ if (dev == NULL)
+ goto failed;
+
+ dev_net_set(dev, net);
+
+ t = netdev_priv(dev);
+ t->parms = *p;
+ t->net = dev_net(dev);
+
+ err = vti6_tnl_create2(dev);
+ if (err < 0)
+ goto failed_free;
+
+ return t;
+
+failed_free:
+ vti6_dev_free(dev);
+failed:
+ return NULL;
+}
+
+/**
+ * vti6_locate - find or create tunnel matching given parameters
+ * @net: network namespace
+ * @p: tunnel parameters
+ * @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ * vti6_locate() first tries to locate an existing tunnel
+ * based on @parms. If this is unsuccessful, but @create is set a new
+ * tunnel device is created and registered for use.
+ *
+ * Return:
+ * matching tunnel or NULL
+ **/
+static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
+ int create)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for (tp = vti6_tnl_bucket(ip6n, p);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr))
+ return t;
+ }
+ if (!create)
+ return NULL;
+ return vti6_tnl_create(net, p);
+}
+
+/**
+ * vti6_dev_uninit - tunnel device uninitializer
+ * @dev: the device to be destroyed
+ *
+ * Description:
+ * vti6_dev_uninit() removes tunnel from its list
+ **/
+static void vti6_dev_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+ else
+ vti6_tnl_unlink(ip6n, t);
+ dev_put(dev);
+}
+
+static int vti6_rcv(struct sk_buff *skb)
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ rcu_read_lock();
+ if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+ &ipv6h->daddr)) != NULL) {
+ if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
+ t->dev->stats.rx_dropped++;
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
+
+ rcu_read_unlock();
+
+ return xfrm6_rcv(skb);
+ }
+ rcu_read_unlock();
+ return -EINVAL;
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
+/**
+ * vti6_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+static inline bool
+vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
+/**
+ * vti6_xmit - send a packet
+ * @skb: the outgoing socket buffer
+ * @dev: the outgoing tunnel device
+ * @fl: the flow informations for the xfrm_lookup
+ **/
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *tdev;
+ int err = -1;
+
+ if (!dst)
+ goto tx_err_link_failure;
+
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
+ goto tx_err_link_failure;
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ t->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
+
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(dst);
+ return err;
+}
+
+static netdev_tx_t
+vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
+ int ret;
+
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IPV6):
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ break;
+ default:
+ goto tx_err;
+ }
+
+ ret = vti6_xmit(skb, dev, &fl);
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ __u32 mark;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ mark = be32_to_cpu(t->parms.o_key);
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
+static void vti6_link_config(struct ip6_tnl *t)
+{
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
+ IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+}
+
+/**
+ * vti6_tnl_change - update the tunnel parameters
+ * @t: tunnel to be changed
+ * @p: tunnel configuration parameters
+ *
+ * Description:
+ * vti6_tnl_change() updates the tunnel parameters
+ **/
+static int
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.link = p->link;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.proto = p->proto;
+ ip6_tnl_dst_reset(t);
+ vti6_link_config(t);
+ return 0;
+}
+
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ vti6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = vti6_tnl_change(t, p);
+ vti6_tnl_link(ip6n, t);
+ netdev_state_change(t->dev);
+ return err;
+}
+
+static void
+vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->proto = u->proto;
+
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->proto = p->proto;
+
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+/**
+ * vti6_tnl_ioctl - configure vti6 tunnels from userspace
+ * @dev: virtual device associated with tunnel
+ * @ifr: parameters passed from userspace
+ * @cmd: command to be performed
+ *
+ * Description:
+ * vti6_ioctl() is used for managing vti6 tunnels
+ * from userspace.
+ *
+ * The possible commands are the following:
+ * %SIOCGETTUNNEL: get tunnel parameters for device
+ * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ * %SIOCCHGTUNNEL: change tunnel parameters to those given
+ * %SIOCDELTUNNEL: delete tunnel
+ *
+ * The fallback device "ip6_vti0", created during module
+ * initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ * 0 on success,
+ * %-EFAULT if unable to copy data to or from userspace,
+ * %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ * %-EINVAL if passed tunnel parameters are invalid,
+ * %-EEXIST if changing a tunnel's parameters would cause a conflict
+ * %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+static int
+vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t = NULL;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6n->fb_tnl_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ } else {
+ memset(&p, 0, sizeof(p));
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -EINVAL;
+ if (p.proto != IPPROTO_IPV6 && p.proto != 0)
+ break;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
+ if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else
+ t = netdev_priv(dev);
+
+ err = vti6_update(t, &p1);
+ }
+ if (t) {
+ err = 0;
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+
+ if (dev == ip6n->fb_tnl_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -ENOENT;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ if (t == NULL)
+ break;
+ err = -EPERM;
+ if (t->dev == ip6n->fb_tnl_dev)
+ break;
+ dev = t->dev;
+ }
+ err = 0;
+ unregister_netdevice(dev);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ return err;
+}
+
+/**
+ * vti6_tnl_change_mtu - change mtu manually for tunnel device
+ * @dev: virtual device associated with tunnel
+ * @new_mtu: the new mtu
+ *
+ * Return:
+ * 0 on success,
+ * %-EINVAL if mtu too small
+ **/
+static int vti6_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops vti6_netdev_ops = {
+ .ndo_uninit = vti6_dev_uninit,
+ .ndo_start_xmit = vti6_tnl_xmit,
+ .ndo_do_ioctl = vti6_ioctl,
+ .ndo_change_mtu = vti6_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+/**
+ * vti6_dev_setup - setup virtual tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Initialize function pointers and device parameters
+ **/
+static void vti6_dev_setup(struct net_device *dev)
+{
+ dev->netdev_ops = &vti6_netdev_ops;
+ dev->destructor = vti6_dev_free;
+
+ dev->type = ARPHRD_TUNNEL6;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
+ dev->mtu = ETH_DATA_LEN;
+ dev->flags |= IFF_NOARP;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+/**
+ * vti6_dev_init_gen - general initializer for all tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static inline int vti6_dev_init_gen(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ t->dev = dev;
+ t->net = dev_net(dev);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * vti6_dev_init - initializer for all non fallback tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static int vti6_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+ vti6_link_config(t);
+ return 0;
+}
+
+/**
+ * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * @dev: fallback device
+ *
+ * Return: 0
+ **/
+static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+
+ t->parms.proto = IPPROTO_IPV6;
+ dev_hold(dev);
+
+ vti6_link_config(t);
+
+ rcu_assign_pointer(ip6n->tnls_wc[0], t);
+ return 0;
+}
+
+static int vti6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ return 0;
+}
+
+static void vti6_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_VTI_LINK])
+ parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
+
+ if (data[IFLA_VTI_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
+
+ if (data[IFLA_VTI_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+}
+
+static int vti6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *nt;
+
+ nt = netdev_priv(dev);
+ vti6_netlink_parms(data, &nt->parms);
+
+ nt->parms.proto = IPPROTO_IPV6;
+
+ if (vti6_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return vti6_tnl_create2(dev);
+}
+
+static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t;
+ struct __ip6_tnl_parm p;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ return -EINVAL;
+
+ vti6_netlink_parms(data, &p);
+
+ t = vti6_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ return vti6_update(t, &p);
+}
+
+static size_t vti6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_VTI_LINK */
+ nla_total_size(4) +
+ /* IFLA_VTI_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_IKEY */
+ nla_total_size(4) +
+ /* IFLA_VTI_OKEY */
+ nla_total_size(4) +
+ 0;
+}
+
+static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
+ nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
+ &parm->laddr) ||
+ nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
+ nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
+ nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
+ [IFLA_VTI_LINK] = { .type = NLA_U32 },
+ [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_IKEY] = { .type = NLA_U32 },
+ [IFLA_VTI_OKEY] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops vti6_link_ops __read_mostly = {
+ .kind = "vti6",
+ .maxtype = IFLA_VTI_MAX,
+ .policy = vti6_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = vti6_dev_setup,
+ .validate = vti6_validate,
+ .newlink = vti6_newlink,
+ .changelink = vti6_changelink,
+ .get_size = vti6_get_size,
+ .fill_info = vti6_fill_info,
+};
+
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+{
+ int h;
+ struct ip6_tnl *t;
+ LIST_HEAD(list);
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, &list);
+ t = rtnl_dereference(t->next);
+ }
+ }
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_many(&list);
+}
+
+static int __net_init vti6_init_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct ip6_tnl *t = NULL;
+ int err;
+
+ ip6n->tnls[0] = ip6n->tnls_wc;
+ ip6n->tnls[1] = ip6n->tnls_r_l;
+
+ err = -ENOMEM;
+ ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
+ vti6_dev_setup);
+
+ if (!ip6n->fb_tnl_dev)
+ goto err_alloc_dev;
+ dev_net_set(ip6n->fb_tnl_dev, net);
+
+ err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ err = register_netdev(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+ return 0;
+
+err_register:
+ vti6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit vti6_exit_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ rtnl_lock();
+ vti6_destroy_tunnels(ip6n);
+ rtnl_unlock();
+}
+
+static struct pernet_operations vti6_net_ops = {
+ .init = vti6_init_net,
+ .exit = vti6_exit_net,
+ .id = &vti6_net_id,
+ .size = sizeof(struct vti6_net),
+};
+
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+/**
+ * vti6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+static int __init vti6_tunnel_init(void)
+{
+ int err;
+
+ err = register_pernet_device(&vti6_net_ops);
+ if (err < 0)
+ goto out_pernet;
+
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
+ if (err < 0) {
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = rtnl_link_register(&vti6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ return 0;
+
+rtnl_link_failed:
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+out:
+ unregister_pernet_device(&vti6_net_ops);
+out_pernet:
+ return err;
+}
+
+/**
+ * vti6_tunnel_cleanup - free resources and unregister protocol
+ **/
+static void __exit vti6_tunnel_cleanup(void)
+{
+ rtnl_link_unregister(&vti6_link_ops);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+
+ unregister_pernet_device(&vti6_net_ops);
+}
+
+module_init(vti6_tunnel_init);
+module_exit(vti6_tunnel_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("vti6");
+MODULE_ALIAS_NETDEV("ip6_vti0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9fab274019c..8250474ab7d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -16,7 +16,6 @@
*
*/
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -34,6 +33,7 @@
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -50,7 +50,9 @@
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
#include <net/ip6_checksum.h>
+#include <linux/netconf.h>
struct mr6_table {
struct list_head list;
@@ -65,8 +67,8 @@ struct mr6_table {
struct mif_device vif6_table[MAXMIFS];
int maxvif;
atomic_t cache_resolve_queue_len;
- int mroute_do_assert;
- int mroute_do_pim;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
int mroute_reg_vif_num;
#endif
@@ -108,12 +110,14 @@ static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache);
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
struct mfc6_cache *c, struct rtmsg *rtm);
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+ int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
@@ -134,14 +138,18 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
return NULL;
}
-static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr6_table **mrt)
{
- struct ip6mr_result res;
- struct fib_lookup_arg arg = { .result = &res, };
int err;
+ struct ip6mr_result res;
+ struct fib_lookup_arg arg = {
+ .result = &res,
+ .flags = FIB_LOOKUP_NOREF,
+ };
- err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
+ err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
+ flowi6_to_flowi(flp6), 0, &arg);
if (err < 0)
return err;
*mrt = res.mrt;
@@ -203,7 +211,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
return 0;
}
-static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
.family = RTNL_FAMILY_IP6MR,
.rule_size = sizeof(struct ip6mr_rule),
.addr_size = sizeof(struct in6_addr),
@@ -254,10 +262,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr6_table *mrt, *next;
+ rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
+ rtnl_unlock();
fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
@@ -269,7 +279,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
return net->ipv6.mrt6;
}
-static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr6_table **mrt)
{
*mrt = net->ipv6.mrt6;
@@ -284,7 +294,10 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
+ rtnl_lock();
ip6mr_free_table(net->ipv6.mrt6);
+ net->ipv6.mrt6 = NULL;
+ rtnl_unlock();
}
#endif
@@ -616,9 +629,9 @@ static int pim6_rcv(struct sk_buff *skb)
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
struct mr6_table *mrt;
- struct flowi fl = {
- .iif = skb->dev->ifindex,
- .mark = skb->mark,
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
};
int reg_vif_num;
@@ -643,7 +656,7 @@ static int pim6_rcv(struct sk_buff *skb)
ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
goto drop;
- if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
goto drop;
reg_vif_num = mrt->mroute_reg_vif_num;
@@ -661,10 +674,9 @@ static int pim6_rcv(struct sk_buff *skb)
skb_pull(skb, (u8 *)encap - skb->data);
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IPV6);
- skb->ip_summed = 0;
- skb->pkt_type = PACKET_HOST;
+ skb->ip_summed = CHECKSUM_NONE;
- skb_tunnel_rx(skb, reg_dev);
+ skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
netif_rx(skb);
@@ -686,16 +698,18 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
{
struct net *net = dev_net(dev);
struct mr6_table *mrt;
- struct flowi fl = {
- .oif = dev->ifindex,
- .iif = skb->skb_iif,
- .mark = skb->mark,
+ struct flowi6 fl6 = {
+ .flowi6_oif = dev->ifindex,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
+ .flowi6_mark = skb->mark,
};
int err;
- err = ip6mr_fib_lookup(net, &fl, &mrt);
- if (err < 0)
+ err = ip6mr_fib_lookup(net, &fl6, &mrt);
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -801,8 +815,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
dev_set_allmulti(dev, -1);
in6_dev = __in6_dev_get(dev);
- if (in6_dev)
+ if (in6_dev) {
in6_dev->cnf.mc_forwarding--;
+ inet6_netconf_notify_devconf(dev_net(dev),
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+ }
if (v->flags & MIFF_REGISTER)
unregister_netdevice_queue(dev, head);
@@ -831,10 +849,10 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
- rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
kfree_skb(skb);
}
@@ -861,6 +879,7 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_destroy_unres(mrt, c);
}
@@ -954,8 +973,12 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
}
in6_dev = __in6_dev_get(dev);
- if (in6_dev)
+ if (in6_dev) {
in6_dev->cnf.mc_forwarding++;
+ inet6_netconf_notify_devconf(dev_net(dev),
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+ }
/*
* Fill in the VIF structures
@@ -987,8 +1010,8 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
}
static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
- struct in6_addr *origin,
- struct in6_addr *mcastgrp)
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp)
{
int line = MFC6_HASH(mcastgrp, origin);
struct mfc6_cache *c;
@@ -1001,6 +1024,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
return NULL;
}
+/* Look for a (*,*,oif) entry */
+static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
+ mifi_t mifi)
+{
+ int line = MFC6_HASH(&in6addr_any, &in6addr_any);
+ struct mfc6_cache *c;
+
+ list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp) &&
+ (c->mfc_un.res.ttls[mifi] < 255))
+ return c;
+
+ return NULL;
+}
+
+/* Look for a (*,G) entry */
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+ struct in6_addr *mcastgrp,
+ mifi_t mifi)
+{
+ int line = MFC6_HASH(mcastgrp, &in6addr_any);
+ struct mfc6_cache *c, *proxy;
+
+ if (ipv6_addr_any(mcastgrp))
+ goto skip;
+
+ list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
+ if (c->mfc_un.res.ttls[mifi] < 255)
+ return c;
+
+ /* It's ok if the mifi is part of the static tree */
+ proxy = ip6mr_cache_find_any_parent(mrt,
+ c->mf6c_parent);
+ if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
+ return c;
+ }
+
+skip:
+ return ip6mr_cache_find_any_parent(mrt, mifi);
+}
+
/*
* Allocate a multicast cache entry
*/
@@ -1038,18 +1105,17 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
- int err;
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+ if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+ ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
ip6_mr_forward(net, mrt, skb, c);
}
@@ -1101,8 +1167,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
msg->im6_msgtype = MRT6MSG_WHOLEPKT;
msg->im6_mif = mrt->mroute_reg_vif_num;
msg->im6_pad = 0;
- ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
- ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else
@@ -1127,8 +1193,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
msg->im6_msgtype = assert;
msg->im6_mif = mifi;
msg->im6_pad = 0;
- ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
- ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
skb_dst_set(skb, dst_clone(skb_dst(pkt)));
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1144,8 +1210,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
*/
ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
if (ret < 0) {
- if (net_ratelimit())
- printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
+ net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
}
@@ -1209,6 +1274,7 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
atomic_inc(&mrt->cache_resolve_queue_len);
list_add(&c->list, &mrt->mfc6_unres_queue);
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
@@ -1232,7 +1298,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+ int parent)
{
int line;
struct mfc6_cache *c, *next;
@@ -1241,11 +1308,14 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ ipv6_addr_equal(&c->mf6c_mcastgrp,
+ &mfc->mf6cc_mcastgrp.sin6_addr) &&
+ (parent == -1 || parent == c->mf6c_parent)) {
write_lock_bh(&mrt_lock);
list_del(&c->list);
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_cache_free(c);
return 0;
}
@@ -1256,7 +1326,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
static int ip6mr_device_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct mr6_table *mrt;
struct mif_device *v;
@@ -1296,9 +1366,9 @@ static int __net_init ip6mr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
- if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
+ if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
goto proc_vif_fail;
- if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
+ if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
goto proc_cache_fail;
#endif
@@ -1306,7 +1376,7 @@ static int __net_init ip6mr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
proc_cache_fail:
- proc_net_remove(net, "ip6_mr_vif");
+ remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
@@ -1317,8 +1387,8 @@ fail:
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_net_remove(net, "ip6_mr_cache");
- proc_net_remove(net, "ip6_mr_vif");
+ remove_proc_entry("ip6_mr_cache", net->proc_net);
+ remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
}
@@ -1348,12 +1418,13 @@ int __init ip6_mr_init(void)
goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
- printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
+ pr_err("%s: can't add PIM protocol\n", __func__);
err = -EAGAIN;
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
+ rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
+ ip6mr_rtm_dumproute, NULL);
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
@@ -1374,7 +1445,7 @@ void ip6_mr_cleanup(void)
}
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
- struct mf6cctl *mfc, int mrtsock)
+ struct mf6cctl *mfc, int mrtsock, int parent)
{
bool found = false;
int line;
@@ -1396,7 +1467,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ ipv6_addr_equal(&c->mf6c_mcastgrp,
+ &mfc->mf6cc_mcastgrp.sin6_addr) &&
+ (parent == -1 || parent == mfc->mf6cc_parent)) {
found = true;
break;
}
@@ -1409,10 +1482,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
- if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+ if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
+ !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
return -EINVAL;
c = ip6mr_cache_alloc();
@@ -1453,6 +1528,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1486,6 +1562,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
list_del(&c->list);
write_unlock_bh(&mrt_lock);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_cache_free(c);
}
}
@@ -1494,6 +1571,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
list_del(&c->list);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
ip6mr_destroy_unres(mrt, c);
}
spin_unlock_bh(&mfc_unres_lock);
@@ -1510,6 +1588,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
}
else
err = -EADDRINUSE;
@@ -1532,6 +1613,10 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ inet6_netconf_notify_devconf(net,
+ NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt);
@@ -1547,13 +1632,13 @@ int ip6mr_sk_done(struct sock *sk)
struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
struct mr6_table *mrt;
- struct flowi fl = {
- .iif = skb->skb_iif,
- .oif = skb->dev->ifindex,
- .mark = skb->mark,
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
+ .flowi6_oif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
};
- if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
return mrt->mroute6_sk;
@@ -1568,7 +1653,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
- int ret;
+ int ret, parent = 0;
struct mif6ctl vif;
struct mf6cctl mfc;
mifi_t mifi;
@@ -1580,7 +1665,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
+ if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1625,15 +1710,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
*/
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
+ parent = -1;
+ case MRT6_ADD_MFC_PROXY:
+ case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
return -EINVAL;
if (copy_from_user(&mfc, optval, sizeof(mfc)))
return -EFAULT;
+ if (parent == 0)
+ parent = mfc.mf6cc_parent;
rtnl_lock();
- if (optname == MRT6_DEL_MFC)
- ret = ip6mr_mfc_delete(mrt, &mfc);
+ if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
+ ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
- ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
+ ret = ip6mr_mfc_add(net, mrt, &mfc,
+ sk == mrt->mroute6_sk, parent);
rtnl_unlock();
return ret;
@@ -1643,9 +1734,12 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ASSERT:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
- mrt->mroute_do_assert = !!v;
+ mrt->mroute_do_assert = v;
return 0;
}
@@ -1653,6 +1747,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_PIM:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
v = !!v;
@@ -1676,6 +1773,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -EINVAL;
if (get_user(v, (u32 __user *)optval))
return -EFAULT;
+ /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
+ if (v != RT_TABLE_DEFAULT && v >= 100000000)
+ return -EINVAL;
if (sk == mrt->mroute6_sk)
return -EBUSY;
@@ -1804,11 +1904,87 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
}
}
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req6 {
+ struct sockaddr_in6 src;
+ struct sockaddr_in6 grp;
+ compat_ulong_t pktcnt;
+ compat_ulong_t bytecnt;
+ compat_ulong_t wrong_if;
+};
+
+struct compat_sioc_mif_req6 {
+ mifi_t mifi;
+ compat_ulong_t icount;
+ compat_ulong_t ocount;
+ compat_ulong_t ibytes;
+ compat_ulong_t obytes;
+};
+
+int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+ struct compat_sioc_sg_req6 sr;
+ struct compat_sioc_mif_req6 vr;
+ struct mif_device *vif;
+ struct mfc6_cache *c;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ switch (cmd) {
+ case SIOCGETMIFCNT_IN6:
+ if (copy_from_user(&vr, arg, sizeof(vr)))
+ return -EFAULT;
+ if (vr.mifi >= mrt->maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif = &mrt->vif6_table[vr.mifi];
+ if (MIF_EXISTS(mrt, vr.mifi)) {
+ vr.icount = vif->pkt_in;
+ vr.ocount = vif->pkt_out;
+ vr.ibytes = vif->bytes_in;
+ vr.obytes = vif->bytes_out;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &vr, sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT_IN6:
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &sr, sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+#endif
static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(skb);
}
@@ -1823,7 +1999,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
struct mif_device *vif = &mrt->vif6_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
- struct flowi fl;
+ struct flowi6 fl6;
if (vif->dev == NULL)
goto out_free;
@@ -1841,14 +2017,16 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
ipv6h = ipv6_hdr(skb);
- fl = (struct flowi) {
- .oif = vif->link,
- .fl6_dst = ipv6h->daddr,
+ fl6 = (struct flowi6) {
+ .flowi6_oif = vif->link,
+ .daddr = ipv6h->daddr,
};
- dst = ip6_route_output(net, NULL, &fl);
- if (!dst)
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
goto out_free;
+ }
skb_dst_drop(skb);
skb_dst_set(skb, dst);
@@ -1898,24 +2076,34 @@ static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
return ct;
}
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache)
{
int psend = -1;
int vif, ct;
+ int true_vifi = ip6mr_find_vif(mrt, skb->dev);
vif = cache->mf6c_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ struct mfc6_cache *cache_proxy;
+
+ /* For an (*,G) entry, we only check that the incomming
+ * interface is part of the static tree.
+ */
+ cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ if (cache_proxy &&
+ cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ goto forward;
+ }
+
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (mrt->vif6_table[vif].dev != skb->dev) {
- int true_vifi;
-
cache->mfc_un.res.wrong_if++;
- true_vifi = ip6mr_find_vif(mrt, skb->dev);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -1933,14 +2121,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
goto dont_forward;
}
+forward:
mrt->vif6_table[vif].pkt_in++;
mrt->vif6_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
+ if (ipv6_addr_any(&cache->mf6c_origin) &&
+ ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (true_vifi >= 0 &&
+ true_vifi != cache->mf6c_parent &&
+ ipv6_hdr(skb)->hop_limit >
+ cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ /* It's an (*,*) entry and the packet is not coming from
+ * the upstream: forward the packet to the upstream
+ * only.
+ */
+ psend = cache->mf6c_parent;
+ goto last_forward;
+ }
+ goto dont_forward;
+ }
for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
- if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ /* For (*,G) entry, don't forward to the incoming interface */
+ if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -1949,14 +2155,14 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
psend = ct;
}
}
+last_forward:
if (psend != -1) {
ip6mr_forward2(net, mrt, skb, cache, psend);
- return 0;
+ return;
}
dont_forward:
kfree_skb(skb);
- return 0;
}
@@ -1969,19 +2175,29 @@ int ip6_mr_input(struct sk_buff *skb)
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
struct mr6_table *mrt;
- struct flowi fl = {
- .iif = skb->dev->ifindex,
- .mark = skb->mark,
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
};
int err;
- err = ip6mr_fib_lookup(net, &fl, &mrt);
- if (err < 0)
+ err = ip6mr_fib_lookup(net, &fl6, &mrt);
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+ if (cache == NULL) {
+ int vif = ip6mr_find_vif(mrt, skb->dev);
+
+ if (vif >= 0)
+ cache = ip6mr_cache_find_any(mrt,
+ &ipv6_hdr(skb)->daddr,
+ vif);
+ }
/*
* No usable cache entry
@@ -2014,37 +2230,45 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
{
int ct;
struct rtnexthop *nhp;
- u8 *b = skb_tail_pointer(skb);
- struct rtattr *mp_head;
+ struct nlattr *mp_attr;
+ struct rta_mfc_stats mfcs;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mf6c_parent >= MAXMIFS)
return -ENOENT;
- if (MIF_EXISTS(mrt, c->mf6c_parent))
- RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
-
- mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
+ if (MIF_EXISTS(mrt, c->mf6c_parent) &&
+ nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (mp_attr == NULL)
+ return -EMSGSIZE;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
- goto rtattr_failure;
- nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (nhp == NULL) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
- mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
+
+ nla_nest_end(skb, mp_attr);
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
+ return -EMSGSIZE;
+
rtm->rtm_type = RTN_MULTICAST;
return 1;
-
-rtattr_failure:
- nlmsg_trim(skb, b);
- return -EMSGSIZE;
}
int ip6mr_get_route(struct net *net,
@@ -2061,6 +2285,13 @@ int ip6mr_get_route(struct net *net,
read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+ if (!cache && skb->dev) {
+ int vif = ip6mr_find_vif(mrt, skb->dev);
+
+ if (vif >= 0)
+ cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
+ vif);
+ }
if (!cache) {
struct sk_buff *skb2;
@@ -2100,8 +2331,8 @@ int ip6mr_get_route(struct net *net,
iph->payload_len = 0;
iph->nexthdr = IPPROTO_NONE;
iph->hop_limit = 0;
- ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
- ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
+ iph->saddr = rt->rt6i_src.addr;
+ iph->daddr = rt->rt6i_dst.addr;
err = ip6mr_cache_unresolved(mrt, vif, skb2);
read_unlock(&mrt_lock);
@@ -2118,30 +2349,39 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 pid, u32 seq, struct mfc6_cache *c)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
+ int err;
- nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
rtm = nlmsg_data(nlh);
- rtm->rtm_family = RTNL_FAMILY_IPMR;
+ rtm->rtm_family = RTNL_FAMILY_IP6MR;
rtm->rtm_dst_len = 128;
rtm->rtm_src_len = 128;
rtm->rtm_tos = 0;
rtm->rtm_table = mrt->id;
- NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
+ if (nla_put_u32(skb, RTA_TABLE, mrt->id))
+ goto nla_put_failure;
+ rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- rtm->rtm_protocol = RTPROT_UNSPEC;
+ if (c->mfc_flags & MFC_STATIC)
+ rtm->rtm_protocol = RTPROT_STATIC;
+ else
+ rtm->rtm_protocol = RTPROT_MROUTED;
rtm->rtm_flags = 0;
- NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
- NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
-
- if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
+ if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
+ nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
+ goto nla_put_failure;
+ err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ /* do not break the dump if cache is unresolved */
+ if (err < 0 && err != -ENOENT)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2151,6 +2391,52 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int mr6_msgsize(bool unresolved, int maxvif)
+{
+ size_t len =
+ NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
+ + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
+ ;
+
+ if (!unresolved)
+ len = len
+ + nla_total_size(4) /* RTA_IIF */
+ + nla_total_size(0) /* RTA_MULTIPATH */
+ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
+ /* RTA_MFC_STATS */
+ + nla_total_size(sizeof(struct rta_mfc_stats))
+ ;
+
+ return len;
+}
+
+static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+ int cmd)
+{
+ struct net *net = read_pnet(&mrt->net);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
+ if (err < 0)
+ goto errout;
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
+ return;
+
+errout:
+ kfree_skb(skb);
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+}
+
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -2175,15 +2461,33 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (e < s_e)
goto next_entry;
if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
}
e = s_e = 0;
}
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (ip6mr_fill_mroute(mrt, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
+ spin_unlock_bh(&mfc_unres_lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(&mfc_unres_lock);
+ e = s_e = 0;
s_h = 0;
next_table:
t++;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 85cccd6ed0b..d1c793cffcb 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -16,8 +16,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* [Memo]
@@ -30,6 +29,9 @@
* The decompression of IP datagram MUST be done after the reassembly,
* AH/ESP processing.
*/
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -43,6 +45,7 @@
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -50,27 +53,33 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
__be32 spi;
- struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
struct ip_comp_hdr *ipcomph =
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
- return;
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
- printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n",
- spi, &iph->daddr);
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -167,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -179,21 +193,22 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
{
if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
- printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+ pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
- printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
}
@@ -202,10 +217,10 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
- printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
- printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+ pr_info("%s: can't remove xfrm type\n", __func__);
}
module_init(ipcomp6_init);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1770e061c0..edb58aff4ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
if (ipv6_only_sock(sk) ||
- !ipv6_addr_v4mapped(&np->daddr)) {
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
retv = -EADDRNOTAVAIL;
break;
}
@@ -343,7 +343,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (!capable(CAP_NET_ADMIN)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_NET_RAW)) {
retv = -EPERM;
break;
}
@@ -381,7 +382,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* hop-by-hop / destination options are privileged option */
retv = -EPERM;
- if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
opt = ipv6_renew_options(sk, np->opt, optname,
@@ -397,7 +398,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_RTHDR && opt && opt->srcrt) {
struct ipv6_rt_hdr *rthdr = opt->srcrt;
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1)
@@ -435,7 +436,7 @@ sticky_done:
goto e_inval;
np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
- ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+ np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
retv = 0;
break;
}
@@ -444,12 +445,12 @@ sticky_done:
{
struct ipv6_txoptions *opt = NULL;
struct msghdr msg;
- struct flowi fl;
+ struct flowi6 fl6;
int junk;
- fl.fl6_flowlabel = 0;
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
if (optlen == 0)
goto update;
@@ -475,8 +476,8 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void*)(opt+1);
- retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk,
- &junk);
+ retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
+ &junk, &junk);
if (retv)
goto done;
update:
@@ -503,7 +504,7 @@ done:
goto e_inval;
if (val > 255 || val < -1)
goto e_inval;
- np->mcast_hops = val;
+ np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
retv = 0;
break;
@@ -516,6 +517,36 @@ done:
retv = 0;
break;
+ case IPV6_UNICAST_IF:
+ {
+ struct net_device *dev = NULL;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ goto e_inval;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (ifindex == 0) {
+ np->ucast_oif = 0;
+ retv = 0;
+ break;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ retv = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
+ dev_put(dev);
+
+ retv = -EINVAL;
+ if (sk->sk_bound_dev_if)
+ break;
+
+ np->ucast_oif = ifindex;
+ retv = 0;
+ break;
+ }
+
case IPV6_MULTICAST_IF:
if (sk->sk_type == SOCK_STREAM)
break;
@@ -648,7 +679,6 @@ done:
}
case MCAST_MSFILTER:
{
- extern int sysctl_mld_max_msf;
struct group_filter *gsf;
if (optlen < GROUP_FILTER_SIZE(0))
@@ -692,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
@@ -725,7 +755,7 @@ done:
case IPV6_IPSEC_POLICY:
case IPV6_XFRM_POLICY:
retv = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
@@ -798,6 +828,7 @@ pref_skip_coa:
if (val < 0 || val > 255)
goto e_inval;
np->min_hopcount = val;
+ retv = 0;
break;
case IPV6_DONTFRAG:
np->dontfrag = valbool;
@@ -913,7 +944,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
}
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen, unsigned int flags)
{
struct ipv6_pinfo *np = inet6_sk(sk);
int len;
@@ -962,7 +993,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control = optval;
msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_flags = flags;
lock_sock(sk);
skb = np->pktoptions;
@@ -971,35 +1002,42 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
release_sock(sk);
if (skb) {
- int err = datagram_recv_ctl(sk, &msg, skb);
+ ip6_datagram_recv_ctl(sk, &msg, skb);
kfree_skb(skb);
- if (err)
- return err;
} else {
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
- ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
int hlim = np->mcast_hops;
put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
+ if (np->rxopt.bits.rxtclass) {
+ int tclass = (int)ip6_tclass(np->rcv_flowinfo);
+
+ put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
+ }
if (np->rxopt.bits.rxoinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
- ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
int hlim = np->mcast_hops;
put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
+ if (np->rxopt.bits.rxflow) {
+ __be32 flowinfo = np->rcv_flowinfo;
+
+ put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+ }
}
len -= msg.msg_controllen;
return put_user(len, optlen);
@@ -1162,6 +1200,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->mcast_oif;
break;
+ case IPV6_UNICAST_IF:
+ val = (__force int)htonl((__u32) np->ucast_oif);
+ break;
+
case IPV6_MTU_DISCOVER:
val = np->pmtudisc;
break;
@@ -1174,6 +1216,37 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->sndflow;
break;
+ case IPV6_FLOWLABEL_MGR:
+ {
+ struct in6_flowlabel_req freq;
+ int flags;
+
+ if (len < sizeof(freq))
+ return -EINVAL;
+
+ if (copy_from_user(&freq, optval, sizeof(freq)))
+ return -EFAULT;
+
+ if (freq.flr_action != IPV6_FL_A_GET)
+ return -EINVAL;
+
+ len = sizeof(freq);
+ flags = freq.flr_flags;
+
+ memset(&freq, 0, sizeof(freq));
+
+ val = ipv6_flowlabel_opt_get(sk, &freq, flags);
+ if (val < 0)
+ return val;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &freq, len))
+ return -EFAULT;
+
+ return 0;
+ }
+
case IPV6_ADDR_PREFERENCES:
val = 0;
@@ -1222,7 +1295,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
if(level != SOL_IPV6)
return -ENOPROTOOPT;
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1337,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
return compat_mc_getsockopt(sk, level, optname, optval, optlen,
ipv6_getsockopt);
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 49f986d626a..617f0958e16 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/pkt_sched.h>
#include <net/mld.h>
#include <linux/netfilter.h>
@@ -92,28 +93,30 @@ static void mld_gq_timer_expire(unsigned long data);
static void mld_ifc_timer_expire(unsigned long data);
static void mld_ifc_event(struct inet6_dev *idev);
static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr);
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
static void mld_clear_delrec(struct inet6_dev *idev);
+static bool mld_in_v1_mode(const struct inet6_dev *idev);
static int sf_setstate(struct ifmcaddr6 *pmc);
static void sf_markstate(struct ifmcaddr6 *pmc);
static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
-static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
- int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta);
-static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
- int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta);
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
-
-#define IGMP6_UNSOLICITED_IVAL (10*HZ)
#define MLD_QRV_DEFAULT 2
+/* RFC3810, 9.2. Query Interval */
+#define MLD_QI_DEFAULT (125 * HZ)
+/* RFC3810, 9.3. Query Response Interval */
+#define MLD_QRI_DEFAULT (10 * HZ)
-#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
- (idev)->cnf.force_mld_version == 1 || \
- ((idev)->mc_v1_seen && \
- time_before(jiffies, (idev)->mc_v1_seen)))
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN 24
+#define MLD_V2_QUERY_LEN_MIN 28
#define IPV6_MLD_MAX_MSF 64
@@ -128,6 +131,18 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
pmc != NULL; \
pmc = rcu_dereference(pmc->next))
+static int unsolicited_report_interval(struct inet6_dev *idev)
+{
+ int iv;
+
+ if (mld_in_v1_mode(idev))
+ iv = idev->cnf.mldv1_unsolicited_report_interval;
+ else
+ iv = idev->cnf.mldv2_unsolicited_report_interval;
+
+ return iv > 0 ? iv : 1;
+}
+
int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
@@ -155,15 +170,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -ENOMEM;
mc_lst->next = NULL;
- ipv6_addr_copy(&mc_lst->addr, addr);
+ mc_lst->addr = *addr;
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
- dst_release(&rt->dst);
+ dev = rt->dst.dev;
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
@@ -201,10 +216,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
-static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
-{
- kfree(container_of(head, struct ipv6_mc_socklist, rcu));
-}
/*
* socket leave on multicast group
*/
@@ -215,6 +226,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_mc_socklist __rcu **lnk;
struct net *net = sock_net(sk);
+ if (!ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+
spin_lock(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list;
(mc_lst = rcu_dereference_protected(*lnk,
@@ -239,7 +253,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
- call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+ kfree_rcu(mc_lst, rcu);
return 0;
}
}
@@ -250,7 +264,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
/* called with rcu_read_lock() */
static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
- struct in6_addr *group,
+ const struct in6_addr *group,
int ifindex)
{
struct net_device *dev = NULL;
@@ -260,9 +274,8 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
- dev_hold(dev);
- dst_release(&rt->dst);
+ dev = rt->dst.dev;
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
@@ -286,6 +299,9 @@ void ipv6_sock_mc_close(struct sock *sk)
struct ipv6_mc_socklist *mc_lst;
struct net *net = sock_net(sk);
+ if (!rcu_access_pointer(np->ipv6_mc_list))
+ return;
+
spin_lock(&ipv6_sk_mc_lock);
while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
@@ -307,7 +323,7 @@ void ipv6_sock_mc_close(struct sock *sk)
rcu_read_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
- call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+ kfree_rcu(mc_lst, rcu);
spin_lock(&ipv6_sk_mc_lock);
}
@@ -319,7 +335,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
{
struct in6_addr *source, *group;
struct ipv6_mc_socklist *pmc;
- struct net_device *dev;
struct inet6_dev *idev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
@@ -341,7 +356,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
rcu_read_unlock();
return -ENODEV;
}
- dev = idev->dev;
err = -EADDRNOTAVAIL;
@@ -377,8 +391,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
goto done; /* err = -EADDRNOTAVAIL */
rv = !0;
for (i=0; i<psl->sl_count; i++) {
- rv = memcmp(&psl->sl_addr[i], source,
- sizeof(struct in6_addr));
+ rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
if (rv == 0)
break;
}
@@ -428,12 +441,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i=0; i<psl->sl_count; i++) {
- rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
- if (rv == 0)
- break;
+ rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
+ if (rv == 0) /* There is an error in the address. */
+ goto done;
}
- if (rv == 0) /* address already there is an error */
- goto done;
for (j=psl->sl_count-1; j>=i; j--)
psl->sl_addr[j+1] = psl->sl_addr[j];
psl->sl_addr[i] = *source;
@@ -453,9 +464,8 @@ done:
int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
{
- struct in6_addr *group;
+ const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
- struct net_device *dev;
struct inet6_dev *idev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *newpsl, *psl;
@@ -478,7 +488,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
rcu_read_unlock();
return -ENODEV;
}
- dev = idev->dev;
err = 0;
@@ -546,10 +555,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
struct group_filter __user *optval, int __user *optlen)
{
int err, i, count, copycount;
- struct in6_addr *group;
+ const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
struct inet6_dev *idev;
- struct net_device *dev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
struct net *net = sock_net(sk);
@@ -566,7 +574,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
rcu_read_unlock();
return -ENODEV;
}
- dev = idev->dev;
err = -EADDRNOTAVAIL;
/*
@@ -617,13 +624,13 @@ done:
return err;
}
-int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
- const struct in6_addr *src_addr)
+bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+ const struct in6_addr *src_addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc;
struct ip6_sf_socklist *psl;
- int rv = 1;
+ bool rv = true;
rcu_read_lock();
for_each_pmc_rcu(np, mc) {
@@ -632,7 +639,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return 1;
+ return true;
}
read_lock(&mc->sflock);
psl = mc->sflist;
@@ -646,9 +653,9 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
break;
}
if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
- rv = 0;
+ rv = false;
if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
- rv = 0;
+ rv = false;
}
read_unlock(&mc->sflock);
rcu_read_unlock();
@@ -669,6 +676,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
spin_lock_bh(&mc->mca_lock);
if (!(mc->mca_flags&MAF_LOADED)) {
mc->mca_flags |= MAF_LOADED;
@@ -680,7 +691,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
return;
- if (MLD_V1_SEEN(mc->idev)) {
+ if (mld_in_v1_mode(mc->idev)) {
igmp6_join_group(mc);
return;
}
@@ -695,6 +706,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
spin_lock_bh(&mc->mca_lock);
if (mc->mca_flags&MAF_LOADED) {
mc->mca_flags &= ~MAF_LOADED;
@@ -758,7 +773,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
spin_unlock_bh(&idev->mc_lock);
}
-static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
{
struct ifmcaddr6 *pmc, *pmc_prev;
struct ip6_sf_list *psf, *psf_next;
@@ -868,7 +883,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
- ipv6_addr_copy(&mc->mca_addr, addr);
+ mc->mca_addr = *addr;
mc->idev = idev; /* (reference taken) */
mc->mca_users = 1;
/* mca_stamp should be updated upon changes */
@@ -940,41 +955,14 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
}
/*
- * identify MLD packets for MLD filter exceptions
- */
-int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
-{
- struct icmp6hdr *pic;
-
- if (nexthdr != IPPROTO_ICMPV6)
- return 0;
-
- if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
- return 0;
-
- pic = icmp6_hdr(skb);
-
- switch (pic->icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- return 1;
- default:
- break;
- }
- return 0;
-}
-
-/*
* check if the interface/address pair is valid
*/
-int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
- const struct in6_addr *src_addr)
+bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
+ const struct in6_addr *src_addr)
{
struct inet6_dev *idev;
struct ifmcaddr6 *mc;
- int rv = 0;
+ bool rv = false;
rcu_read_lock();
idev = __in6_dev_get(dev);
@@ -1001,7 +989,7 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
spin_unlock_bh(&mc->mca_lock);
} else
- rv = 1; /* don't filter unspecified source */
+ rv = true; /* don't filter unspecified source */
}
read_unlock_bh(&idev->lock);
}
@@ -1011,21 +999,49 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- int tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = prandom_u32() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_gq_stop_timer(struct inet6_dev *idev)
{
- int tv = net_random() % delay;
+ idev->mc_gq_running = 0;
+ if (del_timer(&idev->mc_gq_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+ unsigned long tv = prandom_u32() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
+static void mld_ifc_stop_timer(struct inet6_dev *idev)
+{
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+ unsigned long tv = prandom_u32() % delay;
+
+ if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+static void mld_dad_stop_timer(struct inet6_dev *idev)
+{
+ if (del_timer(&idev->mc_dad_timer))
+ __in6_dev_put(idev);
+}
+
/*
* IGMP handling (alias multicast ICMPv6 messages)
*/
@@ -1044,12 +1060,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
delay = ma->mca_timer.expires - jiffies;
}
- if (delay >= resptime) {
- if (resptime)
- delay = net_random() % resptime;
- else
- delay = 1;
- }
+ if (delay >= resptime)
+ delay = prandom_u32() % resptime;
+
ma->mca_timer.expires = jiffies + delay;
if (!mod_timer(&ma->mca_timer, jiffies + delay))
atomic_inc(&ma->mca_refcnt);
@@ -1057,8 +1070,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
}
/* mark EXCLUDE-mode sources */
-static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
- struct in6_addr *srcs)
+static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
+ const struct in6_addr *srcs)
{
struct ip6_sf_list *psf;
int i, scount;
@@ -1069,10 +1082,10 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
break;
for (i=0; i<nsrcs; i++) {
/* skip inactive filters */
- if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+ if (psf->sf_count[MCAST_INCLUDE] ||
pmc->mca_sfcount[MCAST_EXCLUDE] !=
psf->sf_count[MCAST_EXCLUDE])
- continue;
+ break;
if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
scount++;
break;
@@ -1081,12 +1094,12 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
}
pmc->mca_flags &= ~MAF_GSQUERY;
if (scount == nsrcs) /* all sources excluded */
- return 0;
- return 1;
+ return false;
+ return true;
}
-static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
- struct in6_addr *srcs)
+static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+ const struct in6_addr *srcs)
{
struct ip6_sf_list *psf;
int i, scount;
@@ -1110,10 +1123,162 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
}
if (!scount) {
pmc->mca_flags &= ~MAF_GSQUERY;
- return 0;
+ return false;
}
pmc->mca_flags |= MAF_GSQUERY;
- return 1;
+ return true;
+}
+
+static int mld_force_mld_version(const struct inet6_dev *idev)
+{
+ /* Normally, both are 0 here. If enforcement to a particular is
+ * being used, individual device enforcement will have a lower
+ * precedence over 'all' device (.../conf/all/force_mld_version).
+ */
+
+ if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
+ return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
+ else
+ return idev->cnf.force_mld_version;
+}
+
+static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 2;
+}
+
+static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 1;
+}
+
+static bool mld_in_v1_mode(const struct inet6_dev *idev)
+{
+ if (mld_in_v2_mode_only(idev))
+ return false;
+ if (mld_in_v1_mode_only(idev))
+ return true;
+ if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
+ return true;
+
+ return false;
+}
+
+static void mld_set_v1_mode(struct inet6_dev *idev)
+{
+ /* RFC3810, relevant sections:
+ * - 9.1. Robustness Variable
+ * - 9.2. Query Interval
+ * - 9.3. Query Response Interval
+ * - 9.12. Older Version Querier Present Timeout
+ */
+ unsigned long switchback;
+
+ switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
+
+ idev->mc_v1_seen = jiffies + switchback;
+}
+
+static void mld_update_qrv(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.8. QRV (Querier's Robustness Variable)
+ * - 9.1. Robustness Variable
+ */
+
+ /* The value of the Robustness Variable MUST NOT be zero,
+ * and SHOULD NOT be one. Catch this here if we ever run
+ * into such a case in future.
+ */
+ WARN_ON(idev->mc_qrv == 0);
+
+ if (mlh2->mld2q_qrv > 0)
+ idev->mc_qrv = mlh2->mld2q_qrv;
+
+ if (unlikely(idev->mc_qrv < 2)) {
+ net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
+ idev->mc_qrv, MLD_QRV_DEFAULT);
+ idev->mc_qrv = MLD_QRV_DEFAULT;
+ }
+}
+
+static void mld_update_qi(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.9. QQIC (Querier's Query Interval Code)
+ * - 9.2. Query Interval
+ * - 9.12. Older Version Querier Present Timeout
+ * (the [Query Interval] in the last Query received)
+ */
+ unsigned long mc_qqi;
+
+ if (mlh2->mld2q_qqic < 128) {
+ mc_qqi = mlh2->mld2q_qqic;
+ } else {
+ unsigned long mc_man, mc_exp;
+
+ mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
+ mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
+
+ mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
+ }
+
+ idev->mc_qi = mc_qqi * HZ;
+}
+
+static void mld_update_qri(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.3. Maximum Response Code
+ * - 9.3. Query Response Interval
+ */
+ idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
+}
+
+static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
+ unsigned long *max_delay)
+{
+ unsigned long mldv1_md;
+
+ /* Ignore v1 queries */
+ if (mld_in_v2_mode_only(idev))
+ return -EINVAL;
+
+ /* MLDv1 router present */
+ mldv1_md = ntohs(mld->mld_maxdelay);
+ *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
+
+ mld_set_v1_mode(idev);
+
+ /* cancel MLDv2 report timer */
+ mld_gq_stop_timer(idev);
+ /* cancel the interface change timer */
+ mld_ifc_stop_timer(idev);
+ /* clear deleted report items */
+ mld_clear_delrec(idev);
+
+ return 0;
+}
+
+static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ unsigned long *max_delay)
+{
+ /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
+ if (mld_in_v1_mode(idev))
+ return -EINVAL;
+
+ *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
+
+ mld_update_qrv(idev, mld);
+ mld_update_qi(idev, mld);
+ mld_update_qri(idev, mld);
+
+ idev->mc_maxdelay = *max_delay;
+
+ return 0;
}
/* called with rcu_read_lock() */
@@ -1121,13 +1286,13 @@ int igmp6_event_query(struct sk_buff *skb)
{
struct mld2_query *mlh2 = NULL;
struct ifmcaddr6 *ma;
- struct in6_addr *group;
+ const struct in6_addr *group;
unsigned long max_delay;
struct inet6_dev *idev;
struct mld_msg *mld;
int group_type;
int mark = 0;
- int len;
+ int len, err;
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
return -EINVAL;
@@ -1136,12 +1301,20 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
-
if (idev == NULL)
return 0;
@@ -1153,35 +1326,23 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == 24) {
- int switchback;
- /* MLDv1 router present */
-
- /* Translate milliseconds to jiffies */
- max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
-
- switchback = (idev->mc_qrv + 1) * max_delay;
- idev->mc_v1_seen = jiffies + switchback;
-
- /* cancel the interface change timer */
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- /* clear deleted report items */
- mld_clear_delrec(idev);
- } else if (len >= 28) {
+ if (len == MLD_V1_QUERY_LEN) {
+ err = mld_process_v1(idev, mld, &max_delay);
+ if (err < 0)
+ return err;
+ } else if (len >= MLD_V2_QUERY_LEN_MIN) {
int srcs_offset = sizeof(struct mld2_query) -
sizeof(struct icmp6hdr);
+
if (!pskb_may_pull(skb, srcs_offset))
return -EINVAL;
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
- if (!max_delay)
- max_delay = 1;
- idev->mc_maxdelay = max_delay;
- if (mlh2->mld2q_qrv)
- idev->mc_qrv = mlh2->mld2q_qrv;
+
+ err = mld_process_v2(idev, mlh2, &max_delay);
+ if (err < 0)
+ return err;
+
if (group_type == IPV6_ADDR_ANY) { /* general query */
if (mlh2->mld2q_nsrcs)
return -EINVAL; /* no sources allowed */
@@ -1287,17 +1448,17 @@ int igmp6_event_report(struct sk_buff *skb)
return 0;
}
-static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
- int gdeleted, int sdeleted)
+static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+ int gdeleted, int sdeleted)
{
switch (type) {
case MLD2_MODE_IS_INCLUDE:
case MLD2_MODE_IS_EXCLUDE:
if (gdeleted || sdeleted)
- return 0;
+ return false;
if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
if (pmc->mca_sfmode == MCAST_INCLUDE)
- return 1;
+ return true;
/* don't include if this source is excluded
* in all filters
*/
@@ -1306,29 +1467,29 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
return pmc->mca_sfcount[MCAST_EXCLUDE] ==
psf->sf_count[MCAST_EXCLUDE];
}
- return 0;
+ return false;
case MLD2_CHANGE_TO_INCLUDE:
if (gdeleted || sdeleted)
- return 0;
+ return false;
return psf->sf_count[MCAST_INCLUDE] != 0;
case MLD2_CHANGE_TO_EXCLUDE:
if (gdeleted || sdeleted)
- return 0;
+ return false;
if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
psf->sf_count[MCAST_INCLUDE])
- return 0;
+ return false;
return pmc->mca_sfcount[MCAST_EXCLUDE] ==
psf->sf_count[MCAST_EXCLUDE];
case MLD2_ALLOW_NEW_SOURCES:
if (gdeleted || !psf->sf_crcount)
- return 0;
+ return false;
return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
case MLD2_BLOCK_OLD_SOURCES:
if (pmc->mca_sfmode == MCAST_INCLUDE)
return gdeleted || (psf->sf_crcount && sdeleted);
return psf->sf_crcount && !gdeleted && !sdeleted;
}
- return 0;
+ return false;
}
static int
@@ -1345,21 +1506,49 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int proto, int len)
{
+ struct ipv6hdr *hdr;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
+ skb_reset_network_header(skb);
+ skb_put(skb, sizeof(struct ipv6hdr));
+ hdr = ipv6_hdr(skb);
+
+ ip6_flow_hdr(hdr, 0, 0);
+
+ hdr->payload_len = htons(len);
+ hdr->nexthdr = proto;
+ hdr->hop_limit = inet6_sk(sk)->hop_limit;
+
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
+}
+
+static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
+{
+ struct net_device *dev = idev->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.igmp_sk;
struct sk_buff *skb;
struct mld2_report *pmr;
struct in6_addr addr_buf;
const struct in6_addr *saddr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
int err;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- size += LL_ALLOCATED_SPACE(dev);
+ size += hlen + tlen;
/* limit our allocations to order-0 page */
size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
@@ -1367,9 +1556,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
if (!skb)
return NULL;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb->priority = TC_PRIO_CONTROL;
+ skb_reserve(skb, hlen);
- if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -1378,7 +1568,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
} else
saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
@@ -1402,15 +1592,16 @@ static void mld_sendpack(struct sk_buff *skb)
struct inet6_dev *idev;
struct net *net = dev_net(skb->dev);
int err;
- struct flowi fl;
+ struct flowi6 fl6;
struct dst_entry *dst;
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
- mldlen = skb->tail - skb->transport_header;
+ payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
+ sizeof(*pip6);
+ mldlen = skb_tail_pointer(skb) - skb_transport_header(skb);
pip6->payload_len = htons(payload_len);
pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
@@ -1418,18 +1609,16 @@ static void mld_sendpack(struct sk_buff *skb)
csum_partial(skb_transport_header(skb),
mldlen, 0));
- dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
-
- if (!dst) {
- err = -ENOMEM;
- goto err_out;
- }
-
- icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT,
+ icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
- err = xfrm_lookup(net, &dst, &fl, NULL, 0);
+ err = 0;
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ }
skb_dst_set(skb, dst);
if (err)
goto err_out;
@@ -1440,11 +1629,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
@@ -1467,7 +1657,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr;
if (!skb)
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(pmc->idev, dev->mtu);
if (!skb)
return NULL;
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
@@ -1485,9 +1675,10 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
skb_tailroom(skb)) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, int gdeleted, int sdeleted)
+ int type, int gdeleted, int sdeleted, int crsend)
{
- struct net_device *dev = pmc->idev->dev;
+ struct inet6_dev *idev = pmc->idev;
+ struct net_device *dev = idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -1516,7 +1707,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
}
}
first = 1;
@@ -1543,7 +1734,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
first = 1;
scount = 0;
}
@@ -1576,7 +1767,7 @@ empty_source:
if (type == MLD2_ALLOW_NEW_SOURCES ||
type == MLD2_BLOCK_OLD_SOURCES)
return skb;
- if (pmc->mca_crcount || isquery) {
+ if (pmc->mca_crcount || isquery || crsend) {
/* make sure we have room for group header */
if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
mld_sendpack(skb);
@@ -1598,8 +1789,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
struct sk_buff *skb = NULL;
int type;
+ read_lock_bh(&idev->lock);
if (!pmc) {
- read_lock_bh(&idev->lock);
for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
if (pmc->mca_flags & MAF_NOREPORT)
continue;
@@ -1608,19 +1799,19 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
type = MLD2_MODE_IS_EXCLUDE;
else
type = MLD2_MODE_IS_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
- read_unlock_bh(&idev->lock);
} else {
spin_lock_bh(&pmc->mca_lock);
if (pmc->mca_sfcount[MCAST_EXCLUDE])
type = MLD2_MODE_IS_EXCLUDE;
else
type = MLD2_MODE_IS_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
+ read_unlock_bh(&idev->lock);
if (skb)
mld_sendpack(skb);
}
@@ -1662,13 +1853,13 @@ static void mld_send_cr(struct inet6_dev *idev)
if (pmc->mca_sfmode == MCAST_INCLUDE) {
type = MLD2_BLOCK_OLD_SOURCES;
dtype = MLD2_BLOCK_OLD_SOURCES;
- skb = add_grec(skb, pmc, type, 1, 0);
- skb = add_grec(skb, pmc, dtype, 1, 1);
+ skb = add_grec(skb, pmc, type, 1, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 1, 1, 0);
}
if (pmc->mca_crcount) {
if (pmc->mca_sfmode == MCAST_EXCLUDE) {
type = MLD2_CHANGE_TO_INCLUDE;
- skb = add_grec(skb, pmc, type, 1, 0);
+ skb = add_grec(skb, pmc, type, 1, 0, 0);
}
pmc->mca_crcount--;
if (pmc->mca_crcount == 0) {
@@ -1699,8 +1890,8 @@ static void mld_send_cr(struct inet6_dev *idev)
type = MLD2_ALLOW_NEW_SOURCES;
dtype = MLD2_BLOCK_OLD_SOURCES;
}
- skb = add_grec(skb, pmc, type, 0, 0);
- skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 0, 1, 0); /* deleted sources */
/* filter mode changes */
if (pmc->mca_crcount) {
@@ -1708,7 +1899,7 @@ static void mld_send_cr(struct inet6_dev *idev)
type = MLD2_CHANGE_TO_EXCLUDE;
else
type = MLD2_CHANGE_TO_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
pmc->mca_crcount--;
}
spin_unlock_bh(&pmc->mca_lock);
@@ -1728,11 +1919,13 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
struct mld_msg *hdr;
const struct in6_addr *snd_addr, *saddr;
struct in6_addr addr_buf;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- struct flowi fl;
+ struct flowi6 fl6;
struct dst_entry *dst;
if (type == ICMPV6_MGM_REDUCTION)
@@ -1749,7 +1942,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPSTATS_MIB_OUT, full_len);
rcu_read_unlock();
- skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
+ skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
if (skb == NULL) {
rcu_read_lock();
@@ -1758,8 +1951,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
rcu_read_unlock();
return;
}
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb->priority = TC_PRIO_CONTROL;
+ skb_reserve(skb, hlen);
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
@@ -1770,14 +1963,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
} else
saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
+ ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
memset(hdr, 0, sizeof(struct mld_msg));
hdr->mld_type = type;
- ipv6_addr_copy(&hdr->mld_mca, addr);
+ hdr->mld_mca = *addr;
hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
IPPROTO_ICMPV6,
@@ -1786,19 +1979,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
- if (!dst) {
- err = -ENOMEM;
- goto err_out;
- }
-
- icmpv6_flow_init(sk, &fl, type,
+ icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
-
- err = xfrm_lookup(net, &dst, &fl, NULL, 0);
- if (err)
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
goto err_out;
+ }
skb_dst_set(skb, dst);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
@@ -1819,8 +2007,57 @@ err_out:
goto out;
}
+static void mld_send_initial_cr(struct inet6_dev *idev)
+{
+ struct sk_buff *skb;
+ struct ifmcaddr6 *pmc;
+ int type;
+
+ if (mld_in_v1_mode(idev))
+ return;
+
+ skb = NULL;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ spin_lock_bh(&pmc->mca_lock);
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ type = MLD2_CHANGE_TO_EXCLUDE;
+ else
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0, 1);
+ spin_unlock_bh(&pmc->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ if (skb)
+ mld_sendpack(skb);
+}
+
+void ipv6_mc_dad_complete(struct inet6_dev *idev)
+{
+ idev->mc_dad_count = idev->mc_qrv;
+ if (idev->mc_dad_count) {
+ mld_send_initial_cr(idev);
+ idev->mc_dad_count--;
+ if (idev->mc_dad_count)
+ mld_dad_start_timer(idev, idev->mc_maxdelay);
+ }
+}
+
+static void mld_dad_timer_expire(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ mld_send_initial_cr(idev);
+ if (idev->mc_dad_count) {
+ idev->mc_dad_count--;
+ if (idev->mc_dad_count)
+ mld_dad_start_timer(idev, idev->mc_maxdelay);
+ }
+ in6_dev_put(idev);
+}
+
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
- struct in6_addr *psfsrc)
+ const struct in6_addr *psfsrc)
{
struct ip6_sf_list *psf, *psf_prev;
int rv = 0;
@@ -1845,7 +2082,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
else
pmc->mca_sources = psf->sf_next;
if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
- !MLD_V1_SEEN(idev)) {
+ !mld_in_v1_mode(idev)) {
psf->sf_crcount = idev->mc_qrv;
psf->sf_next = pmc->mca_tomb;
pmc->mca_tomb = psf;
@@ -1856,8 +2093,8 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
return rv;
}
-static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
- int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta)
{
struct ifmcaddr6 *pmc;
@@ -1917,7 +2154,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
* Add multicast single-source filter to the interface list
*/
static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
- struct in6_addr *psfsrc, int delta)
+ const struct in6_addr *psfsrc)
{
struct ip6_sf_list *psf, *psf_prev;
@@ -2020,8 +2257,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
/*
* Add multicast source filter list to the interface list
*/
-static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
- int sfmode, int sfcount, struct in6_addr *psfsrc,
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta)
{
struct ifmcaddr6 *pmc;
@@ -2048,7 +2285,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
pmc->mca_sfcount[sfmode]++;
err = 0;
for (i=0; i<sfcount; i++) {
- err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+ err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
if (err)
break;
}
@@ -2058,7 +2295,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
if (!delta)
pmc->mca_sfcount[sfmode]--;
for (j=0; j<i; j++)
- (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+ ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
struct ip6_sf_list *psf;
@@ -2110,7 +2347,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
- delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+ delay = prandom_u32() % unsolicited_report_interval(ma->idev);
spin_lock_bh(&ma->mca_lock);
if (del_timer(&ma->mca_timer)) {
@@ -2145,7 +2382,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
static void igmp6_leave_group(struct ifmcaddr6 *ma)
{
- if (MLD_V1_SEEN(ma->idev)) {
+ if (mld_in_v1_mode(ma->idev)) {
if (ma->mca_flags & MAF_LAST_REPORTER)
igmp6_send(&ma->mca_addr, ma->idev->dev,
ICMPV6_MGM_REDUCTION);
@@ -2161,7 +2398,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2174,12 +2411,12 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
{
- if (MLD_V1_SEEN(idev))
+ if (mld_in_v1_mode(idev))
return;
idev->mc_ifc_count = idev->mc_qrv;
mld_ifc_start_timer(idev, 1);
@@ -2190,7 +2427,7 @@ static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
- if (MLD_V1_SEEN(ma->idev))
+ if (mld_in_v1_mode(ma->idev))
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
else
mld_send_report(ma->idev, ma);
@@ -2230,12 +2467,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Withdraw multicast list */
read_lock_bh(&idev->lock);
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- idev->mc_gq_running = 0;
- if (del_timer(&idev->mc_gq_timer))
- __in6_dev_put(idev);
+ mld_ifc_stop_timer(idev);
+ mld_gq_stop_timer(idev);
+ mld_dad_stop_timer(idev);
for (i = idev->mc_list; i; i=i->next)
igmp6_group_dropped(i);
@@ -2272,8 +2506,14 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
idev->mc_ifc_count = 0;
setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
(unsigned long)idev);
+ setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
+ (unsigned long)idev);
+
idev->mc_qrv = MLD_QRV_DEFAULT;
- idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+ idev->mc_qi = MLD_QI_DEFAULT;
+ idev->mc_qri = MLD_QRI_DEFAULT;
+
+ idev->mc_maxdelay = unsolicited_report_interval(idev);
idev->mc_v1_seen = 0;
write_unlock_bh(&idev->lock);
}
@@ -2604,10 +2844,10 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO,
- &igmp6_mcf_seq_fops))
+ if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ &igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
err = 0;
@@ -2615,14 +2855,14 @@ out:
return err;
out_proc_net_igmp6:
- proc_net_remove(net, "igmp6");
+ remove_proc_entry("igmp6", net->proc_net);
goto out;
}
static void __net_exit igmp6_proc_exit(struct net *net)
{
- proc_net_remove(net, "mcfilter6");
- proc_net_remove(net, "igmp6");
+ remove_proc_entry("mcfilter6", net->proc_net);
+ remove_proc_entry("igmp6", net->proc_net);
}
#else
static inline int igmp6_proc_init(struct net *net)
@@ -2641,8 +2881,7 @@ static int __net_init igmp6_net_init(struct net *net)
err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6,
SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
- printk(KERN_ERR
- "Failed to initialize the IGMP6 control socket (err %d).\n",
+ pr_err("Failed to initialize the IGMP6 control socket (err %d)\n",
err);
goto out;
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index d6e9599d070..db9b6cbc9db 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -13,8 +13,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Authors:
@@ -22,6 +21,8 @@
* Masahide NAKAMURA @USAGI
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/time.h>
@@ -44,7 +45,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
if (!data)
return NULL;
if (padlen == 1) {
- data[0] = IPV6_TLV_PAD0;
+ data[0] = IPV6_TLV_PAD1;
} else if (padlen > 1) {
data[0] = IPV6_TLV_PADN;
data[1] = padlen - 2;
@@ -84,28 +85,30 @@ static int mip6_mh_len(int type)
static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
- struct ip6_mh *mh;
+ struct ip6_mh _hdr;
+ const struct ip6_mh *mh;
- if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
- !pskb_may_pull(skb, (skb_transport_offset(skb) +
- ((skb_transport_header(skb)[1] + 1) << 3))))
+ mh = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (!mh)
return -1;
- mh = (struct ip6_mh *)skb_transport_header(skb);
+ if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
+ return -1;
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
- mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
- skb_network_header(skb)));
+ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
+ skb_network_header_len(skb));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
mh->ip6mh_proto);
- mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
- skb_network_header(skb)));
+ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
+ skb_network_header_len(skb));
return -1;
}
@@ -126,7 +129,7 @@ static struct mip6_report_rate_limiter mip6_report_rl = {
static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
int err = destopt->nexthdr;
@@ -181,8 +184,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
}
static inline int mip6_report_rl_allow(struct timeval *stamp,
- struct in6_addr *dst,
- struct in6_addr *src, int iif)
+ const struct in6_addr *dst,
+ const struct in6_addr *src, int iif)
{
int allow = 0;
@@ -195,26 +198,28 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
mip6_report_rl.iif = iif;
- ipv6_addr_copy(&mip6_report_rl.src, src);
- ipv6_addr_copy(&mip6_report_rl.dst, dst);
+ mip6_report_rl.src = *src;
+ mip6_report_rl.dst = *dst;
allow = 1;
}
spin_unlock_bh(&mip6_report_rl.lock);
return allow;
}
-static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
+ const struct flowi *fl)
{
struct net *net = xs_net(x);
struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ const struct flowi6 *fl6 = &fl->u.ip6;
struct ipv6_destopt_hao *hao = NULL;
struct xfrm_selector sel;
int offset;
struct timeval stamp;
int err = 0;
- if (unlikely(fl->proto == IPPROTO_MH &&
- fl->fl_mh_type <= IP6_MH_TYPE_MAX))
+ if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
+ fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
goto out;
if (likely(opt->dsthao)) {
@@ -239,14 +244,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
sizeof(sel.saddr));
sel.prefixlen_s = 128;
sel.family = AF_INET6;
- sel.proto = fl->proto;
- sel.dport = xfrm_flowi_dport(fl);
+ sel.proto = fl6->flowi6_proto;
+ sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
if (sel.dport)
sel.dport_mask = htons(~0);
- sel.sport = xfrm_flowi_sport(fl);
+ sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
if (sel.sport)
sel.sport_mask = htons(~0);
- sel.ifindex = fl->oif;
+ sel.ifindex = fl6->flowi6_oif;
err = km_report(net, IPPROTO_DSTOPTS, &sel,
(hao ? (xfrm_address_t *)&hao->addr : NULL));
@@ -262,7 +267,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -305,13 +311,12 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
static int mip6_destopt_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
- printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
- x->id.spi);
+ pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
- printk(KERN_INFO "%s: state's mode is not %u: %u\n",
- __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ pr_info("%s: state's mode is not %u: %u\n",
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
@@ -347,7 +352,7 @@ static const struct xfrm_type mip6_destopt_type =
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
int err = rt2->rt_hdr.nexthdr;
@@ -399,7 +404,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -441,13 +447,12 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
static int mip6_rthdr_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
- printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
- x->id.spi);
+ pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
- printk(KERN_INFO "%s: state's mode is not %u: %u\n",
- __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ pr_info("%s: state's mode is not %u: %u\n",
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
@@ -479,18 +484,18 @@ static const struct xfrm_type mip6_rthdr_type =
static int __init mip6_init(void)
{
- printk(KERN_INFO "Mobile IPv6\n");
+ pr_info("Mobile IPv6\n");
if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
- printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__);
+ pr_info("%s: can't add xfrm type(destopt)\n", __func__);
goto mip6_destopt_xfrm_fail;
}
if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
- printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__);
+ pr_info("%s: can't add xfrm type(rthdr)\n", __func__);
goto mip6_rthdr_xfrm_fail;
}
if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
- printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__);
+ pr_info("%s: can't add rawv6 mh filter\n", __func__);
goto mip6_rawv6_mh_fail;
}
@@ -508,11 +513,11 @@ static int __init mip6_init(void)
static void __exit mip6_fini(void)
{
if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
- printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__);
+ pr_info("%s: can't remove rawv6 mh filter\n", __func__);
if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__);
+ pr_info("%s: can't remove xfrm type(rthdr)\n", __func__);
if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__);
+ pr_info("%s: can't remove xfrm type(destopt)\n", __func__);
}
module_init(mip6_init);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e18f8413020..ca8d4ea48a5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -15,6 +15,7 @@
/*
* Changes:
*
+ * Alexey I. Froloff : RFC6106 (DNSSL) support
* Pierre Ynard : export userland ND options
* through netlink (RDNSS support)
* Lars Fenneberg : fixed MTU setting on receipt
@@ -26,27 +27,7 @@
* YOSHIFUJI Hideaki @USAGI : Verify ND options properly
*/
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
-#define ND_NOPRINTK(x...) do { ; } while(0)
-#define ND_PRINTK0 ND_PRINTK
-#define ND_PRINTK1 ND_NOPRINTK
-#define ND_PRINTK2 ND_NOPRINTK
-#define ND_PRINTK3 ND_NOPRINTK
-#if ND_DEBUG >= 1
-#undef ND_PRINTK1
-#define ND_PRINTK1 ND_PRINTK
-#endif
-#if ND_DEBUG >= 2
-#undef ND_PRINTK2
-#define ND_PRINTK2 ND_PRINTK
-#endif
-#if ND_DEBUG >= 3
-#undef ND_PRINTK3
-#define ND_PRINTK3 ND_PRINTK
-#endif
+#define pr_fmt(fmt) "ICMPv6: " fmt
#include <linux/module.h>
#include <linux/errno.h>
@@ -91,9 +72,18 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(val, level, fmt, ...) \
+do { \
+ if (val <= ND_DEBUG) \
+ net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
+} while (0)
+
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
- __u32 rnd);
+ __u32 *hash_rnd);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -107,8 +97,6 @@ static const struct neigh_ops ndisc_generic_ops = {
.error_report = ndisc_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
- .hh_output = dev_queue_xmit,
- .queue_xmit = dev_queue_xmit,
};
static const struct neigh_ops ndisc_hh_ops = {
@@ -117,22 +105,17 @@ static const struct neigh_ops ndisc_hh_ops = {
.error_report = ndisc_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_resolve_output,
- .hh_output = dev_queue_xmit,
- .queue_xmit = dev_queue_xmit,
};
static const struct neigh_ops ndisc_direct_ops = {
.family = AF_INET6,
- .output = dev_queue_xmit,
- .connected_output = dev_queue_xmit,
- .hh_output = dev_queue_xmit,
- .queue_xmit = dev_queue_xmit,
+ .output = neigh_direct_output,
+ .connected_output = neigh_direct_output,
};
struct neigh_table nd_tbl = {
.family = AF_INET6,
- .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
.key_len = sizeof(struct in6_addr),
.hash = ndisc_hash,
.constructor = ndisc_constructor,
@@ -142,17 +125,19 @@ struct neigh_table nd_tbl = {
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
- .base_reachable_time = ND_REACHABLE_TIME,
- .retrans_time = ND_RETRANS_TIMER,
- .gc_staletime = 60 * HZ,
.reachable_time = ND_REACHABLE_TIME,
- .delay_probe_time = 5 * HZ,
- .queue_len = 3,
- .ucast_probes = 3,
- .mcast_probes = 3,
- .anycast_delay = 1 * HZ,
- .proxy_delay = (8 * HZ) / 10,
- .proxy_qlen = 64,
+ .data = {
+ [NEIGH_VAR_MCAST_PROBES] = 3,
+ [NEIGH_VAR_UCAST_PROBES] = 3,
+ [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
+ [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
+ [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+ [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_PROXY_QLEN] = 64,
+ [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
+ [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
+ },
},
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
@@ -160,50 +145,12 @@ struct neigh_table nd_tbl = {
.gc_thresh3 = 1024,
};
-/* ND options */
-struct ndisc_options {
- struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
-#ifdef CONFIG_IPV6_ROUTE_INFO
- struct nd_opt_hdr *nd_opts_ri;
- struct nd_opt_hdr *nd_opts_ri_end;
-#endif
- struct nd_opt_hdr *nd_useropts;
- struct nd_opt_hdr *nd_useropts_end;
-};
-
-#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
-
-#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-
-/*
- * Return the padding between the option length and the start of the
- * link addr. Currently only IP-over-InfiniBand needs this, although
- * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
- * also need a pad of 2.
- */
-static int ndisc_addr_option_pad(unsigned short type)
-{
- switch (type) {
- case ARPHRD_INFINIBAND: return 2;
- default: return 0;
- }
-}
-
-static inline int ndisc_opt_addr_space(struct net_device *dev)
+static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
{
- return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
-}
-
-static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
- unsigned short addr_type)
-{
- int space = NDISC_OPT_SPACE(data_len);
- int pad = ndisc_addr_option_pad(addr_type);
+ int pad = ndisc_addr_option_pad(skb->dev->type);
+ int data_len = skb->dev->addr_len;
+ int space = ndisc_opt_addr_space(skb->dev);
+ u8 *opt = skb_put(skb, space);
opt[0] = type;
opt[1] = space>>3;
@@ -217,7 +164,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
opt += data_len;
if ((space -= data_len) > 0)
memset(opt, 0, space);
- return opt + space;
}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
@@ -235,7 +181,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS;
+ return opt->nd_opt_type == ND_OPT_RDNSS ||
+ opt->nd_opt_type == ND_OPT_DNSSL;
}
static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -249,8 +196,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
}
-static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
- struct ndisc_options *ndopts)
+struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+ struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -270,10 +217,9 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
case ND_OPT_MTU:
case ND_OPT_REDIRECT_HDR:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
- ND_PRINTK2(KERN_WARNING
- "%s(): duplicated ND6 option found: type=%d\n",
- __func__,
- nd_opt->nd_opt_type);
+ ND_PRINTK(2, warn,
+ "%s: duplicated ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
} else {
ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
}
@@ -301,10 +247,11 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
* to accommodate future extension to the
* protocol.
*/
- ND_PRINTK2(KERN_NOTICE
- "%s(): ignored unsupported option; type=%d, len=%d\n",
- __func__,
- nd_opt->nd_opt_type, nd_opt->nd_opt_len);
+ ND_PRINTK(2, notice,
+ "%s: ignored unsupported option; type=%d, len=%d\n",
+ __func__,
+ nd_opt->nd_opt_type,
+ nd_opt->nd_opt_len);
}
}
opt_len -= l;
@@ -313,18 +260,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
return ndopts;
}
-static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
- struct net_device *dev)
-{
- u8 *lladdr = (u8 *)(p + 1);
- int lladdrlen = p->nd_opt_len << 3;
- int prepad = ndisc_addr_option_pad(dev->type);
- if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
- return NULL;
- return lladdr + prepad;
-}
-
-int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
+int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
{
switch (dev->type) {
case ARPHRD_ETHER:
@@ -332,15 +268,14 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
case ARPHRD_FDDI:
ipv6_eth_mc_map(addr, buf);
return 0;
- case ARPHRD_IEEE802_TR:
- ipv6_tr_mc_map(addr,buf);
- return 0;
case ARPHRD_ARCNET:
ipv6_arcnet_mc_map(addr, buf);
return 0;
case ARPHRD_INFINIBAND:
ipv6_ib_mc_map(addr, dev->broadcast, buf);
return 0;
+ case ARPHRD_IPGRE:
+ return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
default:
if (dir) {
memcpy(buf, dev->broadcast, dev->addr_len);
@@ -354,16 +289,9 @@ EXPORT_SYMBOL(ndisc_mc_map);
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
- __u32 hash_rnd)
+ __u32 *hash_rnd)
{
- const u32 *p32 = pkey;
- u32 addr_hash, i;
-
- addr_hash = 0;
- for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
- addr_hash ^= *p32++;
-
- return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
+ return ndisc_hashfn(pkey, dev, hash_rnd);
}
static int ndisc_constructor(struct neighbour *neigh)
@@ -372,25 +300,22 @@ static int ndisc_constructor(struct neighbour *neigh)
struct net_device *dev = neigh->dev;
struct inet6_dev *in6_dev;
struct neigh_parms *parms;
- int is_multicast = ipv6_addr_is_multicast(addr);
+ bool is_multicast = ipv6_addr_is_multicast(addr);
- rcu_read_lock();
in6_dev = in6_dev_get(dev);
if (in6_dev == NULL) {
- rcu_read_unlock();
return -EINVAL;
}
parms = in6_dev->nd_parms;
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
- rcu_read_unlock();
neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
if (!dev->header_ops) {
neigh->nud_state = NUD_NOARP;
neigh->ops = &ndisc_direct_ops;
- neigh->output = neigh->ops->queue_xmit;
+ neigh->output = neigh_direct_output;
} else {
if (is_multicast) {
neigh->nud_state = NUD_NOARP;
@@ -442,102 +367,92 @@ static void pndisc_destructor(struct pneigh_entry *n)
ipv6_dev_mc_dec(dev, &maddr);
}
-struct sk_buff *ndisc_build_skb(struct net_device *dev,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h,
- const struct in6_addr *target,
- int llinfo)
+static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+ int len)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.ndisc_sk;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
- struct icmp6hdr *hdr;
- int len;
- int err;
- u8 *opt;
- if (!dev->addr_len)
- llinfo = 0;
-
- len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
- if (llinfo)
- len += ndisc_opt_addr_space(dev);
-
- skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_ALLOCATED_SPACE(dev)),
- 1, &err);
+ skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
if (!skb) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
- __func__, err);
+ ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+ __func__);
return NULL;
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
- skb->transport_header = skb->tail;
- skb_put(skb, len);
+ skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
+ skb_reset_transport_header(skb);
- hdr = (struct icmp6hdr *)skb_transport_header(skb);
- memcpy(hdr, icmp6h, sizeof(*hdr));
+ /* Manually assign socket ownership as we avoid calling
+ * sock_alloc_send_pskb() to bypass wmem buffer limits
+ */
+ skb_set_owner_w(skb, sk);
- opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
- if (target) {
- ipv6_addr_copy((struct in6_addr *)opt, target);
- opt += sizeof(*target);
- }
+ return skb;
+}
+
+static void ip6_nd_hdr(struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int hop_limit, int len)
+{
+ struct ipv6hdr *hdr;
- if (llinfo)
- ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
- dev->addr_len, dev->type);
+ skb_push(skb, sizeof(*hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
- hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial(hdr,
- len, 0));
+ ip6_flow_hdr(hdr, 0, 0);
- return skb;
-}
+ hdr->payload_len = htons(len);
+ hdr->nexthdr = IPPROTO_ICMPV6;
+ hdr->hop_limit = hop_limit;
-EXPORT_SYMBOL(ndisc_build_skb);
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
+}
-void ndisc_send_skb(struct sk_buff *skb,
- struct net_device *dev,
- struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h)
+static void ndisc_send_skb(struct sk_buff *skb,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
- struct flowi fl;
- struct dst_entry *dst;
- struct net *net = dev_net(dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(skb->dev);
struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
int err;
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
- icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
-
- dst = icmp6_dst_alloc(dev, neigh, daddr);
if (!dst) {
- kfree_skb(skb);
- return;
- }
+ struct flowi6 fl6;
- err = xfrm_lookup(net, &dst, &fl, NULL, 0);
- if (err < 0) {
- kfree_skb(skb);
- return;
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, &fl6);
+ if (IS_ERR(dst)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_dst_set(skb, dst);
}
- skb_dst_set(skb, dst);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
+ IPPROTO_ICMPV6,
+ csum_partial(icmp6h,
+ skb->len, 0));
+
+ ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
- idev = in6_dev_get(dst->dev);
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
@@ -547,49 +462,27 @@ void ndisc_send_skb(struct sk_buff *skb,
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ rcu_read_unlock();
}
-EXPORT_SYMBOL(ndisc_send_skb);
-
-/*
- * Send a Neighbour Discover packet
- */
-static void __ndisc_send(struct net_device *dev,
- struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- struct icmp6hdr *icmp6h, const struct in6_addr *target,
- int llinfo)
+void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ bool router, bool solicited, bool override, bool inc_opt)
{
struct sk_buff *skb;
-
- skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
- if (!skb)
- return;
-
- ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
-}
-
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *solicited_addr,
- int router, int solicited, int override, int inc_opt)
-{
struct in6_addr tmpaddr;
struct inet6_ifaddr *ifp;
const struct in6_addr *src_addr;
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
- };
+ struct nd_msg *msg;
+ int optlen = 0;
/* for anycast or proxy, solicited_addr != src_addr */
ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
if (ifp) {
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
- override = 0;
+ override = false;
inc_opt |= ifp->idev->cnf.force_tllao;
in6_ifa_put(ifp);
} else {
@@ -600,23 +493,64 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
src_addr = &tmpaddr;
}
- icmp6h.icmp6_router = router;
- icmp6h.icmp6_solicited = solicited;
- icmp6h.icmp6_override = override;
+ if (!dev->addr_len)
+ inc_opt = 0;
+ if (inc_opt)
+ optlen += ndisc_opt_addr_space(dev);
+
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct nd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+ .icmp6_router = router,
+ .icmp6_solicited = solicited,
+ .icmp6_override = override,
+ },
+ .target = *solicited_addr,
+ };
+
+ if (inc_opt)
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+ dev->dev_addr);
+
- __ndisc_send(dev, neigh, daddr, src_addr,
- &icmp6h, solicited_addr,
- inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
+ ndisc_send_skb(skb, daddr, src_addr);
+}
+
+static void ndisc_send_unsol_na(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+
+ idev = in6_dev_get(dev);
+ if (!idev)
+ return;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
+ /*router=*/ !!idev->cnf.forwarding,
+ /*solicited=*/ false, /*override=*/ true,
+ /*inc_opt=*/ true);
+ }
+ read_unlock_bh(&idev->lock);
+
+ in6_dev_put(idev);
}
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr)
{
+ struct sk_buff *skb;
struct in6_addr addr_buf;
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
- };
+ int inc_opt = dev->addr_len;
+ int optlen = 0;
+ struct nd_msg *msg;
if (saddr == NULL) {
if (ipv6_get_lladdr(dev, &addr_buf,
@@ -625,18 +559,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
saddr = &addr_buf;
}
- __ndisc_send(dev, neigh, daddr, saddr,
- &icmp6h, solicit,
- !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
+ if (ipv6_addr_any(saddr))
+ inc_opt = false;
+ if (inc_opt)
+ optlen += ndisc_opt_addr_space(dev);
+
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct nd_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct nd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+ },
+ .target = *solicit,
+ };
+
+ if (inc_opt)
+ ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+ dev->dev_addr);
+
+ ndisc_send_skb(skb, daddr, saddr);
}
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- struct icmp6hdr icmp6h = {
- .icmp6_type = NDISC_ROUTER_SOLICITATION,
- };
+ struct sk_buff *skb;
+ struct rs_msg *msg;
int send_sllao = dev->addr_len;
+ int optlen = 0;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
@@ -660,9 +613,25 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
}
#endif
- __ndisc_send(dev, NULL, daddr, saddr,
- &icmp6h, NULL,
- send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
+ if (send_sllao)
+ optlen += ndisc_opt_addr_space(dev);
+
+ skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!skb)
+ return;
+
+ msg = (struct rs_msg *)skb_put(skb, sizeof(*msg));
+ *msg = (struct rs_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_ROUTER_SOLICITATION,
+ },
+ };
+
+ if (send_sllao)
+ ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
+ dev->dev_addr);
+
+ ndisc_send_skb(skb, daddr, saddr);
}
@@ -689,16 +658,15 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
saddr = &ipv6_hdr(skb)->saddr;
- if ((probes -= neigh->parms->ucast_probes) < 0) {
+ if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) {
if (!(neigh->nud_state & NUD_VALID)) {
- ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
- __func__, target);
+ ND_PRINTK(1, dbg,
+ "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
+ __func__, target);
}
ndisc_send_ns(dev, neigh, target, target, saddr);
- } else if ((probes -= neigh->parms->app_probes) < 0) {
-#ifdef CONFIG_ARPD
+ } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
-#endif
} else {
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
@@ -723,10 +691,10 @@ static int pndisc_is_router(const void *pkey,
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
- struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
- struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+ const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -734,12 +702,16 @@ static void ndisc_recv_ns(struct sk_buff *skb)
struct inet6_dev *idev = NULL;
struct neighbour *neigh;
int dad = ipv6_addr_any(saddr);
- int inc;
+ bool inc;
int is_router = -1;
+ if (skb->len < sizeof(struct nd_msg)) {
+ ND_PRINTK(2, warn, "NS: packet too short\n");
+ return;
+ }
+
if (ipv6_addr_is_multicast(&msg->target)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: multicast target address");
+ ND_PRINTK(2, warn, "NS: multicast target address\n");
return;
}
@@ -747,27 +719,21 @@ static void ndisc_recv_ns(struct sk_buff *skb)
* RFC2461 7.1.1:
* DAD has to be destined for solicited node multicast address.
*/
- if (dad &&
- !(daddr->s6_addr32[0] == htonl(0xff020000) &&
- daddr->s6_addr32[1] == htonl(0x00000000) &&
- daddr->s6_addr32[2] == htonl(0x00000001) &&
- daddr->s6_addr [12] == 0xff )) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: bad DAD packet (wrong destination)\n");
+ if (dad && !ipv6_addr_is_solict_mult(daddr)) {
+ ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
return;
}
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: invalid ND options\n");
+ ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
if (!lladdr) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: invalid link-layer address length\n");
+ ND_PRINTK(2, warn,
+ "NS: invalid link-layer address length\n");
return;
}
@@ -777,8 +743,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
* in the message.
*/
if (dad) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
+ ND_PRINTK(2, warn,
+ "NS: bad DAD packet (link-layer address option)\n");
return;
}
}
@@ -790,20 +756,6 @@ static void ndisc_recv_ns(struct sk_buff *skb)
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
- if (dev->type == ARPHRD_IEEE802_TR) {
- const unsigned char *sadr;
- sadr = skb_mac_header(skb);
- if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
- sadr[9] == dev->dev_addr[1] &&
- sadr[10] == dev->dev_addr[2] &&
- sadr[11] == dev->dev_addr[3] &&
- sadr[12] == dev->dev_addr[4] &&
- sadr[13] == dev->dev_addr[5]) {
- /* looped-back to us */
- goto out;
- }
- }
-
/*
* We are colliding with another node
* who is doing DAD
@@ -839,8 +791,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
- inc != 0 &&
- idev->nd_parms->proxy_delay != 0) {
+ inc &&
+ NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
/*
* for anycast or proxy,
* sender should delay its response
@@ -858,11 +810,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
}
if (is_router < 0)
- is_router = !!idev->cnf.forwarding;
+ is_router = idev->cnf.forwarding;
if (dad) {
ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
- is_router, 0, (ifp != NULL), 1);
+ !!is_router, false, (ifp != NULL), true);
goto out;
}
@@ -883,8 +835,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NEIGH_UPDATE_F_OVERRIDE);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, neigh, saddr, &msg->target,
- is_router,
- 1, (ifp != NULL && inc), inc);
+ !!is_router,
+ true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
}
@@ -900,9 +852,9 @@ static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
- struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+ const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -910,42 +862,39 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct neighbour *neigh;
if (skb->len < sizeof(struct nd_msg)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NA: packet too short\n");
+ ND_PRINTK(2, warn, "NA: packet too short\n");
return;
}
if (ipv6_addr_is_multicast(&msg->target)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NA: target address is multicast.\n");
+ ND_PRINTK(2, warn, "NA: target address is multicast\n");
return;
}
if (ipv6_addr_is_multicast(daddr) &&
msg->icmph.icmp6_solicited) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NA: solicited NA is multicasted.\n");
+ ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
return;
}
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NS: invalid ND option\n");
+ ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
if (!lladdr) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NA: invalid link-layer address length\n");
+ ND_PRINTK(2, warn,
+ "NA: invalid link-layer address length\n");
return;
}
}
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
- if (ifp->flags & IFA_F_TENTATIVE) {
- addrconf_dad_failure(ifp);
- return;
+ if (skb->pkt_type != PACKET_LOOPBACK
+ && (ifp->flags & IFA_F_TENTATIVE)) {
+ addrconf_dad_failure(ifp);
+ return;
}
/* What should we make now? The advertisement
is invalid, but ndisc specs say nothing
@@ -957,9 +906,9 @@ static void ndisc_recv_na(struct sk_buff *skb)
unsolicited advertisement.
*/
if (skb->pkt_type != PACKET_LOOPBACK)
- ND_PRINTK1(KERN_WARNING
- "ICMPv6 NA: someone advertises our address %pI6 on %s!\n",
- &ifp->addr, ifp->idev->dev->name);
+ ND_PRINTK(1, warn,
+ "NA: someone advertises our address %pI6 on %s!\n",
+ &ifp->addr, ifp->idev->dev->name);
in6_ifa_put(ifp);
return;
}
@@ -980,7 +929,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
- /* XXX: idev->cnf.prixy_ndp */
+ /* XXX: idev->cnf.proxy_ndp */
goto out;
}
@@ -995,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/*
* Change: router to host
*/
- struct rt6_info *rt;
- rt = rt6_get_dflt_router(saddr, dev);
- if (rt)
- ip6_del_rt(rt);
+ rt6_clean_tohost(dev_net(dev), saddr);
}
out:
@@ -1012,17 +958,16 @@ static void ndisc_recv_rs(struct sk_buff *skb)
unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
struct neighbour *neigh;
struct inet6_dev *idev;
- struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
struct ndisc_options ndopts;
u8 *lladdr = NULL;
if (skb->len < sizeof(*rs_msg))
return;
- idev = in6_dev_get(skb->dev);
+ idev = __in6_dev_get(skb->dev);
if (!idev) {
- if (net_ratelimit())
- ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
+ ND_PRINTK(1, err, "RS: can't find in6 device\n");
return;
}
@@ -1039,8 +984,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
/* Parse ND options */
if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
- if (net_ratelimit())
- ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
+ ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
@@ -1060,7 +1004,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
neigh_release(neigh);
}
out:
- in6_dev_put(idev);
+ return;
}
static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
@@ -1095,8 +1039,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
- NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
- &ipv6_hdr(ra)->saddr);
+ if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
+ &ipv6_hdr(ra)->saddr))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
@@ -1109,18 +1054,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
}
-static inline int accept_ra(struct inet6_dev *in6_dev)
-{
- /*
- * If forwarding is enabled, RA are not accepted unless the special
- * hybrid mode (accept_ra=2) is enabled.
- */
- if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
- return 0;
-
- return in6_dev->cnf.accept_ra;
-}
-
static void ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1134,23 +1067,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
+ optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
+ sizeof(struct ra_msg);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: source address is not link-local.\n");
+ ND_PRINTK(2, warn, "RA: source address is not link-local\n");
return;
}
if (optlen < 0) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: packet too short\n");
+ ND_PRINTK(2, warn, "RA: packet too short\n");
return;
}
#ifdef CONFIG_IPV6_NDISC_NODETYPE
if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: from host or unauthorized router\n");
+ ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
return;
}
#endif
@@ -1159,22 +1090,19 @@ static void ndisc_router_discovery(struct sk_buff *skb)
* set the RA_RECV flag in the interface
*/
- in6_dev = in6_dev_get(skb->dev);
+ in6_dev = __in6_dev_get(skb->dev);
if (in6_dev == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 RA: can't find inet6 device for %s.\n",
- skb->dev->name);
+ ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
+ skb->dev->name);
return;
}
if (!ndisc_parse_options(opt, optlen, &ndopts)) {
- in6_dev_put(in6_dev);
- ND_PRINTK2(KERN_WARNING
- "ICMP6 RA: invalid ND options\n");
+ ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
- if (!accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev))
goto skip_linkparms;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1205,6 +1133,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (!in6_dev->cnf.accept_ra_defrtr)
goto skip_defrtr;
+ if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ goto skip_defrtr;
+
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -1217,35 +1148,38 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
- if (rt)
- neigh = rt->rt6i_nexthop;
-
+ if (rt) {
+ neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+ if (!neigh) {
+ ND_PRINTK(0, err,
+ "RA: %s got default router without neighbour\n",
+ __func__);
+ ip6_rt_put(rt);
+ return;
+ }
+ }
if (rt && lifetime == 0) {
- neigh_clone(neigh);
ip6_del_rt(rt);
rt = NULL;
}
if (rt == NULL && lifetime) {
- ND_PRINTK3(KERN_DEBUG
- "ICMPv6 RA: adding default router.\n");
+ ND_PRINTK(3, dbg, "RA: adding default router\n");
rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 RA: %s() failed to add default route.\n",
- __func__);
- in6_dev_put(in6_dev);
+ ND_PRINTK(0, err,
+ "RA: %s failed to add default route\n",
+ __func__);
return;
}
- neigh = rt->rt6i_nexthop;
+ neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
if (neigh == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 RA: %s() got default router without neighbour.\n",
- __func__);
- dst_release(&rt->dst);
- in6_dev_put(in6_dev);
+ ND_PRINTK(0, err,
+ "RA: %s got default router without neighbour\n",
+ __func__);
+ ip6_rt_put(rt);
return;
}
neigh->flags |= NTF_ROUTER;
@@ -1254,12 +1188,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
if (rt)
- rt->rt6i_expires = jiffies + (HZ * lifetime);
-
+ rt6_set_expires(rt, jiffies + (HZ * lifetime));
if (ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
if (rt)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
@@ -1275,7 +1209,7 @@ skip_defrtr:
rtime = (rtime*HZ)/1000;
if (rtime < HZ/10)
rtime = HZ/10;
- in6_dev->nd_parms->retrans_time = rtime;
+ NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
in6_dev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
}
@@ -1287,9 +1221,11 @@ skip_defrtr:
if (rtime < HZ/10)
rtime = HZ/10;
- if (rtime != in6_dev->nd_parms->base_reachable_time) {
- in6_dev->nd_parms->base_reachable_time = rtime;
- in6_dev->nd_parms->gc_staletime = 3 * rtime;
+ if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
+ NEIGH_VAR_SET(in6_dev->nd_parms,
+ BASE_REACHABLE_TIME, rtime);
+ NEIGH_VAR_SET(in6_dev->nd_parms,
+ GC_STALETIME, 3 * rtime);
in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
in6_dev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
@@ -1312,8 +1248,8 @@ skip_linkparms:
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
skb->dev);
if (!lladdr) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: invalid link-layer address length\n");
+ ND_PRINTK(2, warn,
+ "RA: invalid link-layer address length\n");
goto out;
}
}
@@ -1324,10 +1260,13 @@ skip_linkparms:
NEIGH_UPDATE_F_ISROUTER);
}
- if (!accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev))
goto out;
#ifdef CONFIG_IPV6_ROUTE_INFO
+ if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ goto skip_routeinfo;
+
if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_ri;
@@ -1339,12 +1278,17 @@ skip_linkparms:
ri->prefix_len == 0)
continue;
#endif
+ if (ri->prefix_len == 0 &&
+ !in6_dev->cnf.accept_ra_defrtr)
+ continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
}
}
+
+skip_routeinfo:
#endif
#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1358,7 +1302,9 @@ skip_linkparms:
for (p = ndopts.nd_opts_pi;
p;
p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
- addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
+ addrconf_prefix_rcv(skb->dev, (u8 *)p,
+ (p->nd_opt_len) << 3,
+ ndopts.nd_opts_src_lladdr != NULL);
}
}
@@ -1370,14 +1316,12 @@ skip_linkparms:
mtu = ntohl(n);
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: invalid mtu: %d\n",
- mtu);
+ ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
in6_dev->cnf.mtu6 = mtu;
if (rt)
- rt->dst.metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(&rt->dst, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
@@ -1393,252 +1337,180 @@ skip_linkparms:
}
if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 RA: invalid RA options");
+ ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
- if (rt)
- dst_release(&rt->dst);
- else if (neigh)
+ ip6_rt_put(rt);
+ if (neigh)
neigh_release(neigh);
- in6_dev_put(in6_dev);
}
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
- struct inet6_dev *in6_dev;
- struct icmp6hdr *icmph;
- struct in6_addr *dest;
- struct in6_addr *target; /* new first hop to destination */
- struct neighbour *neigh;
- int on_link = 0;
+ u8 *hdr;
struct ndisc_options ndopts;
- int optlen;
- u8 *lladdr = NULL;
+ struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+ offsetof(struct rd_msg, opt));
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
case NDISC_NODETYPE_NODEFAULT:
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: from host or unauthorized router\n");
+ ND_PRINTK(2, warn,
+ "Redirect: from host or unauthorized router\n");
return;
}
#endif
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: source address is not link-local.\n");
+ ND_PRINTK(2, warn,
+ "Redirect: source address is not link-local\n");
return;
}
- optlen = skb->tail - skb->transport_header;
- optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
-
- if (optlen < 0) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: packet too short\n");
+ if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
return;
- }
-
- icmph = icmp6_hdr(skb);
- target = (struct in6_addr *) (icmph + 1);
- dest = target + 1;
- if (ipv6_addr_is_multicast(dest)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: destination address is multicast.\n");
+ if (!ndopts.nd_opts_rh) {
+ ip6_redirect_no_header(skb, dev_net(skb->dev),
+ skb->dev->ifindex, 0);
return;
}
- if (ipv6_addr_equal(dest, target)) {
- on_link = 1;
- } else if (ipv6_addr_type(target) !=
- (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: target address is not link-local unicast.\n");
+ hdr = (u8 *)ndopts.nd_opts_rh;
+ hdr += 8;
+ if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
return;
- }
- in6_dev = in6_dev_get(skb->dev);
- if (!in6_dev)
- return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
- in6_dev_put(in6_dev);
- return;
- }
+ icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
+}
- /* RFC2461 8.1:
- * The IP source address of the Redirect MUST be the same as the current
- * first-hop router for the specified ICMP Destination Address.
- */
+static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
+ struct sk_buff *orig_skb,
+ int rd_len)
+{
+ u8 *opt = skb_put(skb, rd_len);
- if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: invalid ND options\n");
- in6_dev_put(in6_dev);
- return;
- }
- if (ndopts.nd_opts_tgt_lladdr) {
- lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
- skb->dev);
- if (!lladdr) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: invalid link-layer address length\n");
- in6_dev_put(in6_dev);
- return;
- }
- }
+ memset(opt, 0, 8);
+ *(opt++) = ND_OPT_REDIRECT_HDR;
+ *(opt++) = (rd_len >> 3);
+ opt += 6;
- neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
- if (neigh) {
- rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr, neigh, lladdr,
- on_link);
- neigh_release(neigh);
- }
- in6_dev_put(in6_dev);
+ memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
}
-void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
- const struct in6_addr *target)
+void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.ndisc_sk;
- int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ int optlen = 0;
+ struct inet_peer *peer;
struct sk_buff *buff;
- struct icmp6hdr *icmph;
+ struct rd_msg *msg;
struct in6_addr saddr_buf;
- struct in6_addr *addrp;
struct rt6_info *rt;
struct dst_entry *dst;
- struct inet6_dev *idev;
- struct flowi fl;
- u8 *opt;
+ struct flowi6 fl6;
int rd_len;
- int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: no link-local address on %s\n",
- dev->name);
+ ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
+ dev->name);
return;
}
if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: target address is not link-local unicast.\n");
+ ND_PRINTK(2, warn,
+ "Redirect: target address is not link-local unicast\n");
return;
}
- icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
+ icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
&saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
- dst = ip6_route_output(net, NULL, &fl);
- if (dst == NULL)
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
return;
-
- err = xfrm_lookup(net, &dst, &fl, NULL, 0);
- if (err)
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
return;
rt = (struct rt6_info *) dst;
if (rt->rt6i_flags & RTF_GATEWAY) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 Redirect: destination is not a neighbour.\n");
+ ND_PRINTK(2, warn,
+ "Redirect: destination is not a neighbour\n");
goto release;
}
- if (!xrlim_allow(dst, 1*HZ))
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ ret = inet_peer_xrlim_allow(peer, 1*HZ);
+ if (peer)
+ inet_putpeer(peer);
+ if (!ret)
goto release;
if (dev->addr_len) {
+ struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
+ if (!neigh) {
+ ND_PRINTK(2, warn,
+ "Redirect: no neigh for target address\n");
+ goto release;
+ }
+
read_lock_bh(&neigh->lock);
if (neigh->nud_state & NUD_VALID) {
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- len += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev);
} else
read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
}
rd_len = min_t(unsigned int,
- IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+ IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
+ skb->len + 8);
rd_len &= ~0x7;
- len += rd_len;
-
- buff = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_ALLOCATED_SPACE(dev)),
- 1, &err);
- if (buff == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n",
- __func__, err);
- goto release;
- }
-
- skb_reserve(buff, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
- IPPROTO_ICMPV6, len);
-
- skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
- skb_put(buff, len);
- icmph = icmp6_hdr(buff);
-
- memset(icmph, 0, sizeof(struct icmp6hdr));
- icmph->icmp6_type = NDISC_REDIRECT;
+ optlen += rd_len;
- /*
- * copy target and destination addresses
- */
-
- addrp = (struct in6_addr *)(icmph + 1);
- ipv6_addr_copy(addrp, target);
- addrp++;
- ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
+ buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
+ if (!buff)
+ goto release;
- opt = (u8*) (addrp + 1);
+ msg = (struct rd_msg *)skb_put(buff, sizeof(*msg));
+ *msg = (struct rd_msg) {
+ .icmph = {
+ .icmp6_type = NDISC_REDIRECT,
+ },
+ .target = *target,
+ .dest = ipv6_hdr(skb)->daddr,
+ };
/*
* include target_address option
*/
if (ha)
- opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
- dev->addr_len, dev->type);
+ ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
/*
* build redirect option and copy skb over to the new packet.
*/
- memset(opt, 0, 8);
- *(opt++) = ND_OPT_REDIRECT_HDR;
- *(opt++) = (rd_len >> 3);
- opt += 6;
-
- memcpy(opt, ipv6_hdr(skb), rd_len - 8);
-
- icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
- len, IPPROTO_ICMPV6,
- csum_partial(icmph, len, 0));
+ if (rd_len)
+ ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
skb_dst_set(buff, dst);
- idev = in6_dev_get(dst->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
- dst_output);
- if (!err) {
- ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
- ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- }
-
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
return;
release:
@@ -1651,11 +1523,28 @@ static void pndisc_redo(struct sk_buff *skb)
kfree_skb(skb);
}
+static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
+{
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+ if (!idev)
+ return true;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
+ idev->cnf.suppress_frag_ndisc) {
+ net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
+ return true;
+ }
+ return false;
+}
+
int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
- if (!pskb_may_pull(skb, skb->len))
+ if (ndisc_suppress_frag_ndisc(skb))
+ return 0;
+
+ if (skb_linearize(skb))
return 0;
msg = (struct nd_msg *)skb_transport_header(skb);
@@ -1663,16 +1552,14 @@ int ndisc_rcv(struct sk_buff *skb)
__skb_push(skb, skb->data - skb_transport_header(skb));
if (ipv6_hdr(skb)->hop_limit != 255) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NDISC: invalid hop-limit: %d\n",
- ipv6_hdr(skb)->hop_limit);
+ ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
+ ipv6_hdr(skb)->hop_limit);
return 0;
}
if (msg->icmph.icmp6_code != 0) {
- ND_PRINTK2(KERN_WARNING
- "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
- msg->icmph.icmp6_code);
+ ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
+ msg->icmph.icmp6_code);
return 0;
}
@@ -1705,17 +1592,27 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
+ struct inet6_dev *idev;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
- fib6_run_gc(~0UL, net);
+ fib6_run_gc(0, net, false);
+ idev = in6_dev_get(dev);
+ if (!idev)
+ break;
+ if (idev->cnf.ndisc_notify)
+ ndisc_send_unsol_na(dev);
+ in6_dev_put(idev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
- fib6_run_gc(~0UL, net);
+ fib6_run_gc(0, net, false);
+ break;
+ case NETDEV_NOTIFY_PEERS:
+ ndisc_send_unsol_na(dev);
break;
default:
break;
@@ -1736,11 +1633,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
strcpy(warncomm, current->comm);
- printk(KERN_WARNING
- "process `%s' is using deprecated sysctl (%s) "
- "net.ipv6.neigh.%s.%s; "
- "Use net.ipv6.neigh.%s.%s_ms "
- "instead.\n",
+ pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
dev_name, ctl->procname);
@@ -1759,22 +1652,23 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu
ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
if (strcmp(ctl->procname, "retrans_time") == 0)
- ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
else if (strcmp(ctl->procname, "base_reachable_time") == 0)
- ret = proc_dointvec_jiffies(ctl, write,
- buffer, lenp, ppos);
+ ret = neigh_proc_dointvec_jiffies(ctl, write,
+ buffer, lenp, ppos);
else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
(strcmp(ctl->procname, "base_reachable_time_ms") == 0))
- ret = proc_dointvec_ms_jiffies(ctl, write,
- buffer, lenp, ppos);
+ ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
+ buffer, lenp, ppos);
else
ret = -1;
if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
- if (ctl->data == &idev->nd_parms->base_reachable_time)
- idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+ if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
+ idev->nd_parms->reachable_time =
+ neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
idev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
@@ -1794,9 +1688,9 @@ static int __net_init ndisc_net_init(struct net *net)
err = inet_ctl_sock_create(&sk, PF_INET6,
SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
- err);
+ ND_PRINTK(0, err,
+ "NDISC: Failed to initialize the control socket (err %d)\n",
+ err);
return err;
}
@@ -1833,29 +1727,33 @@ int __init ndisc_init(void)
neigh_table_init(&nd_tbl);
#ifdef CONFIG_SYSCTL
- err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
+ err = neigh_sysctl_register(NULL, &nd_tbl.parms,
&ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
-#endif
- err = register_netdevice_notifier(&ndisc_netdev_notifier);
- if (err)
- goto out_unregister_sysctl;
out:
+#endif
return err;
-out_unregister_sysctl:
#ifdef CONFIG_SYSCTL
- neigh_sysctl_unregister(&nd_tbl.parms);
out_unregister_pernet:
-#endif
unregister_pernet_subsys(&ndisc_net_ops);
goto out;
+#endif
}
-void ndisc_cleanup(void)
+int __init ndisc_late_init(void)
+{
+ return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
{
unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 35915e8617f..d38e6a8d8b9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,8 +1,16 @@
+/*
+ * IPv6 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/addrconf.h>
#include <net/dst.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -13,21 +21,24 @@
int ip6_route_me_harder(struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int hh_len;
struct dst_entry *dst;
- struct flowi fl = {
- .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
- .mark = skb->mark,
- .fl6_dst = iph->daddr,
- .fl6_src = iph->saddr,
+ struct flowi6 fl6 = {
+ .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ .flowi6_mark = skb->mark,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
};
+ int err;
- dst = ip6_route_output(net, skb->sk, &fl);
- if (dst->error) {
+ dst = ip6_route_output(net, skb->sk, &fl6);
+ err = dst->error;
+ if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return -EINVAL;
+ return err;
}
/* Drop old route. */
@@ -37,14 +48,22 @@ int ip6_route_me_harder(struct sk_buff *skb)
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
+ xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
- if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
- return -1;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
+ /* Change in oif may mean change in hh_len. */
+ hh_len = skb_dst(skb)->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+ 0, GFP_ATOMIC))
+ return -ENOMEM;
+
return 0;
}
EXPORT_SYMBOL(ip6_route_me_harder);
@@ -66,7 +85,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
if (entry->hook == NF_INET_LOCAL_OUT) {
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
rt_info->daddr = iph->daddr;
rt_info->saddr = iph->saddr;
@@ -80,7 +99,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
if (entry->hook == NF_INET_LOCAL_OUT) {
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
@@ -89,16 +108,32 @@ static int nf_ip6_reroute(struct sk_buff *skb,
return 0;
}
-static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
+static int nf_ip6_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict)
{
- *dst = ip6_route_output(&init_net, NULL, fl);
- return (*dst)->error;
+ static const struct ipv6_pinfo fake_pinfo;
+ static const struct inet_sock fake_sk = {
+ /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
+ .sk.sk_bound_dev_if = 1,
+ .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
+ };
+ const void *sk = strict ? &fake_sk : NULL;
+ struct dst_entry *result;
+ int err;
+
+ result = ip6_route_output(net, sk, &fl->u.ip6);
+ err = result->error;
+ if (err)
+ dst_release(result);
+ else
+ *dst = result;
+ return err;
}
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
@@ -132,7 +167,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol)
{
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
__wsum hsum;
__sum16 csum = 0;
@@ -154,6 +189,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
return csum;
};
+static const struct nf_ipv6_ops ipv6ops = {
+ .chk_addr = ipv6_chk_addr,
+};
+
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
.checksum = nf_ip6_checksum,
@@ -166,6 +205,7 @@ static const struct nf_afinfo nf_ip6_afinfo = {
int __init ipv6_netfilter_init(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
return nf_register_afinfo(&nf_ip6_afinfo);
}
@@ -174,5 +214,6 @@ int __init ipv6_netfilter_init(void)
*/
void ipv6_netfilter_fini(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, NULL);
nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 448464844a2..4bff1f297e3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,27 +25,35 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
-config IP6_NF_QUEUE
- tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
- depends on INET && IPV6 && NETFILTER
- depends on NETFILTER_ADVANCED
- ---help---
-
- This option adds a queue handler to the kernel for IPv6
- packets which enables users to receive the filtered packets
- with QUEUE target using libipq.
-
- This option enables the old IPv6-only "ip6_queue" implementation
- which has been obsoleted by the new "nfnetlink_queue" code (see
- CONFIG_NETFILTER_NETLINK_QUEUE).
-
- (C) Fernando Anton 2001
- IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
- Universidad Carlos III de Madrid
- Universidad Politecnica de Alcala de Henares
- email: <fanton@it.uc3m.es>.
+config NF_TABLES_IPV6
+ depends on NF_TABLES
+ tristate "IPv6 nf_tables support"
+ help
+ This option enables the IPv6 support for nf_tables.
- To compile it as a module, choose M here. If unsure, say N.
+config NFT_CHAIN_ROUTE_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv6 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, flowlabel, hop-limit and
+ the packet mark.
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ depends on NF_NAT_IPV6 && NFT_NAT
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ default NFT_REJECT
+ tristate
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
@@ -125,6 +133,16 @@ config IP6_NF_MATCH_MH
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
+
+ To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ip6t_rpfilter.
+
config IP6_NF_MATCH_RT
tristate '"rt" Routing header match support'
depends on NETFILTER_ADVANCED
@@ -144,15 +162,6 @@ config IP6_NF_TARGET_HL
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
-config IP6_NF_TARGET_LOG
- tristate "LOG target support"
- default m if NETFILTER_ADVANCED=n
- help
- This option adds a `LOG' target, which allows you to create rules in
- any iptables table which records the packet header to the syslog.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
@@ -174,6 +183,19 @@ config IP6_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
@@ -186,7 +208,6 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
- depends on NETFILTER_ADVANCED
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -203,9 +224,44 @@ config IP6_NF_SECURITY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
-
+
If unsure, say N.
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in ip6tables, see the man page for ip6tables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if NF_NAT_IPV6
+
+config IP6_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_NPT
+ tristate "NPT (Network Prefix translation) target support"
+ help
+ This option adds the `SNPT' and `DNPT' target, which perform
+ stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+endif # NF_NAT_IPV6
+
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index abfee91ce81..70d3dd66f2c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -6,20 +6,29 @@
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
-obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
+
+nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
@@ -27,8 +36,11 @@ obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
+obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
# targets
-obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
+obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
+obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
deleted file mode 100644
index 413ab0754e1..00000000000
--- a/net/ipv6/netfilter/ip6_queue.c
+++ /dev/null
@@ -1,638 +0,0 @@
-/*
- * This is a module which is used for queueing IPv6 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2001 Fernando Anton, this code is GPL.
- * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
- * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
- * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
- * email: fanton@it.uc3m.es
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ipv6.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/netfilter/nf_queue.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip6_queue"
-#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
-
-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
-
-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_SPINLOCK(queue_lock);
-static int peer_pid __read_mostly;
-static unsigned int copy_range __read_mostly;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
-static DEFINE_MUTEX(ipqnl_mutex);
-
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
- list_add_tail(&entry->list, &queue_list);
- queue_total++;
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status = 0;
-
- switch(mode) {
- case IPQ_COPY_NONE:
- case IPQ_COPY_META:
- copy_mode = mode;
- copy_range = 0;
- break;
-
- case IPQ_COPY_PACKET:
- if (range > 0xFFFF)
- range = 0xFFFF;
- copy_range = range;
- copy_mode = mode;
- break;
-
- default:
- status = -EINVAL;
-
- }
- return status;
-}
-
-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
-
-static inline void
-__ipq_reset(void)
-{
- peer_pid = 0;
- net_disable_timestamp();
- __ipq_set_mode(IPQ_COPY_NONE, 0);
- __ipq_flush(NULL, 0);
-}
-
-static struct nf_queue_entry *
-ipq_find_dequeue_entry(unsigned long id)
-{
- struct nf_queue_entry *entry = NULL, *i;
-
- spin_lock_bh(&queue_lock);
-
- list_for_each_entry(i, &queue_list, list) {
- if ((unsigned long)i == id) {
- entry = i;
- break;
- }
- }
-
- if (entry) {
- list_del(&entry->list);
- queue_total--;
- }
-
- spin_unlock_bh(&queue_lock);
- return entry;
-}
-
-static void
-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
- struct nf_queue_entry *entry, *next;
-
- list_for_each_entry_safe(entry, next, &queue_list, list) {
- if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
- queue_total--;
- nf_reinject(entry, NF_DROP);
- }
- }
-}
-
-static void
-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
- spin_lock_bh(&queue_lock);
- __ipq_flush(cmpfn, data);
- spin_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
-{
- sk_buff_data_t old_tail;
- size_t size = 0;
- size_t data_len = 0;
- struct sk_buff *skb;
- struct ipq_packet_msg *pmsg;
- struct nlmsghdr *nlh;
- struct timeval tv;
-
- switch (ACCESS_ONCE(copy_mode)) {
- case IPQ_COPY_META:
- case IPQ_COPY_NONE:
- size = NLMSG_SPACE(sizeof(*pmsg));
- break;
-
- case IPQ_COPY_PACKET:
- if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
- (*errp = skb_checksum_help(entry->skb)))
- return NULL;
-
- data_len = ACCESS_ONCE(copy_range);
- if (data_len == 0 || data_len > entry->skb->len)
- data_len = entry->skb->len;
-
- size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
- break;
-
- default:
- *errp = -EINVAL;
- return NULL;
- }
-
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- goto nlmsg_failure;
-
- old_tail = skb->tail;
- nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
- pmsg = NLMSG_DATA(nlh);
- memset(pmsg, 0, sizeof(*pmsg));
-
- pmsg->packet_id = (unsigned long )entry;
- pmsg->data_len = data_len;
- tv = ktime_to_timeval(entry->skb->tstamp);
- pmsg->timestamp_sec = tv.tv_sec;
- pmsg->timestamp_usec = tv.tv_usec;
- pmsg->mark = entry->skb->mark;
- pmsg->hook = entry->hook;
- pmsg->hw_protocol = entry->skb->protocol;
-
- if (entry->indev)
- strcpy(pmsg->indev_name, entry->indev->name);
- else
- pmsg->indev_name[0] = '\0';
-
- if (entry->outdev)
- strcpy(pmsg->outdev_name, entry->outdev->name);
- else
- pmsg->outdev_name[0] = '\0';
-
- if (entry->indev && entry->skb->dev) {
- pmsg->hw_type = entry->skb->dev->type;
- pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
- }
-
- if (data_len)
- if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
- BUG();
-
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-
-nlmsg_failure:
- *errp = -EINVAL;
- printk(KERN_ERR "ip6_queue: error creating packet message\n");
- return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
-{
- int status = -EINVAL;
- struct sk_buff *nskb;
-
- if (copy_mode == IPQ_COPY_NONE)
- return -EAGAIN;
-
- nskb = ipq_build_packet_message(entry, &status);
- if (nskb == NULL)
- return status;
-
- spin_lock_bh(&queue_lock);
-
- if (!peer_pid)
- goto err_out_free_nskb;
-
- if (queue_total >= queue_maxlen) {
- queue_dropped++;
- status = -ENOSPC;
- if (net_ratelimit())
- printk (KERN_WARNING "ip6_queue: fill at %d entries, "
- "dropping packet(s). Dropped: %d\n", queue_total,
- queue_dropped);
- goto err_out_free_nskb;
- }
-
- /* netlink_unicast will either free the nskb or attach it to a socket */
- status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0) {
- queue_user_dropped++;
- goto err_out_unlock;
- }
-
- __ipq_enqueue_entry(entry);
-
- spin_unlock_bh(&queue_lock);
- return status;
-
-err_out_free_nskb:
- kfree_skb(nskb);
-
-err_out_unlock:
- spin_unlock_bh(&queue_lock);
- return status;
-}
-
-static int
-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
-{
- int diff;
- struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
- struct sk_buff *nskb;
-
- if (v->data_len < sizeof(*user_iph))
- return 0;
- diff = v->data_len - e->skb->len;
- if (diff < 0) {
- if (pskb_trim(e->skb, v->data_len))
- return -ENOMEM;
- } else if (diff > 0) {
- if (v->data_len > 0xFFFF)
- return -EINVAL;
- if (diff > skb_tailroom(e->skb)) {
- nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
- diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "ip6_queue: OOM "
- "in mangle, dropping packet\n");
- return -ENOMEM;
- }
- kfree_skb(e->skb);
- e->skb = nskb;
- }
- skb_put(e->skb, diff);
- }
- if (!skb_make_writable(e->skb, v->data_len))
- return -ENOMEM;
- skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
- e->skb->ip_summed = CHECKSUM_NONE;
-
- return 0;
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
- struct nf_queue_entry *entry;
-
- if (vmsg->value > NF_MAX_VERDICT)
- return -EINVAL;
-
- entry = ipq_find_dequeue_entry(vmsg->id);
- if (entry == NULL)
- return -ENOENT;
- else {
- int verdict = vmsg->value;
-
- if (vmsg->data_len && vmsg->data_len == len)
- if (ipq_mangle_ipv6(vmsg, entry) < 0)
- verdict = NF_DROP;
-
- nf_reinject(entry, verdict);
- return 0;
- }
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status;
-
- spin_lock_bh(&queue_lock);
- status = __ipq_set_mode(mode, range);
- spin_unlock_bh(&queue_lock);
- return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
- unsigned char type, unsigned int len)
-{
- int status = 0;
-
- if (len < sizeof(*pmsg))
- return -EINVAL;
-
- switch (type) {
- case IPQM_MODE:
- status = ipq_set_mode(pmsg->msg.mode.value,
- pmsg->msg.mode.range);
- break;
-
- case IPQM_VERDICT:
- if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
- status = -EINVAL;
- else
- status = ipq_set_verdict(&pmsg->msg.verdict,
- len - sizeof(*pmsg));
- break;
- default:
- status = -EINVAL;
- }
- return status;
-}
-
-static int
-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
-{
- if (entry->indev)
- if (entry->indev->ifindex == ifindex)
- return 1;
-
- if (entry->outdev)
- if (entry->outdev->ifindex == ifindex)
- return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
- return 1;
- }
-#endif
- return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
- ipq_flush(dev_cmp, ifindex);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-__ipq_rcv_skb(struct sk_buff *skb)
-{
- int status, type, pid, flags, nlmsglen, skblen;
- struct nlmsghdr *nlh;
-
- skblen = skb->len;
- if (skblen < sizeof(*nlh))
- return;
-
- nlh = nlmsg_hdr(skb);
- nlmsglen = nlh->nlmsg_len;
- if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
- return;
-
- pid = nlh->nlmsg_pid;
- flags = nlh->nlmsg_flags;
-
- if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
- RCV_SKB_FAIL(-EINVAL);
-
- if (flags & MSG_TRUNC)
- RCV_SKB_FAIL(-ECOMM);
-
- type = nlh->nlmsg_type;
- if (type < NLMSG_NOOP || type >= IPQM_MAX)
- RCV_SKB_FAIL(-EINVAL);
-
- if (type <= IPQM_BASE)
- return;
-
- if (security_netlink_recv(skb, CAP_NET_ADMIN))
- RCV_SKB_FAIL(-EPERM);
-
- spin_lock_bh(&queue_lock);
-
- if (peer_pid) {
- if (peer_pid != pid) {
- spin_unlock_bh(&queue_lock);
- RCV_SKB_FAIL(-EBUSY);
- }
- } else {
- net_enable_timestamp();
- peer_pid = pid;
- }
-
- spin_unlock_bh(&queue_lock);
-
- status = ipq_receive_peer(NLMSG_DATA(nlh), type,
- nlmsglen - NLMSG_LENGTH(0));
- if (status < 0)
- RCV_SKB_FAIL(status);
-
- if (flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
-}
-
-static void
-ipq_rcv_skb(struct sk_buff *skb)
-{
- mutex_lock(&ipqnl_mutex);
- __ipq_rcv_skb(skb);
- mutex_unlock(&ipqnl_mutex);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- /* Drop any packets associated with the downed device */
- if (event == NETDEV_DOWN)
- ipq_dev_drop(dev->ifindex);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
- .notifier_call = ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct netlink_notify *n = ptr;
-
- if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
- spin_lock_bh(&queue_lock);
- if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
- __ipq_reset();
- spin_unlock_bh(&queue_lock);
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
- .notifier_call = ipq_rcv_nl_event,
-};
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
- {
- .procname = NET_IPQ_QMAX_NAME,
- .data = &queue_maxlen,
- .maxlen = sizeof(queue_maxlen),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- { }
-};
-#endif
-
-#ifdef CONFIG_PROC_FS
-static int ip6_queue_show(struct seq_file *m, void *v)
-{
- spin_lock_bh(&queue_lock);
-
- seq_printf(m,
- "Peer PID : %d\n"
- "Copy mode : %hu\n"
- "Copy range : %u\n"
- "Queue length : %u\n"
- "Queue max. length : %u\n"
- "Queue dropped : %u\n"
- "Netfilter dropped : %u\n",
- peer_pid,
- copy_mode,
- copy_range,
- queue_total,
- queue_maxlen,
- queue_dropped,
- queue_user_dropped);
-
- spin_unlock_bh(&queue_lock);
- return 0;
-}
-
-static int ip6_queue_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ip6_queue_show, NULL);
-}
-
-static const struct file_operations ip6_queue_proc_fops = {
- .open = ip6_queue_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .owner = THIS_MODULE,
-};
-#endif
-
-static const struct nf_queue_handler nfqh = {
- .name = "ip6_queue",
- .outfn = &ipq_enqueue_packet,
-};
-
-static int __init ip6_queue_init(void)
-{
- int status = -ENOMEM;
- struct proc_dir_entry *proc __maybe_unused;
-
- netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
- ipq_rcv_skb, NULL, THIS_MODULE);
- if (ipqnl == NULL) {
- printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
-
-#ifdef CONFIG_PROC_FS
- proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
- &ip6_queue_proc_fops);
- if (!proc) {
- printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
- goto cleanup_ipqnl;
- }
-#endif
- register_netdevice_notifier(&ipq_dev_notifier);
-#ifdef CONFIG_SYSCTL
- ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
-#endif
- status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
- if (status < 0) {
- printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
- goto cleanup_sysctl;
- }
- return status;
-
-cleanup_sysctl:
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
-#endif
- unregister_netdevice_notifier(&ipq_dev_notifier);
- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
-cleanup_ipqnl: __maybe_unused
- netlink_kernel_release(ipqnl);
- mutex_lock(&ipqnl_mutex);
- mutex_unlock(&ipqnl_mutex);
-
-cleanup_netlink_notifier:
- netlink_unregister_notifier(&ipq_nl_notifier);
- return status;
-}
-
-static void __exit ip6_queue_fini(void)
-{
- nf_unregister_queue_handlers(&nfqh);
-
- ipq_flush(NULL, 0);
-
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
-#endif
- unregister_netdevice_notifier(&ipq_dev_notifier);
- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
- netlink_kernel_release(ipqnl);
- mutex_lock(&ipqnl_mutex);
- mutex_unlock(&ipqnl_mutex);
-
- netlink_unregister_notifier(&ipq_nl_notifier);
-}
-
-MODULE_DESCRIPTION("IPv6 packet queue handler");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
-
-module_init(ip6_queue_init);
-module_exit(ip6_queue_fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ec..e080fbbbc0e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -78,19 +79,6 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
Hence the start of any table is given by get_table() below. */
-/* Check for an extension */
-int
-ip6t_ext_hdr(u8 nexthdr)
-{
- return (nexthdr == IPPROTO_HOPOPTS) ||
- (nexthdr == IPPROTO_ROUTING) ||
- (nexthdr == IPPROTO_FRAGMENT) ||
- (nexthdr == IPPROTO_ESP) ||
- (nexthdr == IPPROTO_AH) ||
- (nexthdr == IPPROTO_NONE) ||
- (nexthdr == IPPROTO_DSTOPTS);
-}
-
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
static inline bool
@@ -146,7 +134,7 @@ ip6_packet_match(const struct sk_buff *skb,
int protohdr;
unsigned short _frag_off;
- protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+ protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL);
if (protohdr < 0) {
if (_frag_off == 0)
*hotdrop = true;
@@ -194,8 +182,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
static unsigned int
ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
{
- if (net_ratelimit())
- pr_info("error: `%s'\n", (const char *)par->targinfo);
+ net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
return NF_DROP;
}
@@ -221,8 +208,7 @@ ip6t_get_target_c(const struct ip6t_entry *e)
return ip6t_get_target((struct ip6t_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* This cries for unification! */
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
@@ -299,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -311,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
@@ -340,6 +327,7 @@ ip6t_do_table(struct sk_buff *skb,
unsigned int *stackptr, origptr, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
+ unsigned int addend;
/* Initialization */
indev = in ? in->name : nulldevname;
@@ -358,8 +346,14 @@ ip6t_do_table(struct sk_buff *skb,
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- xt_info_rdlock_bh();
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -373,6 +367,7 @@ ip6t_do_table(struct sk_buff *skb,
const struct xt_entry_match *ematch;
IP_NF_ASSERT(e);
+ acpar.thoff = 0;
if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
&acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
no_match:
@@ -392,8 +387,7 @@ ip6t_do_table(struct sk_buff *skb,
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
@@ -407,10 +401,10 @@ ip6t_do_table(struct sk_buff *skb,
if (v < 0) {
/* Pop from stack? */
if (v != XT_RETURN) {
- verdict = (unsigned)(-v) - 1;
+ verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr == 0)
+ if (*stackptr <= origptr)
e = get_entry(table_base,
private->underflow[hook]);
else
@@ -441,9 +435,11 @@ ip6t_do_table(struct sk_buff *skb,
break;
} while (!acpar.hotdrop);
- xt_info_rdunlock_bh();
*stackptr = origptr;
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
@@ -897,42 +893,25 @@ get_counters(const struct xt_table_info *t,
struct ip6t_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +924,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1093,6 +1072,7 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1125,7 +1105,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1184,7 +1164,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
}
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
@@ -1216,7 +1196,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1224,7 +1204,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
"ip6table_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1261,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -1291,6 +1273,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1340,6 +1323,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
int ret = 0;
const void *loc_cpu_entry;
struct ip6t_entry *iter;
+ unsigned int addend;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1380,7 +1364,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
t = xt_find_table_lock(net, AF_INET6, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
@@ -1396,13 +1380,13 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
i = 0;
/* Choose the copy that is on our node */
curcpu = smp_processor_id();
- xt_info_wrlock(curcpu);
+ addend = xt_write_recseq_begin();
loc_cpu_entry = private->entries[curcpu];
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
- xt_info_wrunlock(curcpu);
+ xt_write_recseq_end(addend);
unlock_up_free:
local_bh_enable();
@@ -1593,7 +1577,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct ip6t_entry *de;
unsigned int origsize;
int ret, h;
@@ -1615,7 +1598,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
}
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ip6t_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
@@ -1696,6 +1678,7 @@ translate_compat_table(struct net *net,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
+ xt_compat_init_offsets(AF_INET6, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
@@ -1837,6 +1820,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1879,7 +1863,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1964,7 +1948,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
@@ -1994,7 +1978,7 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2016,7 +2000,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2041,7 +2025,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2066,6 +2050,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EFAULT;
break;
}
+ rev.name[sizeof(rev.name)-1] = 0;
if (cmd == IP6T_SO_GET_REVISION_TARGET)
target = 1;
@@ -2260,7 +2245,7 @@ static int __init ip6_tables_init(void)
if (ret < 0)
goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
+ /* No one else will be downing sem now, so we won't sleep */
ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
if (ret < 0)
goto err2;
@@ -2295,88 +2280,9 @@ static void __exit ip6_tables_fini(void)
unregister_pernet_subsys(&ip6_tables_net_ops);
}
-/*
- * find the offset to specified header or the protocol number of last header
- * if target < 0. "last header" is transport protocol header, ESP, or
- * "No next header".
- *
- * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
- *
- * If the first fragment doesn't contain the final protocol header or
- * NEXTHDR_NONE it is considered invalid.
- *
- * Note that non-1st fragment is special case that "the protocol number
- * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
- *
- */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff)
-{
- unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len = skb->len - start;
-
- if (fragoff)
- *fragoff = 0;
-
- while (nexthdr != target) {
- struct ipv6_opt_hdr _hdr, *hp;
- unsigned int hdrlen;
-
- if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
- break;
- return -ENOENT;
- }
-
- hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return -EBADMSG;
- if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off;
- __be16 *fp;
- fp = skb_header_pointer(skb,
- start+offsetof(struct frag_hdr,
- frag_off),
- sizeof(_frag_off),
- &_frag_off);
- if (fp == NULL)
- return -EBADMSG;
-
- _frag_off = ntohs(*fp) & ~0x7;
- if (_frag_off) {
- if (target < 0 &&
- ((!ipv6_ext_hdr(hp->nexthdr)) ||
- hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
- *fragoff = _frag_off;
- return hp->nexthdr;
- }
- return -ENOENT;
- }
- hdrlen = 8;
- } else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hp->hdrlen + 2) << 2;
- else
- hdrlen = ipv6_optlen(hp);
-
- nexthdr = hp->nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *offset = start;
- return nexthdr;
-}
-
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ip6t_ext_hdr);
-EXPORT_SYMBOL(ipv6_find_hdr);
module_init(ip6_tables_init);
module_exit(ip6_tables_fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
deleted file mode 100644
index 09c88891a75..00000000000
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- * This is a module which is used for logging packets.
- */
-
-/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <linux/spinlock.h>
-#include <linux/icmpv6.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netfilter/xt_log.h>
-
-MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
-MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
-MODULE_LICENSE("GPL");
-
-struct in_device;
-#include <net/route.h>
-#include <linux/netfilter_ipv6/ip6t_LOG.h>
-
-/* One level of recursion won't kill us */
-static void dump_packet(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb, unsigned int ip6hoff,
- int recurse)
-{
- u_int8_t currenthdr;
- int fragment;
- struct ipv6hdr _ip6h;
- const struct ipv6hdr *ih;
- unsigned int ptr;
- unsigned int hdrlen = 0;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
- if (ih == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
- sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
-
- /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
- sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
- ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
- (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
- ih->hop_limit,
- (ntohl(*(__be32 *)ih) & 0x000fffff));
-
- fragment = 0;
- ptr = ip6hoff + sizeof(struct ipv6hdr);
- currenthdr = ih->nexthdr;
- while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
- struct ipv6_opt_hdr _hdr;
- const struct ipv6_opt_hdr *hp;
-
- hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 48 "OPT (...) " */
- if (logflags & IP6T_LOG_IPOPT)
- sb_add(m, "OPT ( ");
-
- switch (currenthdr) {
- case IPPROTO_FRAGMENT: {
- struct frag_hdr _fhdr;
- const struct frag_hdr *fh;
-
- sb_add(m, "FRAG:");
- fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
- &_fhdr);
- if (fh == NULL) {
- sb_add(m, "TRUNCATED ");
- return;
- }
-
- /* Max length: 6 "65535 " */
- sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
-
- /* Max length: 11 "INCOMPLETE " */
- if (fh->frag_off & htons(0x0001))
- sb_add(m, "INCOMPLETE ");
-
- sb_add(m, "ID:%08x ", ntohl(fh->identification));
-
- if (ntohs(fh->frag_off) & 0xFFF8)
- fragment = 1;
-
- hdrlen = 8;
-
- break;
- }
- case IPPROTO_DSTOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_HOPOPTS:
- if (fragment) {
- if (logflags & IP6T_LOG_IPOPT)
- sb_add(m, ")");
- return;
- }
- hdrlen = ipv6_optlen(hp);
- break;
- /* Max Length */
- case IPPROTO_AH:
- if (logflags & IP6T_LOG_IPOPT) {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- /* Max length: 3 "AH " */
- sb_add(m, "AH ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
- &_ahdr);
- if (ah == NULL) {
- /*
- * Max length: 26 "INCOMPLETE [65535
- * bytes] )"
- */
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 15 "SPI=0xF1234567 */
- sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
-
- }
-
- hdrlen = (hp->hdrlen+2)<<2;
- break;
- case IPPROTO_ESP:
- if (logflags & IP6T_LOG_IPOPT) {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 4 "ESP " */
- sb_add(m, "ESP ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- /*
- * Max length: 26 "INCOMPLETE [65535 bytes] )"
- */
- eh = skb_header_pointer(skb, ptr, sizeof(_esph),
- &_esph);
- if (eh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 16 "SPI=0xF1234567 )" */
- sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
-
- }
- return;
- default:
- /* Max length: 20 "Unknown Ext Hdr 255" */
- sb_add(m, "Unknown Ext Hdr %u", currenthdr);
- return;
- }
- if (logflags & IP6T_LOG_IPOPT)
- sb_add(m, ") ");
-
- currenthdr = hp->nexthdr;
- ptr += hdrlen;
- }
-
- switch (currenthdr) {
- case IPPROTO_TCP: {
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* Max length: 10 "PROTO=TCP " */
- sb_add(m, "PROTO=TCP ");
-
- if (fragment)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
- if (th == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
- return;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u ",
- ntohs(th->source), ntohs(th->dest));
- /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & IP6T_LOG_TCPSEQ)
- sb_add(m, "SEQ=%u ACK=%u ",
- ntohl(th->seq), ntohl(th->ack_seq));
- /* Max length: 13 "WINDOW=65535 " */
- sb_add(m, "WINDOW=%u ", ntohs(th->window));
- /* Max length: 9 "RES=0x3C " */
- sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
- /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (th->cwr)
- sb_add(m, "CWR ");
- if (th->ece)
- sb_add(m, "ECE ");
- if (th->urg)
- sb_add(m, "URG ");
- if (th->ack)
- sb_add(m, "ACK ");
- if (th->psh)
- sb_add(m, "PSH ");
- if (th->rst)
- sb_add(m, "RST ");
- if (th->syn)
- sb_add(m, "SYN ");
- if (th->fin)
- sb_add(m, "FIN ");
- /* Max length: 11 "URGP=65535 " */
- sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
-
- if ((logflags & IP6T_LOG_TCPOPT) &&
- th->doff * 4 > sizeof(struct tcphdr)) {
- u_int8_t _opt[60 - sizeof(struct tcphdr)];
- const u_int8_t *op;
- unsigned int i;
- unsigned int optsize = th->doff * 4
- - sizeof(struct tcphdr);
-
- op = skb_header_pointer(skb,
- ptr + sizeof(struct tcphdr),
- optsize, _opt);
- if (op == NULL) {
- sb_add(m, "OPT (TRUNCATED)");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- sb_add(m, "OPT (");
- for (i =0; i < optsize; i++)
- sb_add(m, "%02X", op[i]);
- sb_add(m, ") ");
- }
- break;
- }
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE: {
- struct udphdr _udph;
- const struct udphdr *uh;
-
- if (currenthdr == IPPROTO_UDP)
- /* Max length: 10 "PROTO=UDP " */
- sb_add(m, "PROTO=UDP " );
- else /* Max length: 14 "PROTO=UDPLITE " */
- sb_add(m, "PROTO=UDPLITE ");
-
- if (fragment)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
- if (uh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
- return;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u LEN=%u ",
- ntohs(uh->source), ntohs(uh->dest),
- ntohs(uh->len));
- break;
- }
- case IPPROTO_ICMPV6: {
- struct icmp6hdr _icmp6h;
- const struct icmp6hdr *ic;
-
- /* Max length: 13 "PROTO=ICMPv6 " */
- sb_add(m, "PROTO=ICMPv6 ");
-
- if (fragment)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
- if (ic == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
- return;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
-
- switch (ic->icmp6_type) {
- case ICMPV6_ECHO_REQUEST:
- case ICMPV6_ECHO_REPLY:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- sb_add(m, "ID=%u SEQ=%u ",
- ntohs(ic->icmp6_identifier),
- ntohs(ic->icmp6_sequence));
- break;
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- break;
-
- case ICMPV6_PARAMPROB:
- /* Max length: 17 "POINTER=ffffffff " */
- sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
- /* Fall through */
- case ICMPV6_DEST_UNREACH:
- case ICMPV6_PKT_TOOBIG:
- case ICMPV6_TIME_EXCEED:
- /* Max length: 3+maxlen */
- if (recurse) {
- sb_add(m, "[");
- dump_packet(m, info, skb,
- ptr + sizeof(_icmp6h), 0);
- sb_add(m, "] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
- sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
- }
- break;
- }
- /* Max length: 10 "PROTO=255 " */
- default:
- sb_add(m, "PROTO=%u ", currenthdr);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- sb_add(m, "UID=%u GID=%u ",
- skb->sk->sk_socket->file->f_cred->fsuid,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
- }
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!recurse && skb->mark)
- sb_add(m, "MARK=0x%x ", skb->mark);
-}
-
-static void dump_mac_header(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & IP6T_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- sb_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT &&
- (p -= ETH_HLEN) < skb->head)
- p = NULL;
-
- if (p != NULL) {
- sb_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- sb_add(m, ":%02x", p[i]);
- }
- sb_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
- }
- } else
- sb_add(m, " ");
-}
-
-static struct nf_loginfo default_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 5,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void
-ip6t_log_packet(u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- struct sbuff *m = sb_open();
-
- if (!loginfo)
- loginfo = &default_loginfo;
-
- sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
- prefix,
- in ? in->name : "",
- out ? out->name : "");
-
- /* MAC logging for input path only. */
- if (in && !out)
- dump_mac_header(m, loginfo, skb);
-
- dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
-
- sb_close(m);
-}
-
-static unsigned int
-log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
-{
- const struct ip6t_log_info *loginfo = par->targinfo;
- struct nf_loginfo li;
-
- li.type = NF_LOG_TYPE_LOG;
- li.u.log.level = loginfo->level;
- li.u.log.logflags = loginfo->logflags;
-
- ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out,
- &li, loginfo->prefix);
- return XT_CONTINUE;
-}
-
-
-static int log_tg6_check(const struct xt_tgchk_param *par)
-{
- const struct ip6t_log_info *loginfo = par->targinfo;
-
- if (loginfo->level >= 8) {
- pr_debug("level %u >= 8\n", loginfo->level);
- return -EINVAL;
- }
- if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
- pr_debug("prefix not null-terminated\n");
- return -EINVAL;
- }
- return 0;
-}
-
-static struct xt_target log_tg6_reg __read_mostly = {
- .name = "LOG",
- .family = NFPROTO_IPV6,
- .target = log_tg6,
- .targetsize = sizeof(struct ip6t_log_info),
- .checkentry = log_tg6_check,
- .me = THIS_MODULE,
-};
-
-static struct nf_logger ip6t_logger __read_mostly = {
- .name = "ip6t_LOG",
- .logfn = &ip6t_log_packet,
- .me = THIS_MODULE,
-};
-
-static int __init log_tg6_init(void)
-{
- int ret;
-
- ret = xt_register_target(&log_tg6_reg);
- if (ret < 0)
- return ret;
- nf_log_register(NFPROTO_IPV6, &ip6t_logger);
- return 0;
-}
-
-static void __exit log_tg6_exit(void)
-{
- nf_log_unregister(&ip6t_logger);
- xt_unregister_target(&log_tg6_reg);
-}
-
-module_init(log_tg6_init);
-module_exit(log_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 00000000000..3e4e92d5e15
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nfct_nat(ct)->masq_index = par->out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS)
+ return -EINVAL;
+ return 0;
+}
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, ifa->idev->dev);
+ return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static struct xt_target masquerade_tg6_reg __read_mostly = {
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV6,
+ .checkentry = masquerade_tg6_checkentry,
+ .target = masquerade_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .me = THIS_MODULE,
+};
+
+static int __init masquerade_tg6_init(void)
+{
+ int err;
+
+ err = xt_register_target(&masquerade_tg6_reg);
+ if (err == 0) {
+ register_netdevice_notifier(&masq_dev_notifier);
+ register_inet6addr_notifier(&masq_inet_notifier);
+ }
+
+ return err;
+}
+static void __exit masquerade_tg6_exit(void)
+{
+ unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ xt_unregister_target(&masquerade_tg6_reg);
+}
+
+module_init(masquerade_tg6_init);
+module_exit(masquerade_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 00000000000..590f767db5d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6t_NPT.h>
+#include <linux/netfilter/x_tables.h>
+
+static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
+{
+ struct ip6t_npt_tginfo *npt = par->targinfo;
+ struct in6_addr pfx;
+ __wsum src_sum, dst_sum;
+
+ if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
+ return -EINVAL;
+
+ /* Ensure that LSB of prefix is zero */
+ ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
+ if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
+ return -EINVAL;
+ ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
+ if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
+ return -EINVAL;
+
+ src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
+ dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
+
+ npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
+ return 0;
+}
+
+static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
+ struct in6_addr *addr)
+{
+ unsigned int pfx_len;
+ unsigned int i, idx;
+ __be32 mask;
+ __sum16 sum;
+
+ pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
+ for (i = 0; i < pfx_len; i += 32) {
+ if (pfx_len - i >= 32)
+ mask = 0;
+ else
+ mask = htonl((1 << (i - pfx_len + 32)) - 1);
+
+ idx = i / 32;
+ addr->s6_addr32[idx] &= mask;
+ addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
+ }
+
+ if (pfx_len <= 48)
+ idx = 3;
+ else {
+ for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
+ if ((__force __sum16)addr->s6_addr16[idx] !=
+ CSUM_MANGLED_0)
+ break;
+ }
+ if (idx == ARRAY_SIZE(addr->s6_addr16))
+ return false;
+ }
+
+ sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
+ csum_unfold(npt->adjustment)));
+ if (sum == CSUM_MANGLED_0)
+ sum = 0;
+ *(__force __sum16 *)&addr->s6_addr16[idx] = sum;
+
+ return true;
+}
+
+static unsigned int
+ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, saddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+static unsigned int
+ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, daddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
+ {
+ .name = "SNPT",
+ .table = "mangle",
+ .target = ip6t_snpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNPT",
+ .table = "mangle",
+ .target = ip6t_dnpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init ip6t_npt_init(void)
+{
+ return xt_register_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+static void __exit ip6t_npt_exit(void)
+{
+ xt_unregister_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+module_init(ip6t_npt_init);
+module_exit(ip6t_npt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_SNPT");
+MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 2933396e028..544b0a9da1b 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -7,6 +7,8 @@
* Authors:
* Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
*
+ * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
+ *
* Based on net/ipv4/netfilter/ipt_REJECT.c
*
* This program is free software; you can redistribute it and/or
@@ -21,160 +23,18 @@
#include <linux/skbuff.h>
#include <linux/icmpv6.h>
#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/tcp.h>
#include <net/icmp.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
#include <net/flow.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_ipv6/ip6t_REJECT.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-/* Send RST reply */
-static void send_reset(struct net *net, struct sk_buff *oldskb)
-{
- struct sk_buff *nskb;
- struct tcphdr otcph, *tcph;
- unsigned int otcplen, hh_len;
- int tcphoff, needs_ack;
- const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
- struct ipv6hdr *ip6h;
- struct dst_entry *dst = NULL;
- u8 proto;
- struct flowi fl;
-
- if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
- (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
- pr_debug("addr is not unicast.\n");
- return;
- }
-
- proto = oip6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
-
- if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
- pr_debug("Cannot get TCP header.\n");
- return;
- }
-
- otcplen = oldskb->len - tcphoff;
-
- /* IP header checks: fragment, too short. */
- if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
- pr_debug("proto(%d) != IPPROTO_TCP, "
- "or too short. otcplen = %d\n",
- proto, otcplen);
- return;
- }
-
- if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
- BUG();
-
- /* No RST for RST. */
- if (otcph.rst) {
- pr_debug("RST is set\n");
- return;
- }
-
- /* Check checksum. */
- if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
- skb_checksum(oldskb, tcphoff, otcplen, 0))) {
- pr_debug("TCP checksum is invalid\n");
- return;
- }
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
- ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
- fl.fl_ip_sport = otcph.dest;
- fl.fl_ip_dport = otcph.source;
- security_skb_classify_flow(oldskb, &fl);
- dst = ip6_route_output(net, NULL, &fl);
- if (dst == NULL || dst->error) {
- dst_release(dst);
- return;
- }
- if (xfrm_lookup(net, &dst, &fl, NULL, 0))
- return;
-
- hh_len = (dst->dev->hard_header_len + 15)&~15;
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
- + sizeof(struct tcphdr) + dst->trailer_len,
- GFP_ATOMIC);
-
- if (!nskb) {
- if (net_ratelimit())
- pr_debug("cannot alloc skb\n");
- dst_release(dst);
- return;
- }
-
- skb_dst_set(nskb, dst);
-
- skb_reserve(nskb, hh_len + dst->header_len);
-
- skb_put(nskb, sizeof(struct ipv6hdr));
- skb_reset_network_header(nskb);
- ip6h = ipv6_hdr(nskb);
- ip6h->version = 6;
- ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
- ip6h->nexthdr = IPPROTO_TCP;
- ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
- ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
-
- tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
- /* Truncate to length (no data) */
- tcph->doff = sizeof(struct tcphdr)/4;
- tcph->source = otcph.dest;
- tcph->dest = otcph.source;
-
- if (otcph.ack) {
- needs_ack = 0;
- tcph->seq = otcph.ack_seq;
- tcph->ack_seq = 0;
- } else {
- needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
- + otcplen - (otcph.doff<<2));
- tcph->seq = 0;
- }
-
- /* Reset flags */
- ((u_int8_t *)tcph)[13] = 0;
- tcph->rst = 1;
- tcph->ack = needs_ack;
- tcph->window = 0;
- tcph->urg_ptr = 0;
- tcph->check = 0;
-
- /* Adjust TCP checksum */
- tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
- &ipv6_hdr(nskb)->daddr,
- sizeof(struct tcphdr), IPPROTO_TCP,
- csum_partial(tcph,
- sizeof(struct tcphdr), 0));
-
- nf_ct_attach(nskb, oldskb);
-
- ip6_local_out(nskb);
-}
-
-static inline void
-send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
- unsigned int hooknum)
-{
- if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
- skb_in->dev = net->loopback_dev;
-
- icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -185,29 +45,28 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
- send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
break;
case IP6T_ICMP6_ADM_PROHIBITED:
- send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
- send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
break;
case IP6T_ICMP6_ADDR_UNREACH:
- send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_PORT_UNREACH:
- send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
- send_reset(net, skb);
+ nf_send_reset6(net, skb, par->hooknum);
break;
default:
- if (net_ratelimit())
- pr_info("case %u not handled yet\n", reject->with);
+ net_info_ratelimited("case %u not handled yet\n", reject->with);
break;
}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 00000000000..a0d17270117
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct ipv6hdr *
+synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ struct ipv6hdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+ ip6_flow_hdr(iph, 0, 0);
+ iph->hop_limit = 64; //XXX
+ iph->nexthdr = IPPROTO_TCP;
+ iph->saddr = *saddr;
+ iph->daddr = *daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct ipv6hdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ struct net *net = nf_ct_net((struct nf_conn *)nfct);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = niph->saddr;
+ fl6.daddr = niph->daddr;
+ fl6.fl6_sport = nth->source;
+ fl6.fl6_dport = nth->dest;
+ security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == NULL || dst->error) {
+ dst_release(dst);
+ goto free_nskb;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ goto free_nskb;
+
+ skb_dst_set(nskb, dst);
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip6_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ __be16 frag_off;
+ u8 nexthdr;
+ int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (thoff < 0)
+ return NF_ACCEPT;
+
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (!(e->ipv6.flags & IP6T_F_PROTO) ||
+ e->ipv6.proto != IPPROTO_TCP ||
+ e->ipv6.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg6_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+ .target = synproxy_tg6,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg6_check,
+ .destroy = synproxy_tg6_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg6_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg6_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg6_exit(void)
+{
+ xt_unregister_target(&synproxy_tg6_reg);
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+}
+
+module_init(synproxy_tg6_init);
+module_exit(synproxy_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 89cccc5a9c9..04099ab7d2e 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -41,11 +41,11 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
struct ip_auth_hdr _ah;
const struct ip_auth_hdr *ah;
const struct ip6t_ah *ahinfo = par->matchinfo;
- unsigned int ptr;
+ unsigned int ptr = 0;
unsigned int hdrlen = 0;
int err;
- err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, NULL);
if (err < 0) {
if (err != -ENOENT)
par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index eda898fda6c..3b5735e56bf 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -40,10 +40,10 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
struct frag_hdr _frag;
const struct frag_hdr *fh;
const struct ip6t_frag *fraginfo = par->matchinfo;
- unsigned int ptr;
+ unsigned int ptr = 0;
int err;
- err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL);
if (err < 0) {
if (err != -ENOENT)
par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 59df051eaef..01df142bb02 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -50,7 +50,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
const struct ipv6_opt_hdr *oh;
const struct ip6t_opts *optinfo = par->matchinfo;
unsigned int temp;
- unsigned int ptr;
+ unsigned int ptr = 0;
unsigned int hdrlen = 0;
bool ret = false;
u8 _opttype;
@@ -62,7 +62,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
err = ipv6_find_hdr(skb, &ptr,
(par->match == &hbh_mt6_reg[0]) ?
- NEXTHDR_HOP : NEXTHDR_DEST, NULL);
+ NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL);
if (err < 0) {
if (err != -ENOENT)
par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
new file mode 100644
index 00000000000..790e0c6b19e
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match");
+
+static bool rpfilter_addr_unicast(const struct in6_addr *addr)
+{
+ int addr_type = ipv6_addr_type(addr);
+ return addr_type & IPV6_ADDR_UNICAST;
+}
+
+static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+ const struct net_device *dev, u8 flags)
+{
+ struct rt6_info *rt;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool ret = false;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+ .flowi6_proto = iph->nexthdr,
+ .daddr = iph->saddr,
+ };
+ int lookup_flags;
+
+ if (rpfilter_addr_unicast(&iph->daddr)) {
+ memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr));
+ lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
+ } else {
+ lookup_flags = 0;
+ }
+
+ fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ if ((flags & XT_RPFILTER_LOOSE) == 0) {
+ fl6.flowi6_oif = dev->ifindex;
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ }
+
+ rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+ if (rt->dst.error)
+ goto out;
+
+ if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST))
+ goto out;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ ret = flags & XT_RPFILTER_ACCEPT_LOCAL;
+ goto out;
+ }
+
+ if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ ret = true;
+ out:
+ ip6_rt_put(rt);
+ return ret;
+}
+
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+ const struct rt6_info *rt = (const void *) skb_dst(skb);
+ return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ int saddrtype;
+ struct ipv6hdr *iph;
+ bool invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (rpfilter_is_local(skb))
+ return true ^ invert;
+
+ iph = ipv6_hdr(skb);
+ saddrtype = ipv6_addr_type(&iph->saddr);
+ if (unlikely(saddrtype == IPV6_ADDR_ANY))
+ return true ^ invert; /* not routable: forward path will drop it */
+
+ return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV6,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index d8488c50a8e..2c99b94eeca 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -42,14 +42,14 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
const struct ipv6_rt_hdr *rh;
const struct ip6t_rt *rtinfo = par->matchinfo;
unsigned int temp;
- unsigned int ptr;
+ unsigned int ptr = 0;
unsigned int hdrlen = 0;
bool ret = false;
struct in6_addr _addr;
const struct in6_addr *ap;
int err;
- err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL, NULL);
if (err < 0) {
if (err != -ENOENT)
par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index c9e37c8fd62..ca7f6c12808 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,19 +32,20 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = true;
module_param(forward, bool, 0000);
static int __net_init ip6table_filter_net_init(struct net *net)
@@ -56,14 +57,12 @@ static int __net_init ip6table_filter_net_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ip6t_standard *)repl->entries)[1].target.verdict =
- -forward - 1;
+ forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_filter))
- return PTR_ERR(net->ipv6.ip6table_filter);
- return 0;
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
@@ -80,11 +79,6 @@ static int __init ip6table_filter_init(void)
{
int ret;
- if (forward < 0 || forward > NF_MAX_VERDICT) {
- pr_err("iptables forward must be 0 or 1\n");
- return -EINVAL;
- }
-
ret = register_pernet_subsys(&ip6table_filter_net_ops);
if (ret < 0)
return ret;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 679a0a3b7b3..307bbb782d1 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
+#include <net/ipv6.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -37,13 +38,12 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
-
+ int err;
#if 0
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)) {
- if (net_ratelimit())
- pr_warning("ip6t_hook: happy cracking.\n");
+ net_warn_ratelimited("ip6t_hook: happy cracking\n");
return NF_ACCEPT;
}
#endif
@@ -61,28 +61,32 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
dev_net(out)->ipv6.ip6table_mangle);
if (ret != NF_DROP && ret != NF_STOLEN &&
- (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
- memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
+ !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
- ipv6_hdr(skb)->hop_limit != hop_limit))
- return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
return ret;
}
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, hook, in, out,
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv6.ip6table_mangle);
}
@@ -97,9 +101,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_mangle))
- return PTR_ERR(net->ipv6.ip6table_mangle);
- return 0;
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 00000000000..387d8b8fc18
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
+ * funded by Astaro.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv6_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
+
+ ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
+ }
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+
+static unsigned int
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+};
+
+static int __net_init ip6table_nat_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
+}
+
+static void __net_exit ip6table_nat_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+}
+
+static struct pernet_operations ip6table_nat_net_ops = {
+ .init = ip6table_nat_net_init,
+ .exit = ip6table_nat_net_exit,
+};
+
+static int __init ip6table_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&ip6table_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+err1:
+ return err;
+}
+
+static void __exit ip6table_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+}
+
+module_init(ip6table_nat_init);
+module_exit(ip6table_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5b9926a011b..5274740acec 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -40,9 +41,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_raw))
- return PTR_ERR(net->ipv6.ip6table_raw);
- return 0;
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 91aa2b4d83c..ab3b0219ecf 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
};
static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -58,10 +59,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_security))
- return PTR_ERR(net->ipv6.ip6table_security);
-
- return 0;
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
}
static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c8af58b2256..4cbc6b290dd 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -21,13 +21,16 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
@@ -64,167 +67,118 @@ static int ipv6_print_tuple(struct seq_file *s,
tuple->src.u3.ip6, tuple->dst.u3.ip6);
}
-/*
- * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
- *
- * This function parses (probably truncated) exthdr set "hdr"
- * of length "len". "nexthdrp" initially points to some place,
- * where type of the first header can be found.
- *
- * It skips all well-known exthdrs, and returns pointer to the start
- * of unparsable area i.e. the first header with unknown type.
- * if success, *nexthdr is updated by type/protocol of this header.
- *
- * NOTES: - it may return pointer pointing beyond end of packet,
- * if the last recognized header is truncated in the middle.
- * - if packet is truncated, so that all parsed headers are skipped,
- * it returns -1.
- * - if packet is fragmented, return pointer of the fragment header.
- * - ESP is unparsable for now and considered like
- * normal payload protocol.
- * - Note also special handling of AUTH header. Thanks to IPsec wizards.
- */
-
-static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
- u8 *nexthdrp, int len)
-{
- u8 nexthdr = *nexthdrp;
-
- while (ipv6_ext_hdr(nexthdr)) {
- struct ipv6_opt_hdr hdr;
- int hdrlen;
-
- if (len < (int)sizeof(struct ipv6_opt_hdr))
- return -1;
- if (nexthdr == NEXTHDR_NONE)
- break;
- if (nexthdr == NEXTHDR_FRAGMENT)
- break;
- if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
- BUG();
- if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
- else
- hdrlen = ipv6_optlen(&hdr);
-
- nexthdr = hdr.nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *nexthdrp = nexthdr;
- return start;
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
- unsigned char pnum;
+ __be16 frag_off;
int protoff;
+ u8 nexthdr;
if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
- &pnum, sizeof(pnum)) != 0) {
+ &nexthdr, sizeof(nexthdr)) != 0) {
pr_debug("ip6_conntrack_core: can't get nexthdr\n");
return -NF_ACCEPT;
}
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff);
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
- * (protoff == skb->len) mean that the packet doesn't have no data
- * except of IPv6 & ext headers. but it's tracked anyway. - YK
+ * (protoff == skb->len) means the packet has not data, just
+ * IPv6 and possibly extensions headers, but it is tracked anyway
*/
- if ((protoff < 0) || (protoff > skb->len)) {
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
return -NF_ACCEPT;
}
*dataoff = protoff;
- *protonum = pnum;
+ *protonum = nexthdr;
return NF_ACCEPT;
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
- unsigned int ret, protoff;
- unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
- unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-
+ __be16 frag_off;
+ int protoff;
+ u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
- goto out;
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
- goto out;
+ return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
- goto out;
+ return NF_ACCEPT;
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
- skb->len - extoff);
- if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
- ret = helper->help(skb, protoff, ct, ctinfo);
- if (ret != NF_ACCEPT) {
- nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
- "nf_ct_%s: dropping packet", helper->name);
- return ret;
- }
-out:
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb);
+ return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int __ipv6_conntrack_in(struct net *net,
- unsigned int hooknum,
- struct sk_buff *skb,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct sk_buff *reasm = skb->nfct_reasm;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ int protoff;
+ __be16 frag_off;
- /* This packet is fragmented and has reassembled packet. */
- if (reasm) {
- /* Reassembled packet isn't parsed yet ? */
- if (!reasm->nfct) {
- unsigned int ret;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
- ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
- if (ret != NF_ACCEPT)
- return ret;
- }
- nf_conntrack_get(reasm->nfct);
- skb->nfct = reasm->nfct;
- skb->nfctinfo = reasm->nfctinfo;
- return NF_ACCEPT;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ goto out;
}
- return nf_conntrack_in(net, PF_INET6, hooknum, skb);
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
}
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
+ return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
}
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -232,11 +186,10 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
{
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct ipv6hdr)) {
- if (net_ratelimit())
- pr_notice("ipv6_conntrack_local: packet too short\n");
+ net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
+ return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
@@ -255,6 +208,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_CONNTRACK,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -262,6 +222,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_LAST,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -270,7 +237,52 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
},
};
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct sockaddr_in6 sin6;
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+ struct nf_conn *ct;
+
+ tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.in6 = sk->sk_v6_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.dst.protonum = sk->sk_protocol;
+
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+ return -ENOPROTOOPT;
+
+ if (*len < 0 || (unsigned int) *len < sizeof(sin6))
+ return -EINVAL;
+
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ if (!h) {
+ pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+ &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+ }
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+ memcpy(&sin6.sin6_addr,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+ sizeof(sin6.sin6_addr));
+
+ nf_ct_put(ct);
+ sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
+ sk->sk_bound_dev_if);
+ return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -278,10 +290,11 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
- NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
- &tuple->src.u3.ip6);
- NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
- &tuple->dst.u3.ip6);
+ if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+ &tuple->src.u3.ip6) ||
+ nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+ &tuple->dst.u3.ip6))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -320,7 +333,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.invert_tuple = ipv6_invert_tuple,
.print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
@@ -333,6 +346,62 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+static struct nf_sockopt_ops so_getorigdst6 = {
+ .pf = NFPROTO_IPV6,
+ .get_optmin = IP6T_SO_ORIGINAL_DST,
+ .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
+ .get = ipv6_getorigdst,
+ .owner = THIS_MODULE,
+};
+
+static int ipv6_net_init(struct net *net)
+{
+ int ret = 0;
+
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_tcp6: pernet registration failed\n");
+ goto out;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_udp6: pernet registration failed\n");
+ goto cleanup_tcp6;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_icmp6: pernet registration failed\n");
+ goto cleanup_udp6;
+ }
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
+ goto cleanup_icmpv6;
+ }
+ return 0;
+ cleanup_icmpv6:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+ out:
+ return ret;
+}
+
+static void ipv6_net_exit(struct net *net)
+{
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+}
+
+static struct pernet_operations ipv6_net_ops = {
+ .init = ipv6_net_init,
+ .exit = ipv6_net_exit,
+};
+
static int __init nf_conntrack_l3proto_ipv6_init(void)
{
int ret = 0;
@@ -340,58 +409,74 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
nf_defrag_ipv6_enable();
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
+ ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register tcp.\n");
+ pr_err("Unable to register netfilter socket option\n");
return ret;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
+ ret = register_pernet_subsys(&ipv6_net_ops);
+ if (ret < 0)
+ goto cleanup_sockopt;
+
+ ret = nf_register_hooks(ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register udp.\n");
- goto cleanup_tcp;
+ pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
+ "hook.\n");
+ goto cleanup_pernet;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
- goto cleanup_udp;
+ pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+ goto cleanup_hooks;
}
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register ipv6\n");
- goto cleanup_icmpv6;
+ pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
+ goto cleanup_tcp6;
}
- ret = nf_register_hooks(ipv6_conntrack_ops,
- ARRAY_SIZE(ipv6_conntrack_ops));
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
- "hook.\n");
- goto cleanup_ipv6;
+ pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
+ goto cleanup_udp6;
+ }
+
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
+ goto cleanup_icmpv6;
}
return ret;
- cleanup_ipv6:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
cleanup_icmpv6:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ cleanup_hooks:
+ nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+ cleanup_pernet:
+ unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst6);
return ret;
}
static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ unregister_pernet_subsys(&ipv6_net_ops);
+ nf_unregister_sockopt(&so_getorigdst6);
}
module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 1df3c8b6bf4..b3807c5cb88 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -29,6 +29,11 @@
static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6;
+}
+
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
@@ -88,25 +93,31 @@ static int icmpv6_print_tuple(struct seq_file *s,
ntohs(tuple->src.u.icmp.id));
}
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+ return &icmpv6_pernet(net)->timeout;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum)
+ unsigned int hooknum,
+ unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, unsigned int *timeouts)
{
static const u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
@@ -120,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+ NULL, NULL,
"nf_ct_icmpv6: invalid new with type %d ",
type + 128);
return false;
@@ -177,7 +189,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
/* Update skb to refer to this connection */
skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
+ return NF_ACCEPT;
}
static int
@@ -192,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
}
@@ -200,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed ");
return -NF_ACCEPT;
}
@@ -221,17 +233,17 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *t)
{
- NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
- NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
- NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
-
+ if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -270,12 +282,50 @@ static int icmpv6_nlattr_tuple_size(void)
}
#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMPv6 timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+ [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *icmpv6_sysctl_header;
static struct ctl_table icmpv6_sysctl_table[] = {
{
.procname = "nf_conntrack_icmpv6_timeout",
- .data = &nf_ct_icmpv6_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -284,6 +334,36 @@ static struct ctl_table icmpv6_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+ sizeof(icmpv6_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmpv6_timeout;
+
+ return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
@@ -293,16 +373,24 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
.invert_tuple = icmpv6_invert_tuple,
.print_tuple = icmpv6_print_tuple,
.packet = icmpv6_packet,
+ .get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
.error = icmpv6_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
.nla_policy = icmpv6_nla_policy,
#endif
-#ifdef CONFIG_SYSCTL
- .ctl_table_header = &icmpv6_sysctl_header,
- .ctl_table = icmpv6_sysctl_table,
-#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmpv6_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmpv6_init_net,
+ .get_net_proto = icmpv6_get_net_proto,
};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa8fa8..0d5279fd852 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -14,6 +14,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "IPv6-nf: " fmt
+
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -39,12 +41,14 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/inet_ecn.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
struct nf_ct_frag6_skb_cb
@@ -56,41 +60,27 @@ struct nf_ct_frag6_skb_cb
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
-struct nf_ct_frag6_queue
-{
- struct inet_frag_queue q;
-
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
- unsigned int csum;
- __u16 nhoffset;
-};
-
static struct inet_frags nf_frags;
-static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_init_frags.timeout,
+ .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_init_frags.low_thresh,
+ .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_init_frags.high_thresh,
+ .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
@@ -98,68 +88,103 @@ struct ctl_table nf_ct_frag6_sysctl_table[] = {
{ }
};
-static struct ctl_table_header *nf_ct_frag6_sysctl_header;
-#endif
-
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static int nf_ct_frag6_sysctl_register(struct net *net)
{
- const struct nf_ct_frag6_queue *nq;
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = nf_ct_frag6_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
+ GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data = &net->nf_frag.frags.timeout;
+ table[1].data = &net->nf_frag.frags.low_thresh;
+ table[2].data = &net->nf_frag.frags.high_thresh;
+ }
- nq = container_of(q, struct nf_ct_frag6_queue, q);
- return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+ hdr = register_net_sysctl(net, "net/netfilter", table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->nf_frag.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
}
-static void nf_skb_free(struct sk_buff *skb)
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-}
+ struct ctl_table *table;
-/* Destruction primitives. */
+ table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+#else
+static int nf_ct_frag6_sysctl_register(struct net *net)
+{
+ return 0;
+}
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
- inet_frag_put(&fq->q, &nf_frags);
}
+#endif
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
- inet_frag_kill(&fq->q, &nf_frags);
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static void nf_ct_frag6_evictor(void)
+static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
- local_bh_disable();
- inet_frag_evictor(&nf_init_frags, &nf_frags);
- local_bh_enable();
+ u32 c;
+
+ net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
+ c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
+ return c & (INETFRAGS_HASHSZ - 1);
}
-static void nf_ct_frag6_expire(unsigned long data)
+
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- struct nf_ct_frag6_queue *fq;
+ const struct frag_queue *nq;
- fq = container_of((struct inet_frag_queue *)data,
- struct nf_ct_frag6_queue, q);
+ nq = container_of(q, struct frag_queue, q);
+ return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
+}
- spin_lock(&fq->q.lock);
+static void nf_skb_free(struct sk_buff *skb)
+{
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
- if (fq->q.last_in & INET_FRAG_COMPLETE)
- goto out;
+static void nf_ct_frag6_expire(unsigned long data)
+{
+ struct frag_queue *fq;
+ struct net *net;
- fq_kill(fq);
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, nf_frag.frags);
-out:
- spin_unlock(&fq->q.lock);
- fq_put(fq);
+ ip6_expire_frag_queue(net, fq, &nf_frags);
}
/* Creation primitives. */
-
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
+static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+ u32 user, struct in6_addr *src,
+ struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -169,36 +194,38 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.ecn = ecn;
read_lock_bh(&nf_frags.lock);
- hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
+ hash = nf_hash_frag(id, src, dst);
- q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+ q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
- if (q == NULL)
- goto oom;
-
- return container_of(q, struct nf_ct_frag6_queue, q);
-
-oom:
- pr_debug("Can't alloc new queue\n");
- return NULL;
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+ }
+ return container_of(q, struct frag_queue, q);
}
-static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
+static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
+ unsigned int payload_len;
int offset, end;
+ u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
goto err;
}
+ payload_len = ntohs(ipv6_hdr(skb)->payload_len);
+
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ end = offset + (payload_len -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -206,6 +233,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
return -1;
}
+ ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
@@ -304,10 +333,16 @@ found:
else
fq->q.fragments = skb;
- skb->dev = NULL;
+ if (skb->dev) {
+ fq->iif = skb->dev->ifindex;
+ skb->dev = NULL;
+ }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &nf_init_frags.mem);
+ fq->ecn |= ecn;
+ if (payload_len > fq->q.max_size)
+ fq->q.max_size = payload_len;
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -316,13 +351,12 @@ found:
fq->nhoffset = nhoff;
fq->q.last_in |= INET_FRAG_FIRST_IN;
}
- write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
- write_unlock(&nf_frags.lock);
+
+ inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
err:
return -1;
}
@@ -337,16 +371,21 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static struct sk_buff *
-nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
+ u8 ecn;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
+
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
@@ -357,7 +396,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
}
/* Head of list must not be cloned. */
- if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+ if (skb_unclone(head, GFP_ATOMIC)) {
pr_debug("skb is cloned but can't expand head");
goto out_oom;
}
@@ -369,16 +408,16 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
struct sk_buff *clone;
int i, plen = 0;
- if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
- pr_debug("Can't alloc skb\n");
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (clone == NULL)
goto out_oom;
- }
+
clone->next = head->next;
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
- for (i=0; i<skb_shinfo(head)->nr_frags; i++)
- plen += skb_shinfo(head)->frags[i].size;
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
@@ -386,7 +425,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_init_frags.mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -410,12 +449,15 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- atomic_sub(head->truesize, &nf_init_frags.mem);
+ sub_frag_mem_limit(&fq->q, head->truesize);
+ head->ignore_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+ IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -444,12 +486,11 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
return head;
out_oversize:
- if (net_ratelimit())
- printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
+ net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+ payload_len);
goto out_fail;
out_oom:
- if (net_ratelimit())
- printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
+ net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
return NULL;
}
@@ -521,8 +562,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
+ struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
+ : dev_net(skb->dev);
struct frag_hdr *fhdr;
- struct nf_ct_frag6_queue *fq;
+ struct frag_queue *fq;
struct ipv6hdr *hdr;
int fhoff, nhoff;
u8 prevhdr;
@@ -554,10 +597,12 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
- nf_ct_frag6_evictor();
+ local_bh_disable();
+ inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+ local_bh_enable();
- fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+ ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
@@ -568,7 +613,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
spin_unlock_bh(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
goto ret_orig;
}
@@ -580,7 +625,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
}
spin_unlock_bh(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
return ret_skb;
ret_orig:
@@ -588,63 +633,62 @@ ret_orig:
return skb;
}
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
- struct net_device *in, struct net_device *out,
- int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
- nf_conntrack_put_reasm(s->nfct_reasm);
- nf_conntrack_get_reasm(skb);
- s->nfct_reasm = skb;
-
s2 = s->next;
s->next = NULL;
-
- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
- NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ consume_skb(s);
s = s2;
}
- nf_conntrack_put_reasm(skb);
}
+static int nf_ct_net_init(struct net *net)
+{
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
+}
+
+static void nf_ct_net_exit(struct net *net)
+{
+ nf_ct_frags6_sysctl_unregister(net);
+ inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+static struct pernet_operations nf_ct_net_ops = {
+ .init = nf_ct_net_init,
+ .exit = nf_ct_net_exit,
+};
+
int nf_ct_frag6_init(void)
{
+ int ret = 0;
+
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.skb_free = nf_skb_free;
- nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.secret_interval = 10 * 60 * HZ;
- nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
- nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
-#ifdef CONFIG_SYSCTL
- nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
- nf_ct_frag6_sysctl_table);
- if (!nf_ct_frag6_sysctl_header) {
+ ret = register_pernet_subsys(&nf_ct_net_ops);
+ if (ret)
inet_frags_fini(&nf_frags);
- return -ENOMEM;
- }
-#endif
- return 0;
+ return ret;
}
void nf_ct_frag6_cleanup(void)
{
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(nf_ct_frag6_sysctl_header);
- nf_ct_frag6_sysctl_header = NULL;
-#endif
+ unregister_pernet_subsys(&nf_ct_net_ops);
inet_frags_fini(&nf_frags);
-
- nf_init_frags.low_thresh = 0;
- nf_ct_frag6_evictor();
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 99abfb53bab..7b9a748c6ba 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,13 +19,15 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
{
u16 zone = NF_CT_DEFAULT_ZONE;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge &&
@@ -48,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
}
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -56,21 +60,26 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
{
struct sk_buff *reasm;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
+#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+ reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
- /* error occured or not fragmented */
+ /* error occurred or not fragmented */
if (reasm == skb)
return NF_ACCEPT;
- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
- (struct net_device *)out, okfn);
+ nf_ct_frag6_consume_orig(reasm);
+
+ NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+ (struct net_device *) in, (struct net_device *) out,
+ okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
return NF_STOLEN;
}
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 00000000000..abfe75a2e31
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of IPv6 NAT funded by Astaro.
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (ct->status & statusbit) {
+ fl6->daddr = t->dst.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl6->saddr = t->src.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_sport = t->src.u.all;
+ }
+}
+#endif
+
+static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+ ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct ipv6hdr *ipv6h;
+ __be16 frag_off;
+ int hdroff;
+ u8 nexthdr;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ return false;
+
+ ipv6h = (void *)skb->data + iphdroff;
+ nexthdr = ipv6h->nexthdr;
+ hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+ &nexthdr, &frag_off);
+ if (hdroff < 0)
+ goto manip_addr;
+
+ if ((frag_off & htons(~0x7)) == 0 &&
+ !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+manip_addr:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ ipv6h->saddr = target->src.u3.in6;
+ else
+ ipv6h->daddr = target->dst.u3.in6;
+
+ return true;
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+ const struct in6_addr *oldip, *newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = &ipv6h->saddr;
+ newip = &t->src.u3.in6;
+ } else {
+ oldip = &ipv6h->daddr;
+ newip = &t->dst.u3.in6;
+ }
+ inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+ newip->s6_addr32, 1);
+}
+
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt6i_flags & RTF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ (data - (void *)skb->data);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V6_MINIP]) {
+ nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+ sizeof(struct in6_addr));
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V6_MAXIP])
+ nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+ sizeof(struct in6_addr));
+ else
+ range->max_addr = range->min_addr;
+
+ return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+ .l3proto = NFPROTO_IPV6,
+ .secure_port = nf_nat_ipv6_secure_port,
+ .in_range = nf_nat_ipv6_in_range,
+ .manip_pkt = nf_nat_ipv6_manip_pkt,
+ .csum_update = nf_nat_ipv6_csum_update,
+ .csum_recalc = nf_nat_ipv6_csum_recalc,
+ .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv6_decode_session,
+#endif
+};
+
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ unsigned int hdrlen)
+{
+ struct {
+ struct icmp6hdr icmp6;
+ struct ipv6hdr ip6;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
+ if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp6.icmp6_cksum = 0;
+ inside->icmp6.icmp6_cksum =
+ csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ skb->len - hdrlen, IPPROTO_ICMPV6,
+ csum_partial(&inside->icmp6,
+ skb->len - hdrlen, 0));
+ }
+
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static int __init nf_nat_l3proto_ipv6_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv6_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
+
+module_init(nf_nat_l3proto_ipv6_init);
+module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 00000000000..2205e8eeeac
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u16 id;
+ unsigned int range_size;
+ unsigned int i;
+
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+ range_size = 0xffff;
+
+ for (i = 0; ; ++id) {
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+ (id % range_size));
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmp6hdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmp6hdr *)(skb->data + hdroff);
+ l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+ tuple, maniptype);
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+ inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+ hdr->icmp6_identifier,
+ tuple->src.u.icmp.id, 0);
+ hdr->icmp6_identifier = tuple->src.u.icmp.id;
+ }
+ return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
+ .l4proto = IPPROTO_ICMPV6,
+ .manip_pkt = icmpv6_manip_pkt,
+ .in_range = icmpv6_in_range,
+ .unique_tuple = icmpv6_unique_tuple,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 00000000000..0d812b31277
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain_ipv6(ops, skb, in, out, okfn);
+}
+
+struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv6);
+
+static int nf_tables_ipv6_init_net(struct net *net)
+{
+ net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv6 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
+
+ if (nft_register_afinfo(net, net->nft.ipv6) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv6);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv6_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv6);
+ kfree(net->nft.ipv6);
+}
+
+static struct pernet_operations nf_tables_ipv6_net_ops = {
+ .init = nf_tables_ipv6_init_net,
+ .exit = nf_tables_ipv6_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv6 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_ipv6);
+ ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_ipv6);
+
+ return ret;
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+ nft_unregister_chain_type(&filter_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 00000000000..d189fcb437f
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ipv6.h>
+
+/*
+ * IPv6 NAT chains
+ */
+
+static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv6 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
+ [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ },
+};
+
+static int __init nft_chain_nat_ipv6_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv6);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_ipv6_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+}
+
+module_init(nft_chain_nat_ipv6_init);
+module_exit(nft_chain_nat_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 00000000000..42031299585
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ struct in6_addr saddr, daddr;
+ u_int8_t hop_limit;
+ u32 mark, flowlabel;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+ /* flowlabel and prio (includes version, which shouldn't change either */
+ flowlabel = *((u32 *)ipv6_hdr(skb));
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv6 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv6);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv6);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 00000000000..0bc19fa8782
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+ .type = &nft_reject_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv6_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "reject",
+ .ops = &nft_reject_ipv6_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
new file mode 100644
index 00000000000..5ec867e4a8b
--- /dev/null
+++ b/net/ipv6/output_core.c
@@ -0,0 +1,98 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static. These functions are needed by GSO/GRO implementation.
+ */
+#include <linux/export.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/addrconf.h>
+#include <net/secure_seq.h>
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
+ int found_rhdr = 0;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+#endif
+ if (found_rhdr)
+ return offset;
+ break;
+ default :
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ }
+
+ return offset;
+}
+EXPORT_SYMBOL(ip6_find_1stfragopt);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+ int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+ if (hoplimit == 0) {
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev)
+ hoplimit = idev->cnf.hop_limit;
+ else
+ hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ rcu_read_unlock();
+ }
+ return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+#endif
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+ int len;
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+ ipv6_hdr(skb)->payload_len = htons(len);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+ skb_dst(skb)->dev, dst_output);
+}
+EXPORT_SYMBOL_GPL(__ip6_local_out);
+
+int ip6_local_out(struct sk_buff *skb)
+{
+ int err;
+
+ err = __ip6_local_out(skb);
+ if (likely(err == 1))
+ err = dst_output(skb);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 00000000000..5b7a1ed2aba
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,275 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect_v6_only,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
+{
+ return -EAFNOSUPPORT;
+}
+static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+}
+static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr user_icmph;
+ int addr_type;
+ struct in6_addr *daddr;
+ int iif = 0;
+ struct flowi6 fl6;
+ int err;
+ int hlimit;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct pingfakehdr pfh;
+
+ pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
+ if (msg->msg_name) {
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
+ if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+ u->sin6_family != AF_INET6) {
+ return -EINVAL;
+ }
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != u->sin6_scope_id) {
+ return -EINVAL;
+ }
+ daddr = &(u->sin6_addr);
+ iif = u->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &sk->sk_v6_daddr;
+ }
+
+ if (!iif)
+ iif = sk->sk_bound_dev_if;
+
+ addr_type = ipv6_addr_type(daddr);
+ if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+ return -EINVAL;
+ if (addr_type & IPV6_ADDR_MAPPED)
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.saddr = np->saddr;
+ fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_icmp_type = user_icmph.icmp6_type;
+ fl6.fl6_icmp_code = user_icmph.icmp6_code;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+ if (!np)
+ return -EBADF;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ pfh.icmph.type = user_icmph.icmp6_type;
+ pfh.icmph.code = user_icmph.icmp6_code;
+ pfh.icmph.checksum = 0;
+ pfh.icmph.un.echo.id = inet->inet_sport;
+ pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+ pfh.iov = msg->msg_iov;
+ pfh.wcheck = 0;
+ pfh.family = AF_INET6;
+
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+
+ lock_sock(sk);
+ err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ 0, hlimit,
+ np->tclass, NULL, &fl6, rt,
+ MSG_DONTWAIT, np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *) &pfh.icmph,
+ len);
+ }
+ release_sock(sk);
+
+ if (err)
+ return err;
+
+ return len;
+}
+
+#ifdef CONFIG_PROC_FS
+static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return ping_seq_start(seq, pos, AF_INET6);
+}
+
+static int ping_v6_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ int bucket = ((struct ping_iter_state *) seq->private)->bucket;
+ struct inet_sock *inet = inet_sk(v);
+ __u16 srcp = ntohs(inet->inet_sport);
+ __u16 destp = ntohs(inet->inet_dport);
+ ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+ }
+ return 0;
+}
+
+static struct ping_seq_afinfo ping_v6_seq_afinfo = {
+ .name = "icmp6",
+ .family = AF_INET6,
+ .seq_fops = &ping_seq_fops,
+ .seq_ops = {
+ .start = ping_v6_seq_start,
+ .show = ping_v6_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
+ },
+};
+
+static int __net_init ping_v6_proc_init_net(struct net *net)
+{
+ return ping_proc_register(net, &ping_v6_seq_afinfo);
+}
+
+static void __net_init ping_v6_proc_exit_net(struct net *net)
+{
+ return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+}
+
+static struct pernet_operations ping_v6_net_ops = {
+ .init = ping_v6_proc_init_net,
+ .exit = ping_v6_proc_exit_net,
+};
+#endif
+
+int __init pingv6_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ int ret = register_pernet_subsys(&ping_v6_net_ops);
+ if (ret)
+ return ret;
+#endif
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl;
+ pingv6_ops.ip6_datagram_recv_specific_ctl =
+ ip6_datagram_recv_specific_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+#ifdef CONFIG_PROC_FS
+ unregister_pernet_subsys(&ping_v6_net_ops);
+#endif
+ inet6_unregister_protosw(&pingv6_protosw);
+}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 24b3558b8e6..3317440ea34 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -21,6 +21,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
+#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -89,6 +90,11 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+ /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
+ SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+ SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
SNMP_MIB_SENTINEL
};
@@ -98,6 +104,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+ SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -128,6 +135,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -138,10 +146,11 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
-static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
+static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
{
char name[32];
int i;
@@ -158,14 +167,14 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
snprintf(name, sizeof(name), "Icmp6%s%s",
i & 0x100 ? "Out" : "In", p);
seq_printf(seq, "%-32s\t%lu\n", name,
- snmp_fold_field(mib, i));
+ atomic_long_read(smib + i));
}
/* print by number (nonzero only) - ICMPMsgStat format */
for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
unsigned long val;
- val = snmp_fold_field(mib, i);
+ val = atomic_long_read(smib + i);
if (!val)
continue;
snprintf(name, sizeof(name), "Icmp6%sType%u",
@@ -174,17 +183,25 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
}
}
-static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
+/* can be called either with percpu mib (pcpumib != NULL),
+ * or shared one (smib != NULL)
+ */
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
+ atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
int i;
+ unsigned long val;
- for (i = 0; itemlist[i].name; i++)
- seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
- snmp_fold_field(mib, itemlist[i].entry));
+ for (i = 0; itemlist[i].name; i++) {
+ val = pcpumib ?
+ snmp_fold_field(pcpumib, itemlist[i].entry) :
+ atomic_long_read(smib + itemlist[i].entry);
+ seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+ }
}
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
int i;
@@ -198,16 +215,15 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
- snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+ snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
- snmp6_icmp6_list);
- snmp6_seq_show_icmpv6msg(seq,
- (void __percpu **)net->mib.icmpv6msg_statistics);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
- snmp6_udp6_list);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
- snmp6_udplite6_list);
+ snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
+ NULL, snmp6_icmp6_list);
+ snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
+ snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
+ NULL, snmp6_udp6_list);
+ snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
+ NULL, snmp6_udplite6_list);
return 0;
}
@@ -229,17 +245,17 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
struct inet6_dev *idev = (struct inet6_dev *)seq->private;
seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
- snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6,
- snmp6_ipstats_list);
- snmp6_seq_show_item(seq, (void __percpu **)idev->stats.icmpv6,
+ snmp6_seq_show_item64(seq, idev->stats.ipv6,
+ snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
+ snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
snmp6_icmp6_list);
- snmp6_seq_show_icmpv6msg(seq, (void __percpu **)idev->stats.icmpv6msg);
+ snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
return 0;
}
static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, snmp6_dev_seq_show, PDE(inode)->data);
+ return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
}
static const struct file_operations snmp6_dev_seq_fops = {
@@ -279,19 +295,18 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
return -ENOENT;
if (!idev->stats.proc_dir_entry)
return -EINVAL;
- remove_proc_entry(idev->stats.proc_dir_entry->name,
- net->mib.proc_net_devsnmp6);
+ proc_remove(idev->stats.proc_dir_entry);
idev->stats.proc_dir_entry = NULL;
return 0;
}
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_net_fops_create(net, "sockstat6", S_IRUGO,
- &sockstat6_seq_fops))
+ if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ &sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+ if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -299,18 +314,18 @@ static int __net_init ipv6_proc_init_net(struct net *net)
goto proc_dev_snmp6_fail;
return 0;
-proc_snmp6_fail:
- proc_net_remove(net, "sockstat6");
proc_dev_snmp6_fail:
- proc_net_remove(net, "dev_snmp6");
+ remove_proc_entry("snmp6", net->proc_net);
+proc_snmp6_fail:
+ remove_proc_entry("sockstat6", net->proc_net);
return -ENOMEM;
}
static void __net_exit ipv6_proc_exit_net(struct net *net)
{
- proc_net_remove(net, "sockstat6");
- proc_net_remove(net, "dev_snmp6");
- proc_net_remove(net, "snmp6");
+ remove_proc_entry("sockstat6", net->proc_net);
+ remove_proc_entry("dev_snmp6", net->proc_net);
+ remove_proc_entry("snmp6", net->proc_net);
}
static struct pernet_operations ipv6_proc_ops = {
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9a7978fdc02..e048cf1bb6a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,26 +25,22 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
+#if IS_ENABLED(CONFIG_IPV6)
const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int hash = protocol & (MAX_INET_PROTOS - 1);
-
- return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet6_add_protocol);
-/*
- * Remove a protocol from the hash tables.
- */
-
int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+ int ret;
- ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
prot, NULL) == prot) ? 0 : -1;
synchronize_net();
@@ -52,3 +48,26 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
return ret;
}
EXPORT_SYMBOL(inet6_del_protocol);
+#endif
+
+const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+
+int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_offload);
+
+int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ int ret;
+
+ ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ prot, NULL) == prot) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_del_offload);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 86c39526ba5..b2dc60b0c76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -31,6 +31,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/skbuff.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -49,7 +50,7 @@
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/tcp_states.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#include <linux/mroute6.h>
@@ -60,34 +61,35 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/export.h>
+
+#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
static struct raw_hashinfo raw_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, struct in6_addr *loc_addr,
- struct in6_addr *rmt_addr, int dif)
+ unsigned short num, const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif)
{
- struct hlist_node *node;
- int is_multicast = ipv6_addr_is_multicast(loc_addr);
+ bool is_multicast = ipv6_addr_is_multicast(loc_addr);
- sk_for_each_from(sk, node)
+ sk_for_each_from(sk)
if (inet_sk(sk)->inet_num == num) {
- struct ipv6_pinfo *np = inet6_sk(sk);
if (!net_eq(sock_net(sk), net))
continue;
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
goto found;
if (is_multicast &&
inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -105,38 +107,40 @@ found:
* 0 - deliver
* 1 - block
*/
-static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
{
- struct icmp6hdr *icmph;
- struct raw6_sock *rp = raw6_sk(sk);
-
- if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
- __u32 *data = &rp->filter.data[0];
- int bit_nr;
+ struct icmp6hdr _hdr;
+ const struct icmp6hdr *hdr;
- icmph = (struct icmp6hdr *) skb->data;
- bit_nr = icmph->icmp6_type;
+ /* We require only the four bytes of the ICMPv6 header, not any
+ * additional bytes of message body in "struct icmp6hdr".
+ */
+ hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+ ICMPV6_HDRLEN, &_hdr);
+ if (hdr) {
+ const __u32 *data = &raw6_sk(sk)->filter.data[0];
+ unsigned int type = hdr->icmp6_type;
- return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
+ return (data[type >> 5] & (1U << (type & 31))) != 0;
}
- return 0;
+ return 1;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
+
+static mh_filter_t __rcu *mh_filter __read_mostly;
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
- struct sk_buff *skb))
+int rawv6_mh_filter_register(mh_filter_t filter)
{
rcu_assign_pointer(mh_filter, filter);
return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_register);
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
- struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
{
- rcu_assign_pointer(mh_filter, NULL);
+ RCU_INIT_POINTER(mh_filter, NULL);
synchronize_rcu();
return 0;
}
@@ -151,19 +155,19 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
*
* Caller owns SKB so we must make clones.
*/
-static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
{
- struct in6_addr *saddr;
- struct in6_addr *daddr;
+ const struct in6_addr *saddr;
+ const struct in6_addr *daddr;
struct sock *sk;
- int delivered = 0;
+ bool delivered = false;
__u8 hash;
struct net *net;
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (MAX_INET_PROTOS - 1);
+ hash = nexthdr & (RAW_HTABLE_SIZE - 1);
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -177,13 +181,13 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
while (sk) {
int filtered;
- delivered = 1;
+ delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
filtered = icmpv6_filter(sk, skb);
break;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
{
/* XXX: To validate MH only once for each packet,
@@ -192,10 +196,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
* policy is placed in rawv6_rcv() because it is
* required for each socket.
*/
- int (*filter)(struct sock *sock, struct sk_buff *skb);
+ mh_filter_t *filter;
filter = rcu_dereference(mh_filter);
- filtered = filter ? filter(sk, skb) : 0;
+ filtered = filter ? (*filter)(sk, skb) : 0;
break;
}
#endif
@@ -223,11 +227,11 @@ out:
return delivered;
}
-int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
+bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
{
struct sock *raw_sk;
- raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+ raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
raw_sk = NULL;
@@ -246,6 +250,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+
+ if (addr->sin6_family != AF_INET6)
+ return -EINVAL;
+
addr_type = ipv6_addr_type(&addr->sin6_addr);
/* Raw sockets are IPv6 only */
@@ -263,7 +271,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
/* Override any existing binding, if another
@@ -297,9 +305,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
- ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
- ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+ np->saddr = addr->sin6_addr;
err = 0;
out_unlock:
rcu_read_unlock();
@@ -326,9 +334,14 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
return;
harderr = icmpv6_err_convert(type, code, &err);
- if (type == ICMPV6_PKT_TOOBIG)
+ if (type == ICMPV6_PKT_TOOBIG) {
+ ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+ }
+ if (type == NDISC_REDIRECT) {
+ ip6_sk_redirect(skb, sk);
+ return;
+ }
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
@@ -347,7 +360,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
{
struct sock *sk;
int hash;
- struct in6_addr *saddr, *daddr;
+ const struct in6_addr *saddr, *daddr;
struct net *net;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
@@ -356,7 +369,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
if (sk != NULL) {
/* Note: ipv6_hdr(skb) != skb->data */
- struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
+ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
saddr = &ip6h->saddr;
daddr = &ip6h->daddr;
net = dev_net(skb->dev);
@@ -371,9 +384,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
read_unlock(&raw_v6_hashinfo.lock);
}
-static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
+static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
+ if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
@@ -381,7 +394,8 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
}
/* Charge it to the socket. */
- if (ip_queue_rcv_skb(sk, skb) < 0) {
+ skb_dst_drop(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -447,7 +461,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct sk_buff *skb;
size_t copied;
int err;
@@ -455,14 +469,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (addr_len)
- *addr_len=sizeof(*sin6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -492,17 +503,17 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (sin6) {
sin6->sin6_family = AF_INET6;
sin6->sin6_port = 0;
- ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
+ sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
+ *addr_len = sizeof(*sin6);
}
sock_recv_ts_and_drops(msg, sk, skb);
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_ctl(sk, msg, skb);
err = copied;
if (flags & MSG_TRUNC)
@@ -523,7 +534,7 @@ csum_copy_err:
goto out;
}
-static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
struct raw6_sock *rp)
{
struct sk_buff *skb;
@@ -541,8 +552,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
goto out;
offset = rp->offset;
- total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
- skb->data);
+ total_len = inet_sk(sk)->cork.base.length;
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
@@ -585,11 +595,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
if (unlikely(csum))
tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
- csum = csum_ipv6_magic(&fl->fl6_src,
- &fl->fl6_dst,
- total_len, fl->proto, tmp_csum);
+ csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+ total_len, fl6->flowi6_proto, tmp_csum);
- if (csum == 0 && fl->proto == IPPROTO_UDP)
+ if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
csum = CSUM_MANGLED_0;
if (skb_store_bits(skb, offset, &csum, 2))
@@ -602,7 +611,7 @@ out:
}
static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
- struct flowi *fl, struct dst_entry **dstp,
+ struct flowi6 *fl6, struct dst_entry **dstp,
unsigned int flags)
{
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -610,21 +619,24 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
struct sk_buff *skb;
int err;
struct rt6_info *rt = (struct rt6_info *)*dstp;
+ int hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ int tlen = rt->dst.dev->needed_tailroom;
if (length > rt->dst.dev->mtu) {
- ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
+ ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
return -EMSGSIZE;
}
if (flags&MSG_PROBE)
goto out;
skb = sock_alloc_send_skb(sk,
- length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
+ length + hlen + tlen + 15,
flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(skb, hlen);
+ skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb_dst_set(skb, &rt->dst);
@@ -661,7 +673,7 @@ error:
return err;
}
-static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
{
struct iovec *iov;
u8 __user *type = NULL;
@@ -678,7 +690,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
if (!iov)
continue;
- switch (fl->proto) {
+ switch (fl6->flowi6_proto) {
case IPPROTO_ICMPV6:
/* check if one-byte field is readable or not. */
if (iov->iov_base && iov->iov_len < 1)
@@ -693,8 +705,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
code = iov->iov_base;
if (type && code) {
- if (get_user(fl->fl_icmp_type, type) ||
- get_user(fl->fl_icmp_code, code))
+ if (get_user(fl6->fl6_icmp_type, type) ||
+ get_user(fl6->fl6_icmp_code, code))
return -EFAULT;
probed = 1;
}
@@ -705,7 +717,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
/* check if type field is readable or not. */
if (iov->iov_len > 2 - len) {
u8 __user *p = iov->iov_base;
- if (get_user(fl->fl_mh_type, &p[2 - len]))
+ if (get_user(fl6->fl6_mh_type, &p[2 - len]))
return -EFAULT;
probed = 1;
} else
@@ -726,7 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
- struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -734,7 +746,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
- struct flowi fl;
+ struct flowi6 fl6;
int addr_len = msg->msg_namelen;
int hlimit = -1;
int tclass = -1;
@@ -755,9 +767,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
/*
* Get and verify the address.
*/
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
- fl.mark = sk->sk_mark;
+ fl6.flowi6_mark = sk->sk_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -779,12 +791,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
daddr = &sin6->sin6_addr;
if (np->sndflow) {
- fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- daddr = &flowlabel->dst;
}
}
@@ -793,38 +804,38 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
- ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
- fl.oif = sin6->sin6_scope_id;
+ __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
+ fl6.flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
proto = inet->inet_num;
- daddr = &np->daddr;
- fl.fl6_flowlabel = np->flow_label;
+ daddr = &sk->sk_v6_daddr;
+ fl6.flowlabel = np->flow_label;
}
- if (fl.oif == 0)
- fl.oif = sk->sk_bound_dev_if;
+ if (fl6.flowi6_oif == 0)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
- err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
- &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
- if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -837,46 +848,33 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
- fl.proto = proto;
- err = rawv6_probe_proto_opt(&fl, msg);
+ fl6.flowi6_proto = proto;
+ err = rawv6_probe_proto_opt(&fl6, msg);
if (err)
goto out;
if (!ipv6_addr_any(daddr))
- ipv6_addr_copy(&fl.fl6_dst, daddr);
+ fl6.daddr = *daddr;
else
- fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
- if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+ fl6.saddr = np->saddr;
- final_p = fl6_update_dst(&fl, opt, &final);
+ final_p = fl6_update_dst(&fl6, opt, &final);
- if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
- fl.oif = np->mcast_oif;
- security_sk_classify_flow(sk, &fl);
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
goto out;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
- if (err < 0) {
- if (err == -EREMOTE)
- err = ip6_dst_blackhole(sk, &dst, &fl);
- if (err < 0)
- goto out;
- }
-
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
}
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -889,17 +887,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
back_from_confirm:
if (inet->hdrincl)
- err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags);
+ err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
else {
lock_sock(sk);
err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
- len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
+ len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
msg->msg_flags, dontfrag);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
- err = rawv6_push_pending_frames(sk, &fl, rp);
+ err = rawv6_push_pending_frames(sk, &fl6, rp);
release_sock(sk);
}
done:
@@ -968,57 +966,54 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
switch (optname) {
- case IPV6_CHECKSUM:
- if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
- level == IPPROTO_IPV6) {
- /*
- * RFC3542 tells that IPV6_CHECKSUM socket
- * option in the IPPROTO_IPV6 level is not
- * allowed on ICMPv6 sockets.
- * If you want to set it, use IPPROTO_RAW
- * level IPV6_CHECKSUM socket option
- * (Linux extension).
- */
- return -EINVAL;
- }
+ case IPV6_CHECKSUM:
+ if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
+ level == IPPROTO_IPV6) {
+ /*
+ * RFC3542 tells that IPV6_CHECKSUM socket
+ * option in the IPPROTO_IPV6 level is not
+ * allowed on ICMPv6 sockets.
+ * If you want to set it, use IPPROTO_RAW
+ * level IPV6_CHECKSUM socket option
+ * (Linux extension).
+ */
+ return -EINVAL;
+ }
- /* You may get strange result with a positive odd offset;
- RFC2292bis agrees with me. */
- if (val > 0 && (val&1))
- return -EINVAL;
- if (val < 0) {
- rp->checksum = 0;
- } else {
- rp->checksum = 1;
- rp->offset = val;
- }
+ /* You may get strange result with a positive odd offset;
+ RFC2292bis agrees with me. */
+ if (val > 0 && (val&1))
+ return -EINVAL;
+ if (val < 0) {
+ rp->checksum = 0;
+ } else {
+ rp->checksum = 1;
+ rp->offset = val;
+ }
- return 0;
- break;
+ return 0;
- default:
- return -ENOPROTOOPT;
+ default:
+ return -ENOPROTOOPT;
}
}
static int rawv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
- switch(level) {
- case SOL_RAW:
- break;
+ switch (level) {
+ case SOL_RAW:
+ break;
- case SOL_ICMPV6:
- if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
- return -EOPNOTSUPP;
- return rawv6_seticmpfilter(sk, level, optname, optval,
- optlen);
- case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
- break;
- default:
- return ipv6_setsockopt(sk, level, optname, optval,
- optlen);
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
@@ -1084,21 +1079,19 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
static int rawv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- switch(level) {
- case SOL_RAW:
- break;
+ switch (level) {
+ case SOL_RAW:
+ break;
- case SOL_ICMPV6:
- if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
- return -EOPNOTSUPP;
- return rawv6_geticmpfilter(sk, level, optname, optval,
- optlen);
- case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
- break;
- default:
- return ipv6_getsockopt(sk, level, optname, optval,
- optlen);
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
@@ -1128,35 +1121,51 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
- switch(cmd) {
- case SIOCOUTQ:
- {
- int amount = sk_wmem_alloc_get(sk);
+ switch (cmd) {
+ case SIOCOUTQ: {
+ int amount = sk_wmem_alloc_get(sk);
- return put_user(amount, (int __user *)arg);
- }
- case SIOCINQ:
- {
- struct sk_buff *skb;
- int amount = 0;
-
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL)
- amount = skb->tail - skb->transport_header;
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
- }
+ return put_user(amount, (int __user *)arg);
+ }
+ case SIOCINQ: {
+ struct sk_buff *skb;
+ int amount = 0;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb != NULL)
+ amount = skb_tail_pointer(skb) -
+ skb_transport_header(skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ return put_user(amount, (int __user *)arg);
+ }
- default:
+ default:
#ifdef CONFIG_IPV6_MROUTE
- return ip6mr_ioctl(sk, cmd, (void __user *)arg);
+ return ip6mr_ioctl(sk, cmd, (void __user *)arg);
#else
- return -ENOIOCTLCMD;
+ return -ENOIOCTLCMD;
#endif
}
}
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case SIOCOUTQ:
+ case SIOCINQ:
+ return -ENOIOCTLCMD;
+ default:
+#ifdef CONFIG_IPV6_MROUTE
+ return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else
+ return -ENOIOCTLCMD;
+#endif
+ }
+}
+#endif
+
static void rawv6_close(struct sock *sk, long timeout)
{
if (inet_sk(sk)->inet_num == IPPROTO_RAW)
@@ -1198,7 +1207,7 @@ struct proto rawv6_prot = {
.owner = THIS_MODULE,
.close = rawv6_close,
.destroy = raw6_destroy,
- .connect = ip6_datagram_connect,
+ .connect = ip6_datagram_connect_v6_only,
.disconnect = udp_disconnect,
.ioctl = rawv6_ioctl,
.init = rawv6_init_sk,
@@ -1215,48 +1224,21 @@ struct proto rawv6_prot = {
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_rawv6_setsockopt,
.compat_getsockopt = compat_rawv6_getsockopt,
+ .compat_ioctl = compat_rawv6_ioctl,
#endif
};
#ifdef CONFIG_PROC_FS
-static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-{
- struct ipv6_pinfo *np = inet6_sk(sp);
- struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = 0;
- srcp = inet_sk(sp)->inet_num;
- seq_printf(seq,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
- i,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- sock_i_uid(sp), 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
-}
-
static int raw6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ struct sock *sp = v;
+ __u16 srcp = inet_sk(sp)->inet_num;
+ ip6_dgram_sock_seq_show(seq, v, srcp, 0,
+ raw_seq_private(seq)->bucket);
+ }
return 0;
}
@@ -1282,7 +1264,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
+ if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
@@ -1290,7 +1272,7 @@ static int __net_init raw6_init_net(struct net *net)
static void __net_exit raw6_exit_net(struct net *net)
{
- proc_net_remove(net, "raw6");
+ remove_proc_entry("raw6", net->proc_net);
}
static struct pernet_operations raw6_net_ops = {
@@ -1340,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 07beeb06f75..cc85a9ba501 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -26,6 +26,9 @@
* YOSHIFUJI,H. @USAGI Always remove fragment header to
* calculate ICV correctly.
*/
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -42,6 +45,7 @@
#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -54,6 +58,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/inet_ecn.h>
struct ip6frag_skb_cb
{
@@ -63,36 +68,12 @@ struct ip6frag_skb_cb
#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
-
-/*
- * Equivalent of ipv4 struct ipq
- */
-
-struct frag_queue
-{
- struct inet_frag_queue q;
-
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
- int iif;
- unsigned int csum;
- __u16 nhoffset;
-};
-
-static struct inet_frags ip6_frags;
-
-int ip6_frag_nqueues(struct net *net)
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
- return net->ipv6.frags.nqueues;
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-int ip6_frag_mem(struct net *net)
-{
- return atomic_read(&net->ipv6.frags.mem);
-}
+static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
@@ -101,47 +82,36 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
*/
-unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr, u32 rnd)
+static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
u32 c;
- c = jhash_3words((__force u32)saddr->s6_addr32[0],
- (__force u32)saddr->s6_addr32[1],
- (__force u32)saddr->s6_addr32[2],
- rnd);
-
- c = jhash_3words((__force u32)saddr->s6_addr32[3],
- (__force u32)daddr->s6_addr32[0],
- (__force u32)daddr->s6_addr32[1],
- c);
-
- c = jhash_3words((__force u32)daddr->s6_addr32[2],
- (__force u32)daddr->s6_addr32[3],
- (__force u32)id,
- c);
+ net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
+ c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, ip6_frags.rnd);
return c & (INETFRAGS_HASHSZ - 1);
}
-EXPORT_SYMBOL_GPL(inet6_hash_frag);
static unsigned int ip6_hashfn(struct inet_frag_queue *q)
{
struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
+ return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
-int ip6_frag_match(struct inet_frag_queue *q, void *a)
+bool ip6_frag_match(struct inet_frag_queue *q, void *a)
{
struct frag_queue *fq;
struct ip6_create_arg *arg = a;
fq = container_of(q, struct frag_queue, q);
- return (fq->id == arg->id && fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst));
+ return fq->id == arg->id &&
+ fq->user == arg->user &&
+ ipv6_addr_equal(&fq->saddr, arg->src) &&
+ ipv6_addr_equal(&fq->daddr, arg->dst);
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -152,51 +122,24 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
fq->id = arg->id;
fq->user = arg->user;
- ipv6_addr_copy(&fq->saddr, arg->src);
- ipv6_addr_copy(&fq->daddr, arg->dst);
+ fq->saddr = *arg->src;
+ fq->daddr = *arg->dst;
+ fq->ecn = arg->ecn;
}
EXPORT_SYMBOL(ip6_frag_init);
-/* Destruction primitives. */
-
-static __inline__ void fq_put(struct frag_queue *fq)
-{
- inet_frag_put(&fq->q, &ip6_frags);
-}
-
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct frag_queue *fq)
-{
- inet_frag_kill(&fq->q, &ip6_frags);
-}
-
-static void ip6_evictor(struct net *net, struct inet6_dev *idev)
-{
- int evicted;
-
- evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
- if (evicted)
- IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
-static void ip6_frag_expire(unsigned long data)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+ struct inet_frags *frags)
{
- struct frag_queue *fq;
struct net_device *dev = NULL;
- struct net *net;
-
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
spin_lock(&fq->q.lock);
if (fq->q.last_in & INET_FRAG_COMPLETE)
goto out;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, frags);
- net = container_of(fq->q.net, struct net, ipv6.frags);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
@@ -220,11 +163,24 @@ out_rcu_unlock:
rcu_read_unlock();
out:
spin_unlock(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, frags);
+}
+EXPORT_SYMBOL(ip6_expire_frag_queue);
+
+static void ip6_frag_expire(unsigned long data)
+{
+ struct frag_queue *fq;
+ struct net *net;
+
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, ipv6.frags);
+
+ ip6_expire_frag_queue(net, fq, &ip6_frags);
}
static __inline__ struct frag_queue *
-fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst)
+fq_find(struct net *net, __be32 id, const struct in6_addr *src,
+ const struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -234,14 +190,16 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst)
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.ecn = ecn;
read_lock(&ip6_frags.lock);
- hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
+ hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (q == NULL)
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
-
+ }
return container_of(q, struct frag_queue, q);
}
@@ -252,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net_device *dev;
int offset, end;
struct net *net = dev_net(skb_dst(skb)->dev);
+ u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE)
goto err;
@@ -269,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;
}
+ ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
@@ -335,12 +296,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
* one or more its constituent fragments is determined to be an
* overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
+ * fragments) MUST be silently discarded.
*/
/* Check for overlap with preceding fragment. */
@@ -370,7 +330,8 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &fq->q.net->mem);
+ fq->ecn |= ecn;
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -381,16 +342,22 @@ found:
}
if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len)
- return ip6_frag_reasm(fq, prev, dev);
+ fq->q.meat == fq->q.len) {
+ int res;
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ res = ip6_frag_reasm(fq, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return res;
+ }
- write_lock(&ip6_frags.lock);
- list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
- write_unlock(&ip6_frags.lock);
+ skb_dst_drop(skb);
+ inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &ip6_frags);
err:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -414,8 +381,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
unsigned int nhoff;
+ int sum_truesize;
+ u8 ecn;
+
+ inet_frag_kill(&fq->q, &ip6_frags);
- fq_kill(fq);
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
/* Make the one we just received the head. */
if (prev) {
@@ -433,7 +406,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
skb_morph(head, fq->q.fragments);
head->next = fq->q.fragments->next;
- kfree_skb(fq->q.fragments);
+ consume_skb(fq->q.fragments);
fq->q.fragments = head;
}
@@ -448,7 +421,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
goto out_oversize;
/* Head of list must not be cloned. */
- if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
/* If the first fragment is fragmented itself, we split
@@ -464,14 +437,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
- for (i=0; i<skb_shinfo(head)->nr_frags; i++)
- plen += skb_shinfo(head)->frags[i].size;
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -483,26 +456,41 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->mac_header += sizeof(struct frag_hdr);
head->network_header += sizeof(struct frag_hdr);
- skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
+ sum_truesize = head->truesize;
+ for (fp = head->next; fp;) {
+ bool headstolen;
+ int delta;
+ struct sk_buff *next = fp->next;
+
+ sum_truesize += fp->truesize;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
+
+ if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+ kfree_skb_partial(fp, headstolen);
+ } else {
+ if (!skb_shinfo(head)->frag_list)
+ skb_shinfo(head)->frag_list = fp;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ head->truesize += fp->truesize;
+ }
+ fp = next;
}
- atomic_sub(head->truesize, &fq->q.net->mem);
+ sub_frag_mem_limit(&fq->q, sum_truesize);
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
+ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -518,12 +506,10 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
return 1;
out_oversize:
- if (net_ratelimit())
- printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
+ net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
goto out_fail;
out_oom:
- if (net_ratelimit())
- printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
+ net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
rcu_read_lock();
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
@@ -535,8 +521,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
{
struct frag_hdr *fhdr;
struct frag_queue *fq;
- struct ipv6hdr *hdr = ipv6_hdr(skb);
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int evicted;
+
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
@@ -558,13 +548,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
return 1;
}
- if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
- ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
+ evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
+ if (evicted)
+ IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS, evicted);
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
+ ip6_frag_ecn(hdr));
if (fq != NULL) {
int ret;
@@ -573,7 +567,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, &ip6_frags);
return ret;
}
@@ -644,9 +638,13 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[0].data = &net->ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[2].data = &net->ipv6.frags.timeout;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
- hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+ hdr = register_net_sysctl(net, "net/ipv6", table);
if (hdr == NULL)
goto err_reg;
@@ -674,7 +672,7 @@ static struct ctl_table_header *ip6_ctl_header;
static int ip6_frags_sysctl_register(void)
{
- ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
+ ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
ip6_frags_ctl_table);
return ip6_ctl_header == NULL ? -ENOMEM : 0;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 026caef0326..f23fbd28a50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -24,8 +24,11 @@
* Fixed routing subtrees.
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/capability.h>
#include <linux/errno.h>
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
@@ -54,6 +57,7 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
+#include <net/nexthop.h>
#include <asm/uaccess.h>
@@ -61,21 +65,18 @@
#include <linux/sysctl.h>
#endif
-/* Set to 3 to get tracing. */
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RDBG(x) printk x
-#define RT6_TRACE(x...) printk(KERN_DEBUG x)
-#else
-#define RDBG(x)
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
-#define CLONE_OFFLINK_ROUTE 0
+enum rt6_nud_state {
+ RT6_NUD_FAIL_HARD = -3,
+ RT6_NUD_FAIL_PROBE = -2,
+ RT6_NUD_FAIL_DO_RR = -1,
+ RT6_NUD_SUCCEED = 1
+};
-static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+ const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
@@ -83,36 +84,154 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
- struct in6_addr *prefix, int prefixlen,
- struct in6_addr *gwaddr, int ifindex,
- unsigned pref);
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex,
+ unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
- struct in6_addr *prefix, int prefixlen,
- struct in6_addr *gwaddr, int ifindex);
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex);
#endif
+static void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+ struct inet_peer_base *base;
+ struct inet_peer *peer;
+
+ base = inetpeer_base_ptr(rt->_rt6i_peer);
+ if (!base)
+ return;
+
+ peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+ if (peer) {
+ if (!rt6_set_peer(rt, peer))
+ inet_putpeer(peer);
+ }
+}
+
+static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
+{
+ if (rt6_has_peer(rt))
+ return rt6_peer_ptr(rt);
+
+ rt6_bind_peer(rt, create);
+ return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
+}
+
+static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+{
+ return __rt6_get_peer(rt, 1);
+}
+
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ struct inet_peer *peer;
+ u32 *p = NULL;
+
+ if (!(rt->dst.flags & DST_HOST))
+ return NULL;
+
+ peer = rt6_get_peer_create(rt);
+ if (peer) {
+ u32 *old_p = __DST_METRICS_PTR(old);
+ unsigned long prev, new;
+
+ p = peer->metrics;
+ if (inet_metrics_new(peer) ||
+ (old & DST_METRICS_FORCE_OVERWRITE))
+ memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+ new = (unsigned long) p;
+ prev = cmpxchg(&dst->_metrics, old, new);
+
+ if (prev != old) {
+ p = __DST_METRICS_PTR(prev);
+ if (prev & DST_METRICS_READ_ONLY)
+ p = NULL;
+ }
+ }
+ return p;
+}
+
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ struct in6_addr *p = &rt->rt6i_gateway;
+
+ if (!ipv6_addr_any(p))
+ return (const void *) p;
+ else if (skb)
+ return &ipv6_hdr(skb)->daddr;
+ return daddr;
+}
+
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ struct neighbour *n;
+
+ daddr = choose_neigh_daddr(rt, skb, daddr);
+ n = __ipv6_neigh_lookup(dst->dev, daddr);
+ if (n)
+ return n;
+ return neigh_create(&nd_tbl, daddr, dst->dev);
+}
+
static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.protocol = cpu_to_be16(ETH_P_IPV6),
.gc = ip6_dst_gc,
.gc_thresh = 1024,
.check = ip6_dst_check,
+ .default_advmss = ip6_default_advmss,
+ .mtu = ip6_mtu,
+ .cow_metrics = ipv6_cow_metrics,
.destroy = ip6_dst_destroy,
.ifdown = ip6_dst_ifdown,
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
+ .redirect = rt6_do_redirect,
.local_out = __ip6_local_out,
+ .neigh_lookup = ip6_neigh_lookup,
};
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
+{
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
+}
+
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+}
+
+static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
+ unsigned long old)
{
+ return NULL;
}
static struct dst_ops ip6_dst_blackhole_ops = {
@@ -120,16 +239,24 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.protocol = cpu_to_be16(ETH_P_IPV6),
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
+ .mtu = ip6_blackhole_mtu,
+ .default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
+ .redirect = ip6_rt_blackhole_redirect,
+ .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .neigh_lookup = ip6_neigh_lookup,
+};
+
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+ [RTAX_HOPLIMIT - 1] = 0,
};
-static struct rt6_info ip6_null_entry_template = {
+static const struct rt6_info ip6_null_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -ENETUNREACH,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
},
@@ -141,16 +268,12 @@ static struct rt6_info ip6_null_entry_template = {
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-
-static struct rt6_info ip6_prohibit_entry_template = {
+static const struct rt6_info ip6_prohibit_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EACCES,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
},
@@ -160,15 +283,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
.rt6i_ref = ATOMIC_INIT(1),
};
-static struct rt6_info ip6_blk_hole_entry_template = {
+static const struct rt6_info ip6_blk_hole_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -179,38 +301,46 @@ static struct rt6_info ip6_blk_hole_entry_template = {
#endif
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags,
+ struct fib6_table *table)
{
- return (struct rt6_info *)dst_alloc(ops);
+ struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+ 0, DST_OBSOLETE_FORCE_CHK, flags);
+
+ if (rt) {
+ struct dst_entry *dst = &rt->dst;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+ rt->rt6i_genid = rt_genid_ipv6(net);
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ }
+ return rt;
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
- struct inet_peer *peer = rt->rt6i_peer;
+ struct dst_entry *from = dst->from;
+
+ if (!(rt->dst.flags & DST_HOST))
+ dst_destroy_metrics_generic(dst);
- if (idev != NULL) {
+ if (idev) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
- if (peer) {
- BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
- rt->rt6i_peer = NULL;
- inet_putpeer(peer);
- }
-}
-
-void rt6_bind_peer(struct rt6_info *rt, int create)
-{
- struct inet_peer *peer;
- if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
- return;
+ dst->from = NULL;
+ dst_release(from);
- peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
- if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+ if (rt6_has_peer(rt)) {
+ struct inet_peer *peer = rt6_peer_ptr(rt);
inet_putpeer(peer);
+ }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -221,26 +351,86 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev != NULL) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
+ if (dev != loopback_dev) {
+ if (idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
}
}
}
-static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+static bool rt6_check_expired(const struct rt6_info *rt)
+{
+ if (rt->rt6i_flags & RTF_EXPIRES) {
+ if (time_after(jiffies, rt->dst.expires))
+ return true;
+ } else if (rt->dst.from) {
+ return rt6_check_expired((struct rt6_info *) rt->dst.from);
+ }
+ return false;
+}
+
+/* Multipath route selection:
+ * Hash based function using packet header and flowlabel.
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+ const struct flowi6 *fl6)
{
- return (rt->rt6i_flags & RTF_EXPIRES) &&
- time_after(jiffies, rt->rt6i_expires);
+ unsigned int val = fl6->flowi6_proto;
+
+ val ^= ipv6_addr_hash(&fl6->daddr);
+ val ^= ipv6_addr_hash(&fl6->saddr);
+
+ /* Work only if this not encapsulated */
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_UDP:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ val ^= (__force u16)fl6->fl6_sport;
+ val ^= (__force u16)fl6->fl6_dport;
+ break;
+
+ case IPPROTO_ICMPV6:
+ val ^= (__force u16)fl6->fl6_icmp_type;
+ val ^= (__force u16)fl6->fl6_icmp_code;
+ break;
+ }
+ /* RFC6438 recommands to use flowlabel */
+ val ^= (__force u32)fl6->flowlabel;
+
+ /* Perhaps, we need to tune, this function? */
+ val = val ^ (val >> 7) ^ (val >> 12);
+ return val % candidate_count;
}
-static inline int rt6_need_strict(struct in6_addr *daddr)
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+ struct flowi6 *fl6, int oif,
+ int strict)
{
- return ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
+ struct rt6_info *sibling, *next_sibling;
+ int route_choosen;
+
+ route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+ /* Don't change the route, if route_choosen == 0
+ * (siblings does not include ourself)
+ */
+ if (route_choosen)
+ list_for_each_entry_safe(sibling, next_sibling,
+ &match->rt6i_siblings, rt6i_siblings) {
+ route_choosen--;
+ if (route_choosen == 0) {
+ if (rt6_score_route(sibling, oif, strict) < 0)
+ break;
+ match = sibling;
+ break;
+ }
+ }
+ return match;
}
/*
@@ -249,7 +439,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *rt,
- struct in6_addr *saddr,
+ const struct in6_addr *saddr,
int oif,
int flags)
{
@@ -260,13 +450,13 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
goto out;
for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
- struct net_device *dev = sprt->rt6i_dev;
+ struct net_device *dev = sprt->dst.dev;
if (oif) {
if (dev->ifindex == oif)
return sprt;
if (dev->flags & IFF_LOOPBACK) {
- if (sprt->rt6i_idev == NULL ||
+ if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
if (flags & RT6_LOOKUP_F_IFACE && oif)
continue;
@@ -295,9 +485,27 @@ out:
}
#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+ struct work_struct work;
+ struct in6_addr target;
+ struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+ struct in6_addr mcaddr;
+ struct __rt6_probe_work *work =
+ container_of(w, struct __rt6_probe_work, work);
+
+ addrconf_addr_solict_mult(&work->target, &mcaddr);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ dev_put(work->dev);
+ kfree(w);
+}
+
static void rt6_probe(struct rt6_info *rt)
{
- struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
+ struct neighbour *neigh;
/*
* Okay, this does not seem to be appropriate
* for now, however, we need to check if it
@@ -306,22 +514,40 @@ static void rt6_probe(struct rt6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!neigh || (neigh->nud_state & NUD_VALID))
+ if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
return;
- read_lock_bh(&neigh->lock);
- if (!(neigh->nud_state & NUD_VALID) &&
+ rcu_read_lock_bh();
+ neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ if (neigh) {
+ write_lock(&neigh->lock);
+ if (neigh->nud_state & NUD_VALID)
+ goto out;
+ }
+
+ if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct in6_addr mcaddr;
- struct in6_addr *target;
+ struct __rt6_probe_work *work;
- neigh->updated = jiffies;
- read_unlock_bh(&neigh->lock);
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
- target = (struct in6_addr *)&neigh->primary_key;
- addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
- } else
- read_unlock_bh(&neigh->lock);
+ if (neigh && work)
+ __neigh_set_probe_once(neigh);
+
+ if (neigh)
+ write_unlock(&neigh->lock);
+
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
+ } else {
+out:
+ write_unlock(&neigh->lock);
+ }
+ rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -334,7 +560,7 @@ static inline void rt6_probe(struct rt6_info *rt)
*/
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
- struct net_device *dev = rt->rt6i_dev;
+ struct net_device *dev = rt->dst.dev;
if (!oif || dev->ifindex == oif)
return 2;
if ((dev->flags & IFF_LOOPBACK) &&
@@ -343,74 +569,91 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
return 0;
}
-static inline int rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
- struct neighbour *neigh = rt->rt6i_nexthop;
- int m;
+ struct neighbour *neigh;
+ enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
- m = 1;
- else if (neigh) {
- read_lock_bh(&neigh->lock);
+ return RT6_NUD_SUCCEED;
+
+ rcu_read_lock_bh();
+ neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ if (neigh) {
+ read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
- m = 2;
+ ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
- else if (neigh->nud_state & NUD_FAILED)
- m = 0;
-#endif
+ else if (!(neigh->nud_state & NUD_FAILED))
+ ret = RT6_NUD_SUCCEED;
else
- m = 1;
- read_unlock_bh(&neigh->lock);
- } else
- m = 0;
- return m;
+ ret = RT6_NUD_FAIL_PROBE;
+#endif
+ read_unlock(&neigh->lock);
+ } else {
+ ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
+ }
+ rcu_read_unlock_bh();
+
+ return ret;
}
static int rt6_score_route(struct rt6_info *rt, int oif,
int strict)
{
- int m, n;
+ int m;
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
- return -1;
+ return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
- n = rt6_check_neigh(rt);
- if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
- return -1;
+ if (strict & RT6_LOOKUP_F_REACHABLE) {
+ int n = rt6_check_neigh(rt);
+ if (n < 0)
+ return n;
+ }
return m;
}
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
- int *mpri, struct rt6_info *match)
+ int *mpri, struct rt6_info *match,
+ bool *do_rr)
{
int m;
+ bool match_do_rr = false;
if (rt6_check_expired(rt))
goto out;
m = rt6_score_route(rt, oif, strict);
- if (m < 0)
+ if (m == RT6_NUD_FAIL_DO_RR) {
+ match_do_rr = true;
+ m = 0; /* lowest valid score */
+ } else if (m == RT6_NUD_FAIL_HARD) {
goto out;
+ }
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(rt);
+
+ /* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
- if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(match);
+ *do_rr = match_do_rr;
*mpri = m;
match = rt;
- } else if (strict & RT6_LOOKUP_F_REACHABLE) {
- rt6_probe(rt);
}
-
out:
return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
struct rt6_info *rr_head,
- u32 metric, int oif, int strict)
+ u32 metric, int oif, int strict,
+ bool *do_rr)
{
struct rt6_info *rt, *match;
int mpri = -1;
@@ -418,10 +661,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
for (rt = rr_head; rt && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
@@ -430,18 +673,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
struct rt6_info *match, *rt0;
struct net *net;
-
- RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
- __func__, fn->leaf, oif);
+ bool do_rr = false;
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ &do_rr);
- if (!match &&
- (strict & RT6_LOOKUP_F_REACHABLE)) {
+ if (do_rr) {
struct rt6_info *next = rt0->dst.rt6_next;
/* no entries matched; do round-robin */
@@ -452,16 +693,13 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
fn->rr_ptr = next;
}
- RT6_TRACE("%s() => %p\n",
- __func__, match);
-
- net = dev_net(rt0->rt6i_dev);
+ net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
- struct in6_addr *gwaddr)
+ const struct in6_addr *gwaddr)
{
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
@@ -505,8 +743,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
prefix = &prefix_buf;
}
- rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
- dev->ifindex);
+ if (rinfo->prefix_len == 0)
+ rt = rt6_get_dflt_router(gwaddr, dev);
+ else
+ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
+ gwaddr, dev->ifindex);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -521,13 +762,12 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime)) {
- rt->rt6i_flags &= ~RTF_EXPIRES;
- } else {
- rt->rt6i_expires = jiffies + HZ * lifetime;
- rt->rt6i_flags |= RTF_EXPIRES;
- }
- dst_release(&rt->dst);
+ if (!addrconf_finite_timeout(lifetime))
+ rt6_clean_expires(rt);
+ else
+ rt6_set_expires(rt, jiffies + HZ * lifetime);
+
+ ip6_rt_put(rt);
}
return 0;
}
@@ -549,21 +789,23 @@ do { \
goto restart; \
} \
} \
-} while(0)
+} while (0)
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi *fl, int flags)
+ struct flowi6 *fl6, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt;
read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
rt = fn->leaf;
- rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
- BACKTRACK(net, &fl->fl6_src);
+ rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+ BACKTRACK(net, &fl6->saddr);
out:
dst_use(&rt->dst, jiffies);
read_unlock_bh(&table->tb6_lock);
@@ -571,22 +813,29 @@ out:
}
+struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+ int flags)
+{
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+}
+EXPORT_SYMBOL_GPL(ip6_route_lookup);
+
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int strict)
{
- struct flowi fl = {
- .oif = oif,
- .fl6_dst = *daddr,
+ struct flowi6 fl6 = {
+ .flowi6_oif = oif,
+ .daddr = *daddr,
};
struct dst_entry *dst;
int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
if (saddr) {
- memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
+ memcpy(&fl6.saddr, saddr, sizeof(*saddr));
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -603,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup);
be destroyed.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info);
+ err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -619,13 +869,14 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
int ip6_ins_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = dev_net(rt->rt6i_dev),
+ .nl_net = dev_net(rt->dst.dev),
};
- return __ip6_ins_rt(rt, &info);
+ return __ip6_ins_rt(rt, &info, NULL, 0);
}
-static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
- struct in6_addr *saddr)
+static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_info *rt;
@@ -633,81 +884,38 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
* Clone the route.
*/
- rt = ip6_rt_copy(ort);
+ rt = ip6_rt_copy(ort, daddr);
if (rt) {
- struct neighbour *neigh;
- int attempts = !in_softirq();
-
- if (!(rt->rt6i_flags&RTF_GATEWAY)) {
- if (rt->rt6i_dst.plen != 128 &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
- rt->rt6i_flags |= RTF_ANYCAST;
- ipv6_addr_copy(&rt->rt6i_gateway, daddr);
- }
+ if (ort->rt6i_dst.plen != 128 &&
+ ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+ rt->rt6i_flags |= RTF_ANYCAST;
- ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
- rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
- rt->dst.flags |= DST_HOST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
- ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+ rt->rt6i_src.addr = *saddr;
rt->rt6i_src.plen = 128;
}
#endif
-
- retry:
- neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
- if (IS_ERR(neigh)) {
- struct net *net = dev_net(rt->rt6i_dev);
- int saved_rt_min_interval =
- net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int saved_rt_elasticity =
- net->ipv6.sysctl.ip6_rt_gc_elasticity;
-
- if (attempts-- > 0) {
- net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
- net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
-
- ip6_dst_gc(&net->ipv6.ip6_dst_ops);
-
- net->ipv6.sysctl.ip6_rt_gc_elasticity =
- saved_rt_elasticity;
- net->ipv6.sysctl.ip6_rt_gc_min_interval =
- saved_rt_min_interval;
- goto retry;
- }
-
- if (net_ratelimit())
- printk(KERN_WARNING
- "ipv6: Neighbour table overflow.\n");
- dst_free(&rt->dst);
- return NULL;
- }
- rt->rt6i_nexthop = neigh;
-
}
return rt;
}
-static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
+static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
+ const struct in6_addr *daddr)
{
- struct rt6_info *rt = ip6_rt_copy(ort);
- if (rt) {
- ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
- rt->rt6i_dst.plen = 128;
+ struct rt6_info *rt = ip6_rt_copy(ort, daddr);
+
+ if (rt)
rt->rt6i_flags |= RTF_CACHE;
- rt->dst.flags |= DST_HOST;
- rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
- }
return rt;
}
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
- struct flowi *fl, int flags)
+ struct flowi6 *fl6, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
@@ -722,12 +930,13 @@ relookup:
read_lock_bh(&table->tb6_lock);
restart_2:
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
rt = rt6_select(fn, oif, strict | reachable);
-
- BACKTRACK(net, &fl->fl6_src);
+ if (rt->rt6i_nsiblings)
+ rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
+ BACKTRACK(net, &fl6->saddr);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
@@ -735,17 +944,14 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
- else {
-#if CLONE_OFFLINK_ROUTE
- nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
+ if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
+ nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
+ else if (!(rt->dst.flags & DST_HOST))
+ nrt = rt6_alloc_clone(rt, &fl6->daddr);
+ else
goto out2;
-#endif
- }
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
@@ -762,7 +968,7 @@ restart:
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
goto relookup;
out:
@@ -780,81 +986,90 @@ out2:
}
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi *fl, int flags)
+ struct flowi6 *fl6, int flags)
+{
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+}
+
+static struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
{
- return ip6_pol_route(net, table, fl->iif, fl, flags);
+ if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
void ip6_route_input(struct sk_buff *skb)
{
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct flowi fl = {
- .iif = skb->dev->ifindex,
- .fl6_dst = iph->daddr,
- .fl6_src = iph->saddr,
- .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
- .mark = skb->mark,
- .proto = iph->nexthdr,
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = iph->nexthdr,
};
- if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
- flags |= RT6_LOOKUP_F_IFACE;
-
- skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
+ skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi *fl, int flags)
+ struct flowi6 *fl6, int flags)
{
- return ip6_pol_route(net, table, fl->oif, fl, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
- struct flowi *fl)
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6)
{
int flags = 0;
- if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
+
+ if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl->fl6_src))
+ if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
-int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *ort = (struct rt6_info *) *dstp;
- struct rt6_info *rt = (struct rt6_info *)
- dst_alloc(&ip6_dst_blackhole_ops);
+ struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
struct dst_entry *new = NULL;
+ rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
new = &rt->dst;
- atomic_set(&new->__refcnt, 1);
+ memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_init_peer(rt, net->ipv6.peers);
+
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
- memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
- new->dev = ort->dst.dev;
- if (new->dev)
- dev_hold(new->dev);
+ if (dst_metrics_read_only(&ort->dst))
+ new->_metrics = ort->dst._metrics;
+ else
+ dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
- rt->rt6i_expires = 0;
- ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
- rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ rt->rt6i_flags = ort->rt6i_flags;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -865,11 +1080,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
dst_free(new);
}
- dst_release(*dstp);
- *dstp = new;
- return new ? 0 : -ENOMEM;
+ dst_release(dst_orig);
+ return new ? new : ERR_PTR(-ENOMEM);
}
-EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
/*
* Destination cache support functions
@@ -881,10 +1094,20 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt = (struct rt6_info *) dst;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
- return dst;
+ /* All IPV6 dsts are created with ->obsolete set to the value
+ * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+ * into this function always.
+ */
+ if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
+ return NULL;
+
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
- return NULL;
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return dst;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -913,33 +1136,188 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
- if (rt->rt6i_flags&RTF_CACHE) {
- dst_set_expires(&rt->dst, 0);
- rt->rt6i_flags |= RTF_EXPIRES;
- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+ if (rt->rt6i_flags & RTF_CACHE) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
+ }
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
+ dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ struct net *net = dev_net(dst->dev);
+
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
+ u32 features = dst_metric(dst, RTAX_FEATURES);
mtu = IPV6_MIN_MTU;
- dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(dst, RTAX_FEATURES, features);
}
- dst->metrics[RTAX_MTU-1] = mtu;
- call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+ dst_metric_set(dst, RTAX_MTU, mtu);
+ rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
-static int ipv6_get_mtu(struct net_device *dev);
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+ int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = ip6_flowinfo(iph);
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+ ip6_update_pmtu(skb, sock_net(sk), mtu,
+ sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
+/* Handle redirects */
+struct ip6rd_flowi {
+ struct flowi6 fl6;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ int flags)
+{
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+ struct rt6_info *rt;
+ struct fib6_node *fn;
+
+ /* Get the "current" route for this destination and
+ * check if the redirect has come from approriate router.
+ *
+ * RFC 4861 specifies that redirects should only be
+ * accepted if they come from the nexthop to the target.
+ * Due to the way the routes are chosen, this notion
+ * is a bit fuzzy and one might need to check all possible
+ * routes.
+ */
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt6_check_expired(rt))
+ continue;
+ if (rt->dst.error)
+ break;
+ if (!(rt->rt6i_flags & RTF_GATEWAY))
+ continue;
+ if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+ continue;
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ continue;
+ break;
+ }
+
+ if (!rt)
+ rt = net->ipv6.ip6_null_entry;
+ else if (rt->dst.error) {
+ rt = net->ipv6.ip6_null_entry;
+ goto out;
+ }
+ BACKTRACK(net, &fl6->saddr);
+out:
+ dst_hold(&rt->dst);
+
+ read_unlock_bh(&table->tb6_lock);
+
+ return rt;
+};
-static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
+static struct dst_entry *ip6_route_redirect(struct net *net,
+ const struct flowi6 *fl6,
+ const struct in6_addr *gateway)
{
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip6rd_flowi rdfl;
+
+ rdfl.fl6 = *fl6;
+ rdfl.gateway = *gateway;
+
+ return fib6_rule_lookup(net, &rdfl.fl6,
+ flags, __ip6_route_redirect);
+}
+
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = ip6_flowinfo(iph);
+
+ dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
+
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+ u32 mark)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.daddr = msg->dest;
+ fl6.saddr = iph->daddr;
+
+ dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+{
+ struct net_device *dev = dst->dev;
+ unsigned int mtu = dst_mtu(dst);
+ struct net *net = dev_net(dev);
+
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
@@ -956,51 +1334,55 @@ static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
return mtu;
}
+static unsigned int ip6_mtu(const struct dst_entry *dst)
+{
+ struct inet6_dev *idev;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ if (mtu)
+ goto out;
+
+ mtu = IPV6_MIN_MTU;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
+}
+
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
- struct neighbour *neigh,
- const struct in6_addr *addr)
+ struct flowi6 *fl6)
{
+ struct dst_entry *dst;
struct rt6_info *rt;
struct inet6_dev *idev = in6_dev_get(dev);
struct net *net = dev_net(dev);
- if (unlikely(idev == NULL))
- return NULL;
+ if (unlikely(!idev))
+ return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
- if (unlikely(rt == NULL)) {
+ rt = ip6_dst_alloc(net, dev, 0, NULL);
+ if (unlikely(!rt)) {
in6_dev_put(idev);
+ dst = ERR_PTR(-ENOMEM);
goto out;
}
- dev_hold(dev);
- if (neigh)
- neigh_hold(neigh);
- else {
- neigh = ndisc_get_neigh(dev, addr);
- if (IS_ERR(neigh))
- neigh = NULL;
- }
-
- rt->rt6i_dev = dev;
- rt->rt6i_idev = idev;
- rt->rt6i_nexthop = neigh;
- atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+ rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
-
-#if 0 /* there's no chance to use these for ndisc */
- rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
- ? DST_HOST
- : 0;
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_gateway = fl6->daddr;
+ rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
-#endif
+ rt->rt6i_idev = idev;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
spin_lock_bh(&icmp6_dst_lock);
rt->dst.next = icmp6_dst_gc_list;
@@ -1009,17 +1391,17 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
fib6_force_start_gc(net);
+ dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
+
out:
- return &rt->dst;
+ return dst;
}
int icmp6_dst_gc(void)
{
- struct dst_entry *dst, *next, **pprev;
+ struct dst_entry *dst, **pprev;
int more = 0;
- next = NULL;
-
spin_lock_bh(&icmp6_dst_lock);
pprev = &icmp6_dst_gc_list;
@@ -1059,7 +1441,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
static int ip6_dst_gc(struct dst_ops *ops)
{
- unsigned long now = jiffies;
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
@@ -1069,13 +1450,12 @@ static int ip6_dst_gc(struct dst_ops *ops)
int entries;
entries = dst_entries_get_fast(ops);
- if (time_after(rt_last_gc + rt_min_interval, now) &&
+ if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
entries <= rt_max_size)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
- net->ipv6.ip6_rt_last_gc = now;
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1084,43 +1464,6 @@ out:
return entries > rt_max_size;
}
-/* Clean host part of a prefix. Not necessary in radix tree,
- but results in cleaner routing tables.
-
- Remove it only when all the things will work!
- */
-
-static int ipv6_get_mtu(struct net_device *dev)
-{
- int mtu = IPV6_MIN_MTU;
- struct inet6_dev *idev;
-
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
- return mtu;
-}
-
-int ip6_dst_hoplimit(struct dst_entry *dst)
-{
- int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hoplimit < 0) {
- struct net_device *dev = dst->dev;
- struct inet6_dev *idev;
-
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev)
- hoplimit = idev->cnf.hop_limit;
- else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
- rcu_read_unlock();
- }
- return hoplimit;
-}
-
/*
*
*/
@@ -1154,23 +1497,33 @@ int ip6_route_add(struct fib6_config *cfg)
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
- table = fib6_new_table(net, cfg->fc_table);
- if (table == NULL) {
- err = -ENOBUFS;
- goto out;
+ err = -ENOBUFS;
+ if (cfg->fc_nlinfo.nlh &&
+ !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table) {
+ pr_warn("NLM_F_CREATE should be specified when creating new route\n");
+ table = fib6_new_table(net, cfg->fc_table);
+ }
+ } else {
+ table = fib6_new_table(net, cfg->fc_table);
}
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
+ if (!table)
+ goto out;
+
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
- if (rt == NULL) {
+ if (!rt) {
err = -ENOMEM;
goto out;
}
- rt->dst.obsolete = -1;
- rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
- jiffies + clock_t_to_jiffies(cfg->fc_expires) :
- 0;
+ if (cfg->fc_flags & RTF_EXPIRES)
+ rt6_set_expires(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
+ else
+ rt6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -1189,8 +1542,10 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags = DST_HOST;
+ if (rt->rt6i_dst.plen == 128) {
+ rt->dst.flags |= DST_HOST;
+ dst_metrics_set_force_overwrite(&rt->dst);
+ }
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1203,8 +1558,9 @@ int ip6_route_add(struct fib6_config *cfg)
they would result in kernel looping; promote them to reject routes
*/
if ((cfg->fc_flags & RTF_REJECT) ||
- (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
- && !(cfg->fc_flags&RTF_LOCAL))) {
+ (dev && (dev->flags & IFF_LOOPBACK) &&
+ !(addr_type & IPV6_ADDR_LOOPBACK) &&
+ !(cfg->fc_flags & RTF_LOCAL))) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
@@ -1219,19 +1575,35 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
}
}
- rt->dst.output = ip6_pkt_discard_out;
- rt->dst.input = ip6_pkt_discard;
- rt->dst.error = -ENETUNREACH;
rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+ switch (cfg->fc_type) {
+ case RTN_BLACKHOLE:
+ rt->dst.error = -EINVAL;
+ rt->dst.output = dst_discard_sk;
+ rt->dst.input = dst_discard;
+ break;
+ case RTN_PROHIBIT:
+ rt->dst.error = -EACCES;
+ rt->dst.output = ip6_pkt_prohibit_out;
+ rt->dst.input = ip6_pkt_prohibit;
+ break;
+ case RTN_THROW:
+ default:
+ rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
+ : -ENETUNREACH;
+ rt->dst.output = ip6_pkt_discard_out;
+ rt->dst.input = ip6_pkt_discard;
+ break;
+ }
goto install_route;
}
if (cfg->fc_flags & RTF_GATEWAY) {
- struct in6_addr *gw_addr;
+ const struct in6_addr *gw_addr;
int gwa_type;
gw_addr = &cfg->fc_gateway;
- ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
+ rt->rt6i_gateway = *gw_addr;
gwa_type = ipv6_addr_type(gw_addr);
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1245,84 +1617,61 @@ int ip6_route_add(struct fib6_config *cfg)
some exceptions. --ANK
*/
err = -EINVAL;
- if (!(gwa_type&IPV6_ADDR_UNICAST))
+ if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
- if (grt == NULL)
+ if (!grt)
goto out;
if (dev) {
- if (dev != grt->rt6i_dev) {
- dst_release(&grt->dst);
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
goto out;
}
} else {
- dev = grt->rt6i_dev;
+ dev = grt->dst.dev;
idev = grt->rt6i_idev;
dev_hold(dev);
in6_dev_hold(grt->rt6i_idev);
}
- if (!(grt->rt6i_flags&RTF_GATEWAY))
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
err = 0;
- dst_release(&grt->dst);
+ ip6_rt_put(grt);
if (err)
goto out;
}
err = -EINVAL;
- if (dev == NULL || (dev->flags&IFF_LOOPBACK))
+ if (!dev || (dev->flags & IFF_LOOPBACK))
goto out;
}
err = -ENODEV;
- if (dev == NULL)
+ if (!dev)
goto out;
- if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
- rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
- if (IS_ERR(rt->rt6i_nexthop)) {
- err = PTR_ERR(rt->rt6i_nexthop);
- rt->rt6i_nexthop = NULL;
+ if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+ if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+ err = -EINVAL;
goto out;
}
- }
+ rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
+ rt->rt6i_prefsrc.plen = 128;
+ } else
+ rt->rt6i_prefsrc.plen = 0;
rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- if (type > RTAX_MAX) {
- err = -EINVAL;
- goto out;
- }
-
- rt->dst.metrics[type - 1] = nla_get_u32(nla);
- }
- }
- }
-
- if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
- if (!dst_mtu(&rt->dst))
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
- if (!dst_metric(&rt->dst, RTAX_ADVMSS))
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
out:
if (dev)
@@ -1338,26 +1687,27 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
int err;
struct fib6_table *table;
- struct net *net = dev_net(rt->rt6i_dev);
+ struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry)
- return -ENOENT;
+ if (rt == net->ipv6.ip6_null_entry) {
+ err = -ENOENT;
+ goto out;
+ }
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
-
err = fib6_del(rt, info);
- dst_release(&rt->dst);
-
write_unlock_bh(&table->tb6_lock);
+out:
+ ip6_rt_put(rt);
return err;
}
int ip6_del_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = dev_net(rt->rt6i_dev),
+ .nl_net = dev_net(rt->dst.dev),
};
return __ip6_del_rt(rt, &info);
}
@@ -1370,7 +1720,7 @@ static int ip6_route_del(struct fib6_config *cfg)
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
- if (table == NULL)
+ if (!table)
return err;
read_lock_bh(&table->tb6_lock);
@@ -1382,8 +1732,8 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (cfg->fc_ifindex &&
- (rt->rt6i_dev == NULL ||
- rt->rt6i_dev->ifindex != cfg->fc_ifindex))
+ (!rt->dst.dev ||
+ rt->dst.dev->ifindex != cfg->fc_ifindex))
continue;
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
@@ -1401,109 +1751,84 @@ static int ip6_route_del(struct fib6_config *cfg)
return err;
}
-/*
- * Handle redirects
- */
-struct ip6rd_flowi {
- struct flowi fl;
- struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
- struct fib6_table *table,
- struct flowi *fl,
- int flags)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
- struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
- struct rt6_info *rt;
- struct fib6_node *fn;
+ struct net *net = dev_net(skb->dev);
+ struct netevent_redirect netevent;
+ struct rt6_info *rt, *nrt = NULL;
+ struct ndisc_options ndopts;
+ struct inet6_dev *in6_dev;
+ struct neighbour *neigh;
+ struct rd_msg *msg;
+ int optlen, on_link;
+ u8 *lladdr;
- /*
- * Get the "current" route for this destination and
- * check if the redirect has come from approriate router.
- *
- * RFC 2461 specifies that redirects should only be
- * accepted if they come from the nexthop to the target.
- * Due to the way the routes are chosen, this notion
- * is a bit fuzzy and one might need to check all possible
- * routes.
- */
+ optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
+ optlen -= sizeof(*msg);
- read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
-restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- /*
- * Current route is on-link; redirect is always invalid.
- *
- * Seems, previous statement is not true. It could
- * be node, which looks for us as on-link (f.e. proxy ndisc)
- * But then router serving it might decide, that we should
- * know truth 8)8) --ANK (980726).
- */
- if (rt6_check_expired(rt))
- continue;
- if (!(rt->rt6i_flags & RTF_GATEWAY))
- continue;
- if (fl->oif != rt->rt6i_dev->ifindex)
- continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
- continue;
- break;
+ if (optlen < 0) {
+ net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
+ return;
}
- if (!rt)
- rt = net->ipv6.ip6_null_entry;
- BACKTRACK(net, &fl->fl6_src);
-out:
- dst_hold(&rt->dst);
-
- read_unlock_bh(&table->tb6_lock);
-
- return rt;
-};
+ msg = (struct rd_msg *)icmp6_hdr(skb);
-static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
- struct in6_addr *src,
- struct in6_addr *gateway,
- struct net_device *dev)
-{
- int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct net *net = dev_net(dev);
- struct ip6rd_flowi rdfl = {
- .fl = {
- .oif = dev->ifindex,
- .fl6_dst = *dest,
- .fl6_src = *src,
- },
- };
+ if (ipv6_addr_is_multicast(&msg->dest)) {
+ net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
+ return;
+ }
- ipv6_addr_copy(&rdfl.gateway, gateway);
+ on_link = 0;
+ if (ipv6_addr_equal(&msg->dest, &msg->target)) {
+ on_link = 1;
+ } else if (ipv6_addr_type(&msg->target) !=
+ (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+ net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
+ return;
+ }
- if (rt6_need_strict(dest))
- flags |= RT6_LOOKUP_F_IFACE;
+ in6_dev = __in6_dev_get(skb->dev);
+ if (!in6_dev)
+ return;
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ return;
- return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
- flags, __ip6_route_redirect);
-}
+ /* RFC2461 8.1:
+ * The IP source address of the Redirect MUST be the same as the current
+ * first-hop router for the specified ICMP Destination Address.
+ */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
- struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
-{
- struct rt6_info *rt, *nrt = NULL;
- struct netevent_redirect netevent;
- struct net *net = dev_net(neigh->dev);
+ if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+ net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+ return;
+ }
- rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+ lladdr = NULL;
+ if (ndopts.nd_opts_tgt_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+ return;
+ }
+ }
+ rt = (struct rt6_info *) dst;
if (rt == net->ipv6.ip6_null_entry) {
- if (net_ratelimit())
- printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
- "for redirect target\n");
- goto out;
+ net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
+ return;
}
+ /* Redirect received -> path was valid.
+ * Look, redirects are sent only in response to data packets,
+ * so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->dst);
+
+ neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
+ if (!neigh)
+ return;
+
/*
* We have finally decided to accept it.
*/
@@ -1515,184 +1840,71 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
NEIGH_UPDATE_F_ISROUTER))
);
- /*
- * Redirect received -> path was valid.
- * Look, redirects are sent only in response to data packets,
- * so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Duplicate redirect: silently ignore. */
- if (neigh == rt->dst.neighbour)
- goto out;
-
- nrt = ip6_rt_copy(rt);
- if (nrt == NULL)
+ nrt = ip6_rt_copy(rt, &msg->dest);
+ if (!nrt)
goto out;
nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
- ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
- nrt->rt6i_dst.plen = 128;
- nrt->dst.flags |= DST_HOST;
-
- ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
- nrt->rt6i_nexthop = neigh_clone(neigh);
- /* Reset pmtu, it may be better */
- nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
- nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
- dst_mtu(&nrt->dst));
+ nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
+ netevent.daddr = &msg->dest;
+ netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags&RTF_CACHE) {
- ip6_del_rt(rt);
- return;
- }
-
-out:
- dst_release(&rt->dst);
-}
-
-/*
- * Handle ICMP "packet too big" messages
- * i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
- struct net *net, u32 pmtu, int ifindex)
-{
- struct rt6_info *rt, *nrt;
- int allfrag = 0;
-
- rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
- if (rt == NULL)
- return;
-
- if (pmtu >= dst_mtu(&rt->dst))
- goto out;
-
- if (pmtu < IPV6_MIN_MTU) {
- /*
- * According to RFC2460, PMTU is set to the IPv6 Minimum Link
- * MTU (1280) and a fragment header should always be included
- * after a node receiving Too Big message reporting PMTU is
- * less than the IPv6 Minimum Link MTU.
- */
- pmtu = IPV6_MIN_MTU;
- allfrag = 1;
- }
-
- /* New mtu received -> path was valid.
- They are sent only in response to data packets,
- so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Host route. If it is static, it would be better
- not to override it, but add new one, so that
- when cache entry will expire old pmtu
- would return automatically.
- */
if (rt->rt6i_flags & RTF_CACHE) {
- rt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
- dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
- rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
- goto out;
+ rt = (struct rt6_info *) dst_clone(&rt->dst);
+ ip6_del_rt(rt);
}
- /* Network route.
- Two cases are possible:
- 1. It is connected route. Action: COW
- 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
- */
- if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, daddr, saddr);
- else
- nrt = rt6_alloc_clone(rt, daddr);
-
- if (nrt) {
- nrt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-
- /* According to RFC 1981, detecting PMTU increase shouldn't be
- * happened within 5 mins, the recommended timer is 10 mins.
- * Here this route expiration time is set to ip6_rt_mtu_expires
- * which is 10 mins. After 10 mins the decreased pmtu is expired
- * and detecting PMTU increase will be automatically happened.
- */
- dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
- nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
-
- ip6_ins_rt(nrt);
- }
out:
- dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
- struct net_device *dev, u32 pmtu)
-{
- struct net *net = dev_net(dev);
-
- /*
- * RFC 1981 states that a node "MUST reduce the size of the packets it
- * is sending along the path" that caused the Packet Too Big message.
- * Since it's not possible in the general case to determine which
- * interface was used to send the original packet, we update the MTU
- * on the interface that will be used to send future packets. We also
- * update the MTU on the interface that received the Packet Too Big in
- * case the original packet was forced out that interface with
- * SO_BINDTODEVICE or similar. This is the next best thing to the
- * correct behaviour, which would be to update the MTU on all
- * interfaces.
- */
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+ neigh_release(neigh);
}
/*
* Misc support functions
*/
-static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+ const struct in6_addr *dest)
{
- struct net *net = dev_net(ort->rt6i_dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
+ struct net *net = dev_net(ort->dst.dev);
+ struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+ ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
rt->dst.output = ort->dst.output;
+ rt->dst.flags |= DST_HOST;
- memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ rt->rt6i_dst.addr = *dest;
+ rt->rt6i_dst.plen = 128;
+ dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
- rt->dst.dev = ort->dst.dev;
- if (rt->dst.dev)
- dev_hold(rt->dst.dev);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_expires = 0;
- ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
- rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
+ if (ort->rt6i_flags & RTF_GATEWAY)
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ else
+ rt->rt6i_gateway = *dest;
+ rt->rt6i_flags = ort->rt6i_flags;
+ rt6_set_from(rt, ort);
rt->rt6i_metric = 0;
- memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
+ memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
rt->rt6i_table = ort->rt6i_table;
}
return rt;
@@ -1700,24 +1912,24 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
- struct in6_addr *prefix, int prefixlen,
- struct in6_addr *gwaddr, int ifindex)
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex)
{
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
table = fib6_get_table(net, RT6_TABLE_INFO);
- if (table == NULL)
+ if (!table)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
if (!fn)
goto out;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_dev->ifindex != ifindex)
+ if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
@@ -1727,14 +1939,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
break;
}
out:
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
static struct rt6_info *rt6_add_route_info(struct net *net,
- struct in6_addr *prefix, int prefixlen,
- struct in6_addr *gwaddr, int ifindex,
- unsigned pref)
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex,
+ unsigned int pref)
{
struct fib6_config cfg = {
.fc_table = RT6_TABLE_INFO,
@@ -1743,13 +1955,13 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
- .fc_nlinfo.pid = 0,
+ .fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
};
- ipv6_addr_copy(&cfg.fc_dst, prefix);
- ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+ cfg.fc_dst = *prefix;
+ cfg.fc_gateway = *gwaddr;
/* We should treat it as a default route if prefix length is 0. */
if (!prefixlen)
@@ -1761,29 +1973,29 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
}
#endif
-struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
+struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
struct rt6_info *rt;
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
- if (table == NULL)
+ if (!table)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
- if (dev == rt->rt6i_dev &&
+ if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
break;
}
if (rt)
dst_hold(&rt->dst);
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
-struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
+struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref)
{
@@ -1793,12 +2005,12 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
- .fc_nlinfo.pid = 0,
+ .fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = dev_net(dev),
};
- ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+ cfg.fc_gateway = *gwaddr;
ip6_route_add(&cfg);
@@ -1812,13 +2024,14 @@ void rt6_purge_dflt_routers(struct net *net)
/* NOTE: Keep consistent with rt6_get_dflt_router */
table = fib6_get_table(net, RT6_TABLE_DFLT);
- if (table == NULL)
+ if (!table)
return;
restart:
read_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+ (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
ip6_del_rt(rt);
@@ -1844,9 +2057,9 @@ static void rtmsg_to_fib6_config(struct net *net,
cfg->fc_nlinfo.nl_net = net;
- ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
- ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
- ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
+ cfg->fc_dst = rtmsg->rtmsg_dst;
+ cfg->fc_src = rtmsg->rtmsg_src;
+ cfg->fc_gateway = rtmsg->rtmsg_gateway;
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -1858,7 +2071,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch(cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = copy_from_user(&rtmsg, arg,
sizeof(struct in6_rtmsg));
@@ -1918,78 +2131,52 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
-#endif
-
/*
* Allocate a dst for local (unicast / anycast) address.
*/
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
- int anycast)
+ bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
- struct neighbour *neigh;
-
- if (rt == NULL) {
- if (net_ratelimit())
- pr_warning("IPv6: Maximum number of routes reached,"
- " consider increasing route/max_size.\n");
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
+ DST_NOCOUNT, NULL);
+ if (!rt)
return ERR_PTR(-ENOMEM);
- }
- dev_hold(net->loopback_dev);
in6_dev_hold(idev);
- rt->dst.flags = DST_HOST;
+ rt->dst.flags |= DST_HOST;
rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
- rt->rt6i_dev = net->loopback_dev;
rt->rt6i_idev = idev;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
- rt->dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
if (anycast)
rt->rt6i_flags |= RTF_ANYCAST;
else
rt->rt6i_flags |= RTF_LOCAL;
- neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
- if (IS_ERR(neigh)) {
- dst_free(&rt->dst);
- /* We are casting this because that is the return
- * value type. But an errno encoded pointer is the
- * same regardless of the underlying pointer type,
- * and that's what we are returning. So this is OK.
- */
- return (struct rt6_info *) neigh;
- }
- rt->rt6i_nexthop = neigh;
-
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ rt->rt6i_gateway = *addr;
+ rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
@@ -1998,6 +2185,76 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
return rt;
}
+int ip6_route_get_saddr(struct net *net,
+ struct rt6_info *rt,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr)
+{
+ struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
+ int err = 0;
+ if (rt->rt6i_prefsrc.plen)
+ *saddr = rt->rt6i_prefsrc.addr;
+ else
+ err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
+ daddr, prefs, saddr);
+ return err;
+}
+
+/* remove deleted ip from prefsrc entries */
+struct arg_dev_net_ip {
+ struct net_device *dev;
+ struct net *net;
+ struct in6_addr *addr;
+};
+
+static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+{
+ struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
+ struct net *net = ((struct arg_dev_net_ip *)arg)->net;
+ struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
+
+ if (((void *)rt->dst.dev == dev || !dev) &&
+ rt != net->ipv6.ip6_null_entry &&
+ ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ /* remove prefsrc entry */
+ rt->rt6i_prefsrc.plen = 0;
+ }
+ return 0;
+}
+
+void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
+{
+ struct net *net = dev_net(ifp->idev->dev);
+ struct arg_dev_net_ip adni = {
+ .dev = ifp->idev->dev,
+ .net = net,
+ .addr = &ifp->addr,
+ };
+ fib6_clean_all(net, fib6_remove_prefsrc, &adni);
+}
+
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when gateway turn into host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+{
+ struct in6_addr *gateway = (struct in6_addr *)arg;
+
+ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+ ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ return -1;
+ }
+ return 0;
+}
+
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+ fib6_clean_all(net, fib6_clean_tohost, gateway);
+}
+
struct arg_dev_net {
struct net_device *dev;
struct net *net;
@@ -2005,14 +2262,13 @@ struct arg_dev_net {
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
- struct net *net = ((struct arg_dev_net *)arg)->net;
+ const struct arg_dev_net *adn = arg;
+ const struct net_device *dev = adn->dev;
- if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
- rt != net->ipv6.ip6_null_entry) {
- RT6_TRACE("deleted by ifdown %p\n", rt);
+ if ((rt->dst.dev == dev || !dev) &&
+ rt != adn->net->ipv6.ip6_null_entry)
return -1;
- }
+
return 0;
}
@@ -2023,21 +2279,19 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
.net = net,
};
- fib6_clean_all(net, fib6_ifdown, 0, &adn);
+ fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
}
-struct rt6_mtu_change_arg
-{
+struct rt6_mtu_change_arg {
struct net_device *dev;
- unsigned mtu;
+ unsigned int mtu;
};
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
- struct net *net = dev_net(arg->dev);
/* In IPv6 pmtu discovery is not optional,
so that RTAX_MTU lock cannot disable it.
@@ -2046,7 +2300,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
*/
idev = __in6_dev_get(arg->dev);
- if (idev == NULL)
+ if (!idev)
return 0;
/* For administrative MTU increase, there is no way to discover
@@ -2063,25 +2317,24 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
also have the lowest MTU, TOO BIG MESSAGE will be lead to
PMTU discouvery.
*/
- if (rt->rt6i_dev == arg->dev &&
+ if (rt->dst.dev == arg->dev &&
!dst_metric_locked(&rt->dst, RTAX_MTU) &&
(dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
return 0;
}
-void rt6_mtu_change(struct net_device *dev, unsigned mtu)
+void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
struct rt6_mtu_change_arg arg = {
.dev = dev,
.mtu = mtu,
};
- fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
+ fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
@@ -2090,6 +2343,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2112,14 +2366,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_src_len = rtm->rtm_src_len;
cfg->fc_flags = RTF_UP;
cfg->fc_protocol = rtm->rtm_protocol;
+ cfg->fc_type = rtm->rtm_type;
- if (rtm->rtm_type == RTN_UNREACHABLE)
+ if (rtm->rtm_type == RTN_UNREACHABLE ||
+ rtm->rtm_type == RTN_BLACKHOLE ||
+ rtm->rtm_type == RTN_PROHIBIT ||
+ rtm->rtm_type == RTN_THROW)
cfg->fc_flags |= RTF_REJECT;
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
- cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2146,6 +2404,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
}
+ if (tb[RTA_PREFSRC])
+ nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
+
if (tb[RTA_OIF])
cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
@@ -2160,12 +2421,72 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_TABLE])
cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+ if (tb[RTA_MULTIPATH]) {
+ cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+ cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+ }
+
err = 0;
errout:
return err;
}
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+ struct fib6_config r_cfg;
+ struct rtnexthop *rtnh;
+ int remaining;
+ int attrlen;
+ int err = 0, last_err = 0;
+
+beginning:
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+ remaining = cfg->fc_mp_len;
+
+ /* Parse a Multipath Entry */
+ while (rtnh_ok(rtnh, remaining)) {
+ memcpy(&r_cfg, cfg, sizeof(*cfg));
+ if (rtnh->rtnh_ifindex)
+ r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+ err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+ if (err) {
+ last_err = err;
+ /* If we are trying to remove a route, do not stop the
+ * loop when ip6_route_del() fails (because next hop is
+ * already gone), we should try to remove all next hops.
+ */
+ if (add) {
+ /* If add fails, we should try to delete all
+ * next hops that have been already added.
+ */
+ add = 0;
+ goto beginning;
+ }
+ }
+ /* Because each route is added like a single route we remove
+ * this flag after the first nexthop (if there is a collision,
+ * we have already fail to add the first nexthop:
+ * fib6_add_rt2node() has reject it).
+ */
+ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return last_err;
+}
+
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct fib6_config cfg;
int err;
@@ -2174,10 +2495,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_del(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 0);
+ else
+ return ip6_route_del(&cfg);
}
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct fib6_config cfg;
int err;
@@ -2186,7 +2510,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_add(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 1);
+ else
+ return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(void)
@@ -2207,7 +2534,7 @@ static inline size_t rt6_nlmsg_size(void)
static int rt6_fill_node(struct net *net,
struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
- int iif, int type, u32 pid, u32 seq,
+ int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
struct rtmsg *rtm;
@@ -2222,8 +2549,8 @@ static int rt6_fill_node(struct net *net,
}
}
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
- if (nlh == NULL)
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
+ if (!nlh)
return -EMSGSIZE;
rtm = nlmsg_data(nlh);
@@ -2236,39 +2563,60 @@ static int rt6_fill_node(struct net *net,
else
table = RT6_TABLE_UNSPEC;
rtm->rtm_table = table;
- NLA_PUT_U32(skb, RTA_TABLE, table);
- if (rt->rt6i_flags&RTF_REJECT)
- rtm->rtm_type = RTN_UNREACHABLE;
- else if (rt->rt6i_flags&RTF_LOCAL)
+ if (nla_put_u32(skb, RTA_TABLE, table))
+ goto nla_put_failure;
+ if (rt->rt6i_flags & RTF_REJECT) {
+ switch (rt->dst.error) {
+ case -EINVAL:
+ rtm->rtm_type = RTN_BLACKHOLE;
+ break;
+ case -EACCES:
+ rtm->rtm_type = RTN_PROHIBIT;
+ break;
+ case -EAGAIN:
+ rtm->rtm_type = RTN_THROW;
+ break;
+ default:
+ rtm->rtm_type = RTN_UNREACHABLE;
+ break;
+ }
+ }
+ else if (rt->rt6i_flags & RTF_LOCAL)
rtm->rtm_type = RTN_LOCAL;
- else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+ else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
rtm->rtm_type = RTN_LOCAL;
else
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
- if (rt->rt6i_flags&RTF_DYNAMIC)
+ if (rt->rt6i_flags & RTF_DYNAMIC)
rtm->rtm_protocol = RTPROT_REDIRECT;
- else if (rt->rt6i_flags & RTF_ADDRCONF)
- rtm->rtm_protocol = RTPROT_KERNEL;
- else if (rt->rt6i_flags&RTF_DEFAULT)
- rtm->rtm_protocol = RTPROT_RA;
+ else if (rt->rt6i_flags & RTF_ADDRCONF) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+ rtm->rtm_protocol = RTPROT_RA;
+ else
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ }
- if (rt->rt6i_flags&RTF_CACHE)
+ if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
if (dst) {
- NLA_PUT(skb, RTA_DST, 16, dst);
+ if (nla_put(skb, RTA_DST, 16, dst))
+ goto nla_put_failure;
rtm->rtm_dst_len = 128;
} else if (rtm->rtm_dst_len)
- NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+ if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
+ goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
if (src) {
- NLA_PUT(skb, RTA_SRC, 16, src);
+ if (nla_put(skb, RTA_SRC, 16, src))
+ goto nla_put_failure;
rtm->rtm_src_len = 128;
- } else if (rtm->rtm_src_len)
- NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+ } else if (rtm->rtm_src_len &&
+ nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
+ goto nla_put_failure;
#endif
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
@@ -2286,35 +2634,39 @@ static int rt6_fill_node(struct net *net,
}
} else
#endif
- NLA_PUT_U32(skb, RTA_IIF, iif);
+ if (nla_put_u32(skb, RTA_IIF, iif))
+ goto nla_put_failure;
} else if (dst) {
- struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
struct in6_addr saddr_buf;
- if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- dst, 0, &saddr_buf) == 0)
- NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
+ nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+ goto nla_put_failure;
}
- if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
- goto nla_put_failure;
+ if (rt->rt6i_prefsrc.plen) {
+ struct in6_addr saddr_buf;
+ saddr_buf = rt->rt6i_prefsrc.addr;
+ if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+ goto nla_put_failure;
+ }
- if (rt->dst.neighbour)
- NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ goto nla_put_failure;
- if (rt->dst.dev)
- NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+ if (rt->rt6i_flags & RTF_GATEWAY) {
+ if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
+ goto nla_put_failure;
+ }
- NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
+ if (rt->dst.dev &&
+ nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
+ goto nla_put_failure;
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- expires = 0;
- else if (rt->rt6i_expires - jiffies < INT_MAX)
- expires = rt->rt6i_expires - jiffies;
- else
- expires = INT_MAX;
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
- expires, rt->dst.error) < 0)
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2337,58 +2689,76 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
return rt6_fill_node(arg->net,
arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
- NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+ NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
prefix, 0, NLM_F_MULTI);
}
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi fl;
- int err, iif = 0;
+ struct flowi6 fl6;
+ int err, iif = 0, oif = 0;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
if (err < 0)
goto errout;
err = -EINVAL;
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
goto errout;
- ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+ fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
}
if (tb[RTA_DST]) {
if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
goto errout;
- ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+ fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
}
if (tb[RTA_IIF])
iif = nla_get_u32(tb[RTA_IIF]);
if (tb[RTA_OIF])
- fl.oif = nla_get_u32(tb[RTA_OIF]);
+ oif = nla_get_u32(tb[RTA_OIF]);
+
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
if (iif) {
struct net_device *dev;
+ int flags = 0;
+
dev = __dev_get_by_index(net, iif);
if (!dev) {
err = -ENODEV;
goto errout;
}
+
+ fl6.flowi6_iif = iif;
+
+ if (!ipv6_addr_any(&fl6.saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
+ flags);
+ } else {
+ fl6.flowi6_oif = oif;
+
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (skb == NULL) {
+ if (!skb) {
+ ip6_rt_put(rt);
err = -ENOBUFS;
goto errout;
}
@@ -2399,18 +2769,17 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
skb_reset_mac_header(skb);
skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
- rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
skb_dst_set(skb, &rt->dst);
- err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+ err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, 0, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
}
@@ -2423,21 +2792,21 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
int err;
err = -ENOBUFS;
- seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
+ seq = info->nlh ? info->nlh->nlmsg_seq : 0;
skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
- if (skb == NULL)
+ if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->pid, seq, 0, 0, 0);
+ event, info->portid, seq, 0, 0, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
info->nlh, gfp_any());
return;
errout:
@@ -2446,9 +2815,9 @@ errout:
}
static int ip6_route_dev_notify(struct notifier_block *this,
- unsigned long event, void *data)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
@@ -2471,57 +2840,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
-struct rt6_proc_arg
-{
- char *buffer;
- int offset;
- int length;
- int skip;
- int len;
-};
-
-static int rt6_info_route(struct rt6_info *rt, void *p_arg)
-{
- struct seq_file *m = p_arg;
-
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
-
-#ifdef CONFIG_IPV6_SUBTREES
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
-#else
- seq_puts(m, "00000000000000000000000000000000 00 ");
-#endif
-
- if (rt->rt6i_nexthop) {
- seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
- } else {
- seq_puts(m, "00000000000000000000000000000000");
- }
- seq_printf(m, " %08x %08x %08x %08x %8s\n",
- rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, rt->rt6i_flags,
- rt->rt6i_dev ? rt->rt6i_dev->name : "");
- return 0;
-}
-
-static int ipv6_route_show(struct seq_file *m, void *v)
-{
- struct net *net = (struct net *)m->private;
- fib6_clean_all(net, rt6_info_route, 0, m);
- return 0;
-}
-
-static int ipv6_route_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, ipv6_route_show);
-}
-
static const struct file_operations ipv6_route_proc_fops = {
.owner = THIS_MODULE,
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release_net,
+ .release = seq_release_net,
};
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
@@ -2556,20 +2880,22 @@ static const struct file_operations rt6_stats_seq_fops = {
#ifdef CONFIG_SYSCTL
static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
- int delay = net->ipv6.sysctl.flush_delay;
- if (write) {
- proc_dointvec(ctl, write, buffer, lenp, ppos);
- fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
- return 0;
- } else
+ struct net *net;
+ int delay;
+ if (!write)
return -EINVAL;
+
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
+ proc_dointvec(ctl, write, buffer, lenp, ppos);
+ fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
+ return 0;
}
-ctl_table ipv6_route_table_template[] = {
+struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.flush_delay,
@@ -2653,6 +2979,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
if (table) {
table[0].data = &net->ipv6.sysctl.flush_delay;
+ table[0].extra1 = net;
table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
@@ -2662,6 +2989,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
return table;
@@ -2686,6 +3017,8 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_null_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+ ip6_template_metrics, true);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2696,6 +3029,8 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_prohibit_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+ ip6_template_metrics, true);
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2705,6 +3040,8 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_blk_hole_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+ ip6_template_metrics, true);
#endif
net->ipv6.sysctl.flush_delay = 0;
@@ -2716,10 +3053,6 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
-#ifdef CONFIG_PROC_FS
- proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
- proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
-#endif
net->ipv6.ip6_rt_gc_expire = 30*HZ;
ret = 0;
@@ -2740,10 +3073,6 @@ out_ip6_dst_ops:
static void __net_exit ip6_route_net_exit(struct net *net)
{
-#ifdef CONFIG_PROC_FS
- proc_net_remove(net, "ipv6_route");
- proc_net_remove(net, "rt6_stats");
-#endif
kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
kfree(net->ipv6.ip6_prohibit_entry);
@@ -2752,11 +3081,58 @@ static void __net_exit ip6_route_net_exit(struct net *net)
dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
+static int __net_init ip6_route_net_init_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
+ proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+#endif
+ return 0;
+}
+
+static void __net_exit ip6_route_net_exit_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ipv6_route", net->proc_net);
+ remove_proc_entry("rt6_stats", net->proc_net);
+#endif
+}
+
static struct pernet_operations ip6_route_net_ops = {
.init = ip6_route_net_init,
.exit = ip6_route_net_exit,
};
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+ struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+ if (!bp)
+ return -ENOMEM;
+ inet_peer_base_init(bp);
+ net->ipv6.peers = bp;
+ return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+ struct inet_peer_base *bp = net->ipv6.peers;
+
+ net->ipv6.peers = NULL;
+ inetpeer_invalidate_tree(bp);
+ kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+ .init = ipv6_inetpeer_init,
+ .exit = ipv6_inetpeer_exit,
+};
+
+static struct pernet_operations ip6_route_net_late_ops = {
+ .init = ip6_route_net_init_late,
+ .exit = ip6_route_net_exit_late,
+};
+
static struct notifier_block ip6_route_dev_notifier = {
.notifier_call = ip6_route_dev_notify,
.priority = 0,
@@ -2777,10 +3153,14 @@ int __init ip6_route_init(void)
if (ret)
goto out_kmem_cache;
- ret = register_pernet_subsys(&ip6_route_net_ops);
+ ret = register_pernet_subsys(&ipv6_inetpeer_ops);
if (ret)
goto out_dst_entries;
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_register_inetpeer;
+
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
/* Registering of the loopback is done before this portion of code,
@@ -2806,19 +3186,25 @@ int __init ip6_route_init(void)
if (ret)
goto xfrm6_init;
- ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
+ ret = register_pernet_subsys(&ip6_route_net_late_ops);
+ if (ret)
goto fib6_rules_init;
+ ret = -ENOBUFS;
+ if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+ goto out_register_late_subsys;
+
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
if (ret)
- goto fib6_rules_init;
+ goto out_register_late_subsys;
out:
return ret;
+out_register_late_subsys:
+ unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
fib6_rules_cleanup();
xfrm6_init:
@@ -2827,6 +3213,8 @@ out_fib6_init:
fib6_gc_cleanup();
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
@@ -2837,9 +3225,11 @@ out_kmem_cache:
void ip6_route_cleanup(void)
{
unregister_netdevice_notifier(&ip6_route_dev_notifier);
+ unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_cleanup();
xfrm6_fini();
fib6_gc_cleanup();
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
unregister_pernet_subsys(&ip6_route_net_ops);
dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6e48a80d0f2..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -17,6 +17,8 @@
* Fred Templin <fred.l.templin@boeing.com>: isatap support
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
@@ -47,7 +49,7 @@
#include <net/ip.h>
#include <net/udp.h>
#include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
@@ -63,9 +65,16 @@
#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);
static void ipip6_dev_free(struct net_device *dev);
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+ __be32 *v4dst);
+static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
@@ -79,43 +88,9 @@ struct sit_net {
};
/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-};
-
-static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
-{
- struct pcpu_tstats sum = { 0 };
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
- sum.rx_packets += tstats->rx_packets;
- sum.rx_bytes += tstats->rx_bytes;
- sum.tx_packets += tstats->tx_packets;
- sum.tx_bytes += tstats->tx_bytes;
- }
- dev->stats.rx_packets = sum.rx_packets;
- dev->stats.rx_bytes = sum.rx_bytes;
- dev->stats.tx_packets = sum.tx_packets;
- dev->stats.tx_bytes = sum.tx_bytes;
- return &dev->stats;
-}
-/*
* Must be invoked with rcu_read_lock
*/
-static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
+static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
struct net_device *dev, __be32 remote, __be32 local)
{
unsigned int h0 = HASH(remote);
@@ -123,20 +98,20 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
- for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
@@ -213,6 +188,37 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#endif
}
+static int ipip6_tunnel_create(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ int err;
+
+ err = ipip6_tunnel_init(dev);
+ if (err < 0)
+ goto out;
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ dev->priv_flags |= IFF_ISATAP;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &sit_link_ops;
+
+ dev_hold(dev);
+
+ ipip6_tunnel_link(sitn, t);
+ return 0;
+
+out:
+ return err;
+}
+
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
@@ -250,27 +256,12 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
dev_net_set(dev, net);
- if (strchr(name, '%')) {
- if (dev_alloc_name(dev, name) < 0)
- goto failed_free;
- }
-
nt = netdev_priv(dev);
nt->parms = *parms;
- if (ipip6_tunnel_init(dev) < 0)
+ if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
- ipip6_tunnel_clone_6rd(dev, sitn);
-
- if (parms->i_flags & SIT_ISATAP)
- dev->priv_flags |= IFF_ISATAP;
-
- if (register_netdevice(dev) < 0)
- goto failed_free;
-
- dev_hold(dev);
- ipip6_tunnel_link(sitn, nt);
return nt;
failed_free:
@@ -401,18 +392,13 @@ out:
return err;
}
-static void prl_entry_destroy_rcu(struct rcu_head *head)
-{
- kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
-}
-
static void prl_list_destroy_rcu(struct rcu_head *head)
{
struct ip_tunnel_prl_entry *p, *n;
p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
do {
- n = p->next;
+ n = rcu_dereference_protected(p->next, 1);
kfree(p);
p = n;
} while (p);
@@ -421,26 +407,28 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
static int
ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
{
- struct ip_tunnel_prl_entry *x, **p;
+ struct ip_tunnel_prl_entry *x;
+ struct ip_tunnel_prl_entry __rcu **p;
int err = 0;
ASSERT_RTNL();
if (a && a->addr != htonl(INADDR_ANY)) {
- for (p = &t->prl; *p; p = &(*p)->next) {
- if ((*p)->addr == a->addr) {
- x = *p;
+ for (p = &t->prl;
+ (x = rtnl_dereference(*p)) != NULL;
+ p = &x->next) {
+ if (x->addr == a->addr) {
*p = x->next;
- call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
+ kfree_rcu(x, rcu_head);
t->prl_count--;
goto out;
}
}
err = -ENXIO;
} else {
- if (t->prl) {
+ x = rtnl_dereference(t->prl);
+ if (x) {
t->prl_count = 0;
- x = t->prl;
call_rcu(&x->rcu_head, prl_list_destroy_rcu);
t->prl = NULL;
}
@@ -450,7 +438,7 @@ out:
}
static int
-isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
+isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
{
struct ip_tunnel_prl_entry *p;
int ok = 1;
@@ -463,7 +451,8 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
else
skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
} else {
- struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
+ const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
+
if (ipv6_addr_is_isatap(addr6) &&
(addr6->s6_addr32[3] == iph->saddr) &&
ipv6_chk_prefix(addr6, t->dev))
@@ -477,27 +466,58 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
static void ipip6_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
- struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
if (dev == sitn->fb_tunnel_dev) {
- rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
+ RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
} else {
- ipip6_tunnel_unlink(sitn, netdev_priv(dev));
- ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
+ ipip6_tunnel_unlink(sitn, tunnel);
+ ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available
+ */
+static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct rt6_info *rt;
+ struct sk_buff *skb2;
+
+ if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8))
+ return 1;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 1;
+
+ skb_dst_drop(skb2);
+ skb_pull(skb2, iph->ihl * 4);
+ skb_reset_network_header(skb2);
+
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+
+ if (rt)
+ ip6_rt_put(rt);
+
+ kfree_skb(skb2);
+
+ return 0;
+}
static int ipip6_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
- struct iphdr *iph = (struct iphdr*)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
@@ -511,12 +531,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
/* Impossible event. */
return 0;
- case ICMP_FRAG_NEEDED:
- /* Soft state for pmtu is maintained by IP core. */
- return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -529,19 +545,39 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (code != ICMP_EXC_TTL)
return 0;
break;
+ case ICMP_REDIRECT:
+ break;
}
err = -ENOENT;
- rcu_read_lock();
t = ipip6_tunnel_lookup(dev_net(skb->dev),
skb->dev,
iph->daddr,
iph->saddr);
- if (t == NULL || t->parms.iph.daddr == 0)
+ if (t == NULL)
+ goto out;
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ t->parms.link, 0, IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+ IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+
+ if (t->parms.iph.daddr == 0)
goto out;
err = 0;
+ if (!ipip6_err_gen_icmpv6_unreach(skb))
+ goto out;
+
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
@@ -551,77 +587,178 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
t->err_count = 1;
t->err_time = jiffies;
out:
- rcu_read_unlock();
return err;
}
-static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
+ const struct in6_addr *v6addr)
{
- if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(ipv6_hdr(skb));
+ __be32 v4embed = 0;
+ if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
+ return true;
+ return false;
}
-static int ipip6_rcv(struct sk_buff *skb)
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
{
- struct iphdr *iph;
- struct ip_tunnel *tunnel;
+ int prefix_len;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
- iph = ip_hdr(skb);
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
+static int ipip6_rcv(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct ip_tunnel *tunnel;
+ int err;
- rcu_read_lock();
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel != NULL) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
+
+ if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
+ tunnel->parms.iph.protocol != 0)
+ goto out;
- secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- skb->pkt_type = PACKET_HOST;
- if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
- !isatap_chksrc(skb, iph, tunnel)) {
+ if (packet_is_spoofed(skb, iph, tunnel)) {
tunnel->dev->stats.rx_errors++;
- rcu_read_unlock();
- kfree_skb(skb);
- return 0;
+ goto out;
+ }
+
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto out;
+ }
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
-
- __skb_tunnel_rx(skb, tunnel->dev);
-
- ipip6_ecn_decapsulate(iph, skb);
+ u64_stats_update_end(&tstats->syncp);
netif_rx(skb);
- rcu_read_unlock();
return 0;
}
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- rcu_read_unlock();
+ /* no tunnel matched, let upstream know, ipsec may handle it */
+ return 1;
out:
kfree_skb(skb);
return 0;
}
+static const struct tnl_ptk_info tpi = {
+ /* no tunnel info required for ipip. */
+ .proto = htons(ETH_P_IP),
+};
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ iph = ip_hdr(skb);
+ tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+ iph->saddr, iph->daddr);
+ if (tunnel != NULL) {
+ if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi.proto))
+ goto drop;
+ return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ }
+
+ return 1;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
/*
- * Returns the embedded IPv4 address if the IPv6 address
- * comes from 6rd / 6to4 (RFC 3056) addr space.
+ * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
+ * stores the embedded IPv4 address in v4dst and returns true.
*/
-static inline
-__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
+static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
+ __be32 *v4dst)
{
- __be32 dst = 0;
-
#ifdef CONFIG_IPV6_SIT_6RD
if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
tunnel->ip6rd.prefixlen)) {
@@ -640,14 +777,24 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
(32 - pbi1);
- dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ *v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ return true;
}
#else
if (v6dst->s6_addr16[0] == htons(0x2002)) {
/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
- memcpy(&dst, &v6dst->s6_addr16[1], 4);
+ memcpy(v4dst, &v6dst->s6_addr16[1], 4);
+ return true;
}
#endif
+ return false;
+}
+
+static inline __be32 try_6rd(struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ __be32 dst = 0;
+ check_6rd(tunnel, v6dst, &dst);
return dst;
}
@@ -660,62 +807,70 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
- struct iphdr *tiph = &tunnel->parms.iph;
- struct ipv6hdr *iph6 = ipv6_hdr(skb);
+ const struct iphdr *tiph = &tunnel->parms.iph;
+ const struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
+ struct flowi4 fl4;
int mtu;
- struct in6_addr *addr6;
+ const struct in6_addr *addr6;
int addr_type;
+ u8 ttl;
+ int err;
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
+ if (tos == 1)
+ tos = ipv6_get_dsfield(iph6);
+
/* ISATAP (RFC4214) - must come before 6to4 */
if (dev->priv_flags & IFF_ISATAP) {
struct neighbour *neigh = NULL;
+ bool do_tx_error = false;
if (skb_dst(skb))
- neigh = skb_dst(skb)->neighbour;
+ neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- if (net_ratelimit())
- printk(KERN_DEBUG "sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
- addr6 = (struct in6_addr*)&neigh->primary_key;
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
addr_type = ipv6_addr_type(addr6);
if ((addr_type & IPV6_ADDR_UNICAST) &&
ipv6_addr_is_isatap(addr6))
dst = addr6->s6_addr32[3];
else
+ do_tx_error = true;
+
+ neigh_release(neigh);
+ if (do_tx_error)
goto tx_error;
}
if (!dst)
- dst = try_6rd(&iph6->daddr, tunnel);
+ dst = try_6rd(tunnel, &iph6->daddr);
if (!dst) {
struct neighbour *neigh = NULL;
+ bool do_tx_error = false;
if (skb_dst(skb))
- neigh = skb_dst(skb)->neighbour;
+ neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- if (net_ratelimit())
- printk(KERN_DEBUG "sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
- addr6 = (struct in6_addr*)&neigh->primary_key;
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
@@ -723,22 +878,24 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
addr_type = ipv6_addr_type(addr6);
}
- if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
- goto tx_error_icmp;
+ if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
+ dst = addr6->s6_addr32[3];
+ else
+ do_tx_error = true;
- dst = addr6->s6_addr32[3];
+ neigh_release(neigh);
+ if (do_tx_error)
+ goto tx_error;
}
- {
- struct flowi fl = { .fl4_dst = dst,
- .fl4_src = tiph->saddr,
- .fl4_tos = RT_TOS(tos),
- .oif = tunnel->parms.link,
- .proto = IPPROTO_IPV6 };
- if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
- }
+ rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
+ dst, tiph->saddr,
+ 0, 0,
+ IPPROTO_IPV6, RT_TOS(tos),
+ tunnel->parms.link);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
}
if (rt->rt_type != RTN_UNICAST) {
ip_rt_put(rt);
@@ -768,9 +925,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ip_rt_put(rt);
goto tx_error;
@@ -797,7 +954,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -806,61 +963,91 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb = new_skb;
iph6 = ipv6_hdr(skb);
}
+ ttl = tiph->ttl;
+ if (ttl == 0)
+ ttl = iph6->hop_limit;
+ tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- skb->transport_header = skb->network_header;
- skb_push(skb, sizeof(struct iphdr));
- skb_reset_network_header(skb);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags = 0;
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
- /*
- * Push down and install the IPIP header.
- */
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
+ }
- iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr)>>2;
- iph->frag_off = df;
- iph->protocol = IPPROTO_IPV6;
- iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
-
- if ((iph->ttl = tiph->ttl) == 0)
- iph->ttl = iph6->hop_limit;
-
- nf_reset(skb);
- tstats = this_cpu_ptr(dev->tstats);
- __IPTUNNEL_XMIT(tstats, &dev->stats);
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
+ kfree_skb(skb);
+out:
+ dev->stats.tx_errors++;
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *tiph = &tunnel->parms.iph;
+
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto out;
+
+ ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ return NETDEV_TX_OK;
+out:
dev->stats.tx_errors++;
- dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
+static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipip_tunnel_xmit(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ipip6_tunnel_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+
+}
+
static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
struct ip_tunnel *tunnel;
- struct iphdr *iph;
+ const struct iphdr *iph;
+ struct flowi4 fl4;
tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct flowi fl = { .fl4_dst = iph->daddr,
- .fl4_src = iph->saddr,
- .fl4_tos = RT_TOS(iph->tos),
- .oif = tunnel->parms.link,
- .proto = IPPROTO_IPV6 };
- struct rtable *rt;
- if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+ struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
+ NULL,
+ iph->daddr, iph->saddr,
+ 0, 0,
+ IPPROTO_IPV6,
+ RT_TOS(iph->tos),
+ tunnel->parms.link);
+
+ if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
ip_rt_put(rt);
}
@@ -868,7 +1055,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+ tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -879,14 +1066,69 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->iflink = tunnel->parms.link;
}
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+{
+ struct net *net = t->net;
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
+ t->parms.iph.saddr = p->iph.saddr;
+ t->parms.iph.daddr = p->iph.daddr;
+ memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+ memcpy(t->dev->broadcast, &p->iph.daddr, 4);
+ ipip6_tunnel_link(sitn, t);
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
+ if (t->parms.link != p->link) {
+ t->parms.link = p->link;
+ ipip6_tunnel_bind_dev(t->dev);
+ }
+ ip_tunnel_dst_reset_all(t);
+ netdev_state_change(t->dev);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
+ struct ip_tunnel_6rd *ip6rd)
+{
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+
+ if (ip6rd->relay_prefixlen > 32 ||
+ ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
+ return -EINVAL;
+
+ ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
+ if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
+ return -EINVAL;
+ if (ip6rd->relay_prefixlen)
+ relay_prefix = ip6rd->relay_prefix &
+ htonl(0xffffffffUL <<
+ (32 - ip6rd->relay_prefixlen));
+ else
+ relay_prefix = 0;
+ if (relay_prefix != ip6rd->relay_prefix)
+ return -EINVAL;
+
+ t->ip6rd.prefix = prefix;
+ t->ip6rd.relay_prefix = relay_prefix;
+ t->ip6rd.prefixlen = ip6rd->prefixlen;
+ t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
+ netdev_state_change(t->dev);
+ return 0;
+}
+#endif
+
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -897,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -916,7 +1157,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
#ifdef CONFIG_IPV6_SIT_6RD
} else {
- ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+ ip6rd.prefix = t->ip6rd.prefix;
ip6rd.relay_prefix = t->ip6rd.relay_prefix;
ip6rd.prefixlen = t->ip6rd.prefixlen;
ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
@@ -931,7 +1172,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -939,7 +1180,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+ if (p.iph.protocol != IPPROTO_IPV6 &&
+ p.iph.protocol != IPPROTO_IPIP &&
+ p.iph.protocol != 0)
+ goto done;
+ if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
goto done;
if (p.iph.ttl)
@@ -960,28 +1205,13 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip6_tunnel_unlink(sitn, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip6_tunnel_link(sitn, t);
- netdev_state_change(dev);
}
+
+ ipip6_tunnel_update(t, &p);
}
if (t) {
err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- ipip6_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
err = -EFAULT;
} else
@@ -990,7 +1220,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == sitn->fb_tunnel_dev) {
@@ -1013,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1023,7 +1250,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELPRL:
case SIOCCHGPRL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
@@ -1031,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1044,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1052,7 +1277,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCCHG6RD:
case SIOCDEL6RD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1060,34 +1285,10 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
- struct in6_addr prefix;
- __be32 relay_prefix;
-
- err = -EINVAL;
- if (ip6rd.relay_prefixlen > 32 ||
- ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
- goto done;
-
- ipv6_addr_prefix(&prefix, &ip6rd.prefix,
- ip6rd.prefixlen);
- if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+ err = ipip6_tunnel_update_6rd(t, &ip6rd);
+ if (err < 0)
goto done;
- if (ip6rd.relay_prefixlen)
- relay_prefix = ip6rd.relay_prefix &
- htonl(0xffffffffUL <<
- (32 - ip6rd.relay_prefixlen));
- else
- relay_prefix = 0;
- if (relay_prefix != ip6rd.relay_prefix)
- goto done;
-
- ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
- t->ip6rd.relay_prefix = relay_prefix;
- t->ip6rd.prefixlen = ip6rd.prefixlen;
- t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
} else
ipip6_tunnel_clone_6rd(dev, sitn);
@@ -1113,18 +1314,27 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
- .ndo_start_xmit = ipip6_tunnel_xmit,
+ .ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_change_mtu = ipip6_tunnel_change_mtu,
- .ndo_get_stats = ipip6_get_stats,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
+#define SIT_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static void ipip6_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip6_netdev_ops;
@@ -1137,8 +1347,9 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
+ dev->features |= SIT_FEATURES;
+ dev->hw_features |= SIT_FEATURES;
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1146,16 +1357,22 @@ static int ipip6_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
+ tunnel->net = dev_net(dev);
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -1167,6 +1384,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
iph->version = 4;
@@ -1174,32 +1392,326 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
dev_hold(dev);
- sitn->tunnels_wc[0] = tunnel;
+ rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;
}
+static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPV6 &&
+ proto != IPPROTO_IPIP &&
+ proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void ipip6_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ parms->iph.version = 4;
+ parms->iph.protocol = IPPROTO_IPV6;
+ parms->iph.ihl = 5;
+ parms->iph.ttl = 64;
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+ if (data[IFLA_IPTUN_REMOTE])
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+ if (data[IFLA_IPTUN_TTL]) {
+ parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+ if (parms->iph.ttl)
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_IPTUN_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+ if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+ parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+/* This function returns true when 6RD attributes are present in the nl msg */
+static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
+ struct ip_tunnel_6rd *ip6rd)
+{
+ bool ret = false;
+ memset(ip6rd, 0, sizeof(*ip6rd));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_6RD_PREFIX]) {
+ ret = true;
+ nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
+ sizeof(struct in6_addr));
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
+ ret = true;
+ ip6rd->relay_prefix =
+ nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
+ ret = true;
+ ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
+ ret = true;
+ ip6rd->relay_prefixlen =
+ nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
+ }
+
+ return ret;
+}
+#endif
+
+static int ipip6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *nt;
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+ int err;
+
+ nt = netdev_priv(dev);
+ ipip6_netlink_parms(data, &nt->parms);
+
+ if (ipip6_tunnel_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ err = ipip6_tunnel_create(dev);
+ if (err < 0)
+ return err;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+#endif
+
+ return err;
+}
+
+static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm p;
+ struct net *net = t->net;
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+
+ if (dev == sitn->fb_tunnel_dev)
+ return -EINVAL;
+
+ ipip6_netlink_parms(data, &p);
+
+ if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+ (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+ return -EINVAL;
+
+ t = ipip6_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ ipip6_tunnel_update(t, &p);
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ return ipip6_tunnel_update_6rd(t, &ip6rd);
+#endif
+
+ return 0;
+}
+
+static size_t ipip6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_TOS */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_PMTUDISC */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
+#ifdef CONFIG_IPV6_SIT_6RD
+ /* IFLA_IPTUN_6RD_PREFIX */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIX */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_6RD_PREFIXLEN */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
+ nla_total_size(2) +
+#endif
+ 0;
+}
+
+static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+ nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
+ nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
+ !!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ goto nla_put_failure;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
+ &tunnel->ip6rd.prefix) ||
+ nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
+ tunnel->ip6rd.relay_prefix) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
+ tunnel->ip6rd.prefixlen) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ tunnel->ip6rd.relay_prefixlen))
+ goto nla_put_failure;
+#endif
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
+ [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
+#ifdef CONFIG_IPV6_SIT_6RD
+ [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
+ [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
+#endif
+};
+
+static void ipip6_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev != sitn->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
+static struct rtnl_link_ops sit_link_ops __read_mostly = {
+ .kind = "sit",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ipip6_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = ipip6_tunnel_setup,
+ .validate = ipip6_validate,
+ .newlink = ipip6_newlink,
+ .changelink = ipip6_changelink,
+ .get_size = ipip6_get_size,
+ .fill_info = ipip6_fill_info,
+ .dellink = ipip6_dellink,
+};
+
static struct xfrm_tunnel sit_handler __read_mostly = {
.handler = ipip6_rcv,
.err_handler = ipip6_err,
.priority = 1,
};
-static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+ .handler = ipip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+
+static void __net_exit sit_destroy_tunnels(struct net *net,
+ struct list_head *head)
{
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct net_device *dev, *aux;
int prio;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &sit_link_ops)
+ unregister_netdevice_queue(dev, head);
+
for (prio = 1; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t = sitn->tunnels[prio][h];
+ struct ip_tunnel *t;
+ t = rtnl_dereference(sitn->tunnels[prio][h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
- t = t->next;
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
+ t = rtnl_dereference(t->next);
}
}
}
@@ -1208,6 +1720,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
static int __net_init sit_init_net(struct net *net)
{
struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct ip_tunnel *t;
int err;
sitn->tunnels[0] = sitn->tunnels_wc;
@@ -1222,6 +1735,11 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
if (err)
@@ -1232,6 +1750,9 @@ static int __net_init sit_init_net(struct net *net)
if ((err = register_netdev(sitn->fb_tunnel_dev)))
goto err_reg_dev;
+ t = netdev_priv(sitn->fb_tunnel_dev);
+
+ strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
@@ -1244,12 +1765,10 @@ err_alloc_dev:
static void __net_exit sit_exit_net(struct net *net)
{
- struct sit_net *sitn = net_generic(net, sit_net_id);
LIST_HEAD(list);
rtnl_lock();
- sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+ sit_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1263,7 +1782,9 @@ static struct pernet_operations sit_net_ops = {
static void __exit sit_cleanup(void)
{
+ rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1273,20 +1794,39 @@ static int __init sit_init(void)
{
int err;
- printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+ pr_info("IPv6 over IPv4 tunneling driver\n");
err = register_pernet_device(&sit_net_ops);
if (err < 0)
return err;
err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
if (err < 0) {
- unregister_pernet_device(&sit_net_ops);
- printk(KERN_INFO "sit init: Can't add protocol\n");
+ pr_info("%s: can't register ip6ip4\n", __func__);
+ goto xfrm_tunnel_failed;
}
+ err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ if (err < 0) {
+ pr_info("%s: can't register ip4ip4\n", __func__);
+ goto xfrm_tunnel4_failed;
+ }
+ err = rtnl_link_register(&sit_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+out:
return err;
+
+rtnl_link_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel4_failed:
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+xfrm_tunnel_failed:
+ unregister_pernet_device(&sit_net_ops);
+ goto out;
}
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("sit0");
+MODULE_ALIAS_RTNL_LINK("sit");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 09fd34f0dbf..a822b880689 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -21,32 +21,26 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-extern int sysctl_tcp_syncookies;
-extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-/* Table must be sorted. */
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+
+/* RFC 2460, Section 8.3:
+ * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
+ *
+ * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows
+ * using higher values than ipv4 tcp syncookies.
+ * The other values are chosen based on ethernet (1500 and 9k MTU), plus
+ * one that accounts for common encap (PPPoe) overhead. Table must be sorted.
+ */
static __u16 const msstab[] = {
- 64,
- 512,
- 536,
- 1280 - 60,
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
1480 - 60,
1500 - 60,
- 4460 - 60,
9000 - 60,
};
-/*
- * This (misnamed) value is the age of syncookie which is permitted.
- * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
- * sysctl_tcp_retries1. It's a rather complicated formula (exponential
- * backoff) to compute at runtime so it's currently hardcoded here.
- */
-#define COUNTER_TRIES 4
-
static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
@@ -66,17 +60,21 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv6_cookie_scratch);
-static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
+static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
+ __u32 *tmp;
+
+ net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
+
+ tmp = __get_cpu_var(ipv6_cookie_scratch);
/*
* we have 320 bits of information to hash, copy in the remaining
- * 192 bits required for sha_transform, from the syncookie_secret
+ * 192 bits required for sha_transform, from the syncookie6_secret
* and overwrite the digest with the secret
*/
- memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp + 10, syncookie6_secret[c], 44);
memcpy(tmp, saddr, 16);
memcpy(tmp + 4, daddr, 16);
tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
@@ -86,27 +84,28 @@ static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
return tmp[17];
}
-static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr,
+static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
__be16 sport, __be16 dport, __u32 sseq,
- __u32 count, __u32 data)
+ __u32 data)
{
+ u32 count = tcp_cookie_time();
return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
sseq + (count << COOKIEBITS) +
((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
& COOKIEMASK));
}
-static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
- struct in6_addr *daddr, __be16 sport,
- __be16 dport, __u32 sseq, __u32 count,
- __u32 maxdiff)
+static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, __be16 sport,
+ __be16 dport, __u32 sseq)
{
- __u32 diff;
+ __u32 diff, count = tcp_cookie_time();
cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
- if (diff >= maxdiff)
+ if (diff >= MAX_SYNCOOKIE_AGE)
return (__u32)-1;
return (cookie -
@@ -114,46 +113,49 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
& COOKIEMASK;
}
-__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+ const struct tcphdr *th, __u16 *mssp)
{
- struct ipv6hdr *iph = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
- th->dest, ntohl(th->seq),
- jiffies / (HZ * 60), mssind);
+ th->dest, ntohl(th->seq), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
{
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return __cookie_v6_init_sequence(iph, th, mssp);
+}
+
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+ __u32 cookie)
+{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq,
- jiffies / (HZ * 60), COUNTER_TRIES);
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v6_check);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- u8 *hash_location;
struct inet_request_sock *ireq;
- struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -164,13 +166,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
- bool ecn_ok;
+ bool ecn_ok = false;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
@@ -179,9 +181,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tcp_opt, 0, NULL);
- if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+ if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
goto out;
ret = NULL;
@@ -190,38 +192,41 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ireq = inet_rsk(req);
- ireq6 = inet6_rsk(req);
treq = tcp_rsk(req);
+ treq->listener = NULL;
if (security_inet_conn_request(sk, skb, req))
goto out_free;
req->mss = mss;
- ireq->rmt_port = th->source;
- ireq->loc_port = th->dest;
- ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- ireq6->pktopts = skb;
+ ireq->pktopts = skb;
}
- ireq6->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq6->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ ireq->ir_mark = inet_request_mark(sk, skb);
req->expires = 0UL;
- req->retrans = 0;
+ req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
@@ -232,23 +237,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
*/
{
struct in6_addr *final_p, final;
- struct flowi fl;
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
- final_p = fl6_update_dst(&fl, np->opt, &final);
- ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_sk(sk)->inet_sport;
- security_req_classify_flow(req, &fl);
- if (ip6_dst_lookup(sk, &dst, &fl))
- goto out_free;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ struct flowi6 fl6;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+ fl6.saddr = ireq->ir_v6_loc_addr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = ireq->ir_mark;
+ fl6.fl6_dport = ireq->ir_rmt_port;
+ fl6.fl6_sport = inet_sk(sk)->inet_sport;
+ security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst))
goto out_free;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index fa1d8f4e005..058f3eca2e5 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -10,27 +10,37 @@
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <linux/export.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
-static ctl_table ipv6_table_template[] = {
+static struct ctl_table ipv6_table_template[] = {
{
- .procname = "route",
- .maxlen = 0,
- .mode = 0555,
- .child = ipv6_route_table_template
+ .procname = "bindv6only",
+ .data = &init_net.ipv6.sysctl.bindv6only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
},
{
- .procname = "icmp",
- .maxlen = 0,
- .mode = 0555,
- .child = ipv6_icmp_table_template
+ .procname = "anycast_src_echo_reply",
+ .data = &init_net.ipv6.sysctl.anycast_src_echo_reply,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
},
{
- .procname = "bindv6only",
- .data = &init_net.ipv6.sysctl.bindv6only,
+ .procname = "flowlabel_consistency",
+ .data = &init_net.ipv6.sysctl.flowlabel_consistency,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -38,7 +48,7 @@ static ctl_table ipv6_table_template[] = {
{ }
};
-static ctl_table ipv6_rotable[] = {
+static struct ctl_table ipv6_rotable[] = {
{
.procname = "mld_max_msf",
.data = &sysctl_mld_max_msf,
@@ -49,13 +59,6 @@ static ctl_table ipv6_rotable[] = {
{ }
};
-struct ctl_path net_ipv6_ctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv6", },
- { },
-};
-EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
-
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
struct ctl_table *ipv6_table;
@@ -68,28 +71,39 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
GFP_KERNEL);
if (!ipv6_table)
goto out;
+ ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
+ ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
+ ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
goto out_ipv6_table;
- ipv6_table[0].child = ipv6_route_table;
ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
if (!ipv6_icmp_table)
goto out_ipv6_route_table;
- ipv6_table[1].child = ipv6_icmp_table;
-
- ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
- net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
- ipv6_table);
- if (!net->ipv6.sysctl.table)
+ net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table);
+ if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
+ net->ipv6.sysctl.route_hdr =
+ register_net_sysctl(net, "net/ipv6/route", ipv6_route_table);
+ if (!net->ipv6.sysctl.route_hdr)
+ goto out_unregister_ipv6_table;
+
+ net->ipv6.sysctl.icmp_hdr =
+ register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table);
+ if (!net->ipv6.sysctl.icmp_hdr)
+ goto out_unregister_route_table;
+
err = 0;
out:
return err;
-
+out_unregister_route_table:
+ unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
+out_unregister_ipv6_table:
+ unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
out_ipv6_icmp_table:
kfree(ipv6_icmp_table);
out_ipv6_route_table:
@@ -105,11 +119,13 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
- ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
- ipv6_route_table = ipv6_table[0].child;
- ipv6_icmp_table = ipv6_table[1].child;
+ ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
+ ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
+ ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg;
- unregister_net_sysctl_table(net->ipv6.sysctl.table);
+ unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr);
+ unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
+ unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
kfree(ipv6_table);
kfree(ipv6_route_table);
@@ -127,7 +143,7 @@ int ipv6_sysctl_register(void)
{
int err = -ENOMEM;
- ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable);
+ ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
if (ip6_header == NULL)
goto out;
@@ -147,19 +163,3 @@ void ipv6_sysctl_unregister(void)
unregister_net_sysctl_table(ip6_header);
unregister_pernet_subsys(&ipv6_sysctl_net_ops);
}
-
-static struct ctl_table_header *ip6_base;
-
-int ipv6_static_sysctl_register(void)
-{
- static struct ctl_table empty[1];
- ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
- if (ip6_base == NULL)
- return -ENOMEM;
- return 0;
-}
-
-void ipv6_static_sysctl_unregister(void)
-{
- unregister_net_sysctl_table(ip6_base);
-}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 319458558df..229239ad96b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -39,7 +39,7 @@
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
-
+#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
@@ -61,8 +61,9 @@
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
-
-#include <asm/uaccess.h>
+#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
+#include <net/busy_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -75,9 +76,6 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static void __tcp_v6_send_check(struct sk_buff *skb,
- struct in6_addr *saddr,
- struct in6_addr *daddr);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -86,12 +84,24 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
- struct in6_addr *addr)
+ const struct in6_addr *addr)
{
return NULL;
}
#endif
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
static void tcp_v6_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
@@ -105,15 +115,7 @@ static void tcp_v6_hash(struct sock *sk)
}
}
-static __inline__ __sum16 tcp_v6_check(int len,
- struct in6_addr *saddr,
- struct in6_addr *daddr,
- __wsum base)
-{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
-}
-
-static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
+static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32,
@@ -131,7 +133,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
struct rt6_info *rt;
- struct flowi fl;
+ struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
int err;
@@ -142,17 +144,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl.fl6_flowlabel);
- if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+ fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl6.flowlabel);
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
fl6_sock_release(flowlabel);
}
}
@@ -161,12 +162,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* connect() to INADDR_ANY means loopback (BSD'ism).
*/
- if(ipv6_addr_any(&usin->sin6_addr))
+ if (ipv6_addr_any(&usin->sin6_addr))
usin->sin6_addr.s6_addr[15] = 0x1;
addr_type = ipv6_addr_type(&usin->sin6_addr);
- if(addr_type & IPV6_ADDR_MULTICAST)
+ if (addr_type & IPV6_ADDR_MULTICAST)
return -ENETUNREACH;
if (addr_type&IPV6_ADDR_LINKLOCAL) {
@@ -188,14 +189,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (tp->rx_opt.ts_recent_stamp &&
- !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+ !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
- np->flow_label = fl.fl6_flowlabel;
+ sk->sk_v6_daddr = usin->sin6_addr;
+ np->flow_label = fl6.flowlabel;
/*
* TCP over IPv4
@@ -233,49 +234,40 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
} else {
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
}
return err;
}
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ saddr = &sk->sk_v6_rcv_saddr;
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src,
- (saddr ? saddr : &np->saddr));
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = usin->sin6_port;
- fl.fl_ip_sport = inet->inet_sport;
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = usin->sin6_port;
+ fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
- security_sk_classify_flow(sk, &fl);
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
goto failure;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
- if (err < 0) {
- if (err == -EREMOTE)
- err = ip6_dst_blackhole(sk, &dst, &fl);
- if (err < 0)
- goto failure;
}
if (saddr == NULL) {
- saddr = &fl.fl6_src;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
+ saddr = &fl6.saddr;
+ sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
+ np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
@@ -284,22 +276,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
- struct inet_peer *peer = rt6_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- }
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
+ tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -315,9 +293,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- if (!tp->write_seq)
+ if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
@@ -336,16 +314,34 @@ failure:
return err;
}
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+ struct dst_entry *dst;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ if (!dst)
+ return;
+
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_simple_retransmit(sk);
+ }
+}
+
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
struct ipv6_pinfo *np;
struct sock *sk;
int err;
struct tcp_sock *tp;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
struct net *net = dev_net(skb->dev);
sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -363,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
+ if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
@@ -376,61 +372,42 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt)) {
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
np = inet6_sk(sk);
- if (type == ICMPV6_PKT_TOOBIG) {
- struct dst_entry *dst = NULL;
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (sock_owned_by_user(sk))
- goto out;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
- goto out;
-
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi fl;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet->inet_dport;
- fl.fl_ip_sport = inet->inet_sport;
- security_skb_classify_flow(skb, &fl);
-
- if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
- sk->sk_err_soft = -err;
- goto out;
- }
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ goto out;
+ }
- if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
- sk->sk_err_soft = -err;
- goto out;
- }
+ if (type == ICMPV6_PKT_TOOBIG) {
+ /* We are not interested in TCP_LISTEN and open_requests
+ * (SYN-ACKs send out by Linux are always <576bytes so
+ * they should go through unfragmented).
+ */
+ if (sk->sk_state == TCP_LISTEN)
+ goto out;
- } else
- dst_hold(dst);
+ if (!ip6_sk_accept_pmtu(sk))
+ goto out;
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
- tcp_sync_mss(sk, dst_mtu(dst));
- tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ tp->mtu_info = ntohl(info);
+ if (!sock_owned_by_user(sk))
+ tcp_v6_mtu_reduced(sk);
+ else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
+ &tp->tsq_flags))
+ sock_hold(sk);
goto out;
}
@@ -459,11 +436,17 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can, it SYNs are crossed. --ANK */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * is already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -486,242 +469,82 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi6 *fl6,
+ struct request_sock *req,
+ u16 queue_mapping,
+ struct tcp_fastopen_cookie *foc)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
- struct in6_addr * final_p, final;
- struct flowi fl;
- struct dst_entry *dst;
- int err = -1;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
- ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
- fl.fl6_flowlabel = 0;
- fl.oif = treq->iif;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, &fl);
-
- opt = np->opt;
- final_p = fl6_update_dst(&fl, opt, &final);
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto done;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ /* First, grab a route. */
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, rvp);
+ skb = tcp_make_synack(sk, dst, req, foc);
+
if (skb) {
- __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+ __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+ &ireq->ir_v6_rmt_addr);
- ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
- err = ip6_xmit(sk, skb, &fl, opt);
+ fl6->daddr = ireq->ir_v6_rmt_addr;
+ if (np->repflow && (ireq->pktopts != NULL))
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+ skb_set_queue_mapping(skb, queue_mapping);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return err;
}
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
{
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, req, rvp);
-}
+ struct flowi6 fl6;
+ int res;
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
- else
-#endif
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
+ if (!res) {
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
+ return res;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
- kfree_skb(inet6_rsk(req)->pktopts);
+ kfree_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
- struct in6_addr *addr)
+ const struct in6_addr *addr)
{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- BUG_ON(tp == NULL);
-
- if (!tp->md5sig_info || !tp->md5sig_info->entries6)
- return NULL;
-
- for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
- return &tp->md5sig_info->keys6[i].base;
- }
- return NULL;
+ return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}
static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
struct sock *addr_sk)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
struct request_sock *req)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
-}
-
-static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
- char *newkey, u8 newkeylen)
-{
- /* Add key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp6_md5sig_key *keys;
-
- key = tcp_v6_md5_do_lookup(sk, peer);
- if (key) {
- /* modify existing entry - just update that one */
- kfree(key->key);
- key->key = newkey;
- key->keylen = newkeylen;
- } else {
- /* reallocate new list if current one is full. */
- if (!tp->md5sig_info) {
- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
- if (!tp->md5sig_info) {
- kfree(newkey);
- return -ENOMEM;
- }
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
- if (tcp_alloc_md5sig_pool(sk) == NULL) {
- kfree(newkey);
- return -ENOMEM;
- }
- if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
- keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
- (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
-
- if (!keys) {
- tcp_free_md5sig_pool();
- kfree(newkey);
- return -ENOMEM;
- }
-
- if (tp->md5sig_info->entries6)
- memmove(keys, tp->md5sig_info->keys6,
- (sizeof (tp->md5sig_info->keys6[0]) *
- tp->md5sig_info->entries6));
-
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = keys;
- tp->md5sig_info->alloced6++;
- }
-
- ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
- peer);
- tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
- tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
-
- tp->md5sig_info->entries6++;
- }
- return 0;
-}
-
-static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
- u8 *newkey, __u8 newkeylen)
-{
- return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
- newkey, newkeylen);
+ return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
-static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
- /* Free the key */
- kfree(tp->md5sig_info->keys6[i].base.key);
- tp->md5sig_info->entries6--;
-
- if (tp->md5sig_info->entries6 == 0) {
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = NULL;
- tp->md5sig_info->alloced6 = 0;
- } else {
- /* shrink the database */
- if (tp->md5sig_info->entries6 != i)
- memmove(&tp->md5sig_info->keys6[i],
- &tp->md5sig_info->keys6[i+1],
- (tp->md5sig_info->entries6 - i)
- * sizeof (tp->md5sig_info->keys6[0]));
- }
- tcp_free_md5sig_pool();
- return 0;
- }
- }
- return -ENOENT;
-}
-
-static void tcp_v6_clear_md5_list (struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int i;
-
- if (tp->md5sig_info->entries6) {
- for (i = 0; i < tp->md5sig_info->entries6; i++)
- kfree(tp->md5sig_info->keys6[i].base.key);
- tp->md5sig_info->entries6 = 0;
- tcp_free_md5sig_pool();
- }
-
- kfree(tp->md5sig_info->keys6);
- tp->md5sig_info->keys6 = NULL;
- tp->md5sig_info->alloced6 = 0;
-
- if (tp->md5sig_info->entries4) {
- for (i = 0; i < tp->md5sig_info->entries4; i++)
- kfree(tp->md5sig_info->keys4[i].base.key);
- tp->md5sig_info->entries4 = 0;
- tcp_free_md5sig_pool();
- }
-
- kfree(tp->md5sig_info->keys4);
- tp->md5sig_info->keys4 = NULL;
- tp->md5sig_info->alloced4 = 0;
-}
-
-static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
- int optlen)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+ int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 *newkey;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -733,49 +556,35 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
return -EINVAL;
if (!cmd.tcpm_keylen) {
- if (!tcp_sk(sk)->md5sig_info)
- return -ENOENT;
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
- return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (!tcp_sk(sk)->md5sig_info) {
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *p;
-
- p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- tp->md5sig_info = p;
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
- if (!newkey)
- return -ENOMEM;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
- return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
- newkey, cmd.tcpm_keylen);
- }
- return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
- struct in6_addr *daddr,
- struct in6_addr *saddr, int nbytes)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes)
{
struct tcp6_pseudohdr *bp;
struct scatterlist sg;
bp = &hp->md5_blk.ip6;
/* 1. TCP pseudo-header (RFC2460) */
- ipv6_addr_copy(&bp->saddr, saddr);
- ipv6_addr_copy(&bp->daddr, daddr);
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
bp->protocol = cpu_to_be32(IPPROTO_TCP);
bp->len = cpu_to_be32(nbytes);
@@ -784,8 +593,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
- struct in6_addr *daddr, struct in6_addr *saddr,
- struct tcphdr *th)
+ const struct in6_addr *daddr, struct in6_addr *saddr,
+ const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
struct hash_desc *desc;
@@ -817,22 +626,23 @@ clear_hash_noput:
}
static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
- struct sock *sk, struct request_sock *req,
- struct sk_buff *skb)
+ const struct sock *sk,
+ const struct request_sock *req,
+ const struct sk_buff *skb)
{
- struct in6_addr *saddr, *daddr;
+ const struct in6_addr *saddr, *daddr;
struct tcp_md5sig_pool *hp;
struct hash_desc *desc;
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
if (sk) {
saddr = &inet6_sk(sk)->saddr;
- daddr = &inet6_sk(sk)->daddr;
+ daddr = &sk->sk_v6_daddr;
} else if (req) {
- saddr = &inet6_rsk(req)->loc_addr;
- daddr = &inet6_rsk(req)->rmt_addr;
+ saddr = &inet_rsk(req)->ir_v6_loc_addr;
+ daddr = &inet_rsk(req)->ir_v6_rmt_addr;
} else {
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
saddr = &ip6h->saddr;
daddr = &ip6h->daddr;
}
@@ -866,12 +676,12 @@ clear_hash_noput:
return 1;
}
-static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
+static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
- __u8 *hash_location = NULL;
+ const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
int genhash;
u8 newhash[16];
@@ -898,12 +708,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
NULL, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- if (net_ratelimit()) {
- printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest));
- }
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ genhash ? "failed" : "mismatch",
+ &ip6h->saddr, ntohs(th->source),
+ &ip6h->daddr, ntohs(th->dest));
return 1;
}
return 0;
@@ -917,7 +725,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -927,93 +735,22 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
};
#endif
-static void __tcp_v6_send_check(struct sk_buff *skb,
- struct in6_addr *saddr, struct in6_addr *daddr)
-{
- struct tcphdr *th = tcp_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v6_check(skb->len, saddr, daddr,
- csum_partial(th, th->doff << 2,
- skb->csum));
- }
-}
-
-static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
-}
-
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
- struct ipv6hdr *ipv6h;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
- return 0;
-}
-
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- struct ipv6hdr *iph = skb_gro_network_header(skb);
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
- skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
- /* fall through */
- case CHECKSUM_NONE:
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
-
- return tcp_gro_receive(head, skb);
-}
-
-static int tcp6_gro_complete(struct sk_buff *skb)
-{
- struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
- th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
- &iph->saddr, &iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-
- return tcp_gro_complete(skb);
-}
-
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 ts, struct tcp_md5sig_key *key, int rst)
+ u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key, int rst, u8 tclass,
+ u32 label)
{
- struct tcphdr *th = tcp_hdr(skb), *t1;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcphdr *t1;
struct sk_buff *buff;
- struct flowi fl;
+ struct flowi6 fl6;
struct net *net = dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
struct dst_entry *dst;
__be32 *topt;
- if (ts)
+ if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
if (key)
@@ -1043,11 +780,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
topt = (__be32 *)(t1 + 1);
- if (ts) {
+ if (tsecr) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- *topt++ = htonl(tcp_time_stamp);
- *topt++ = htonl(ts);
+ *topt++ = htonl(tsval);
+ *topt++ = htonl(tsecr);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1060,34 +797,38 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
}
#endif
- memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = ipv6_hdr(skb)->saddr;
+ fl6.saddr = ipv6_hdr(skb)->daddr;
+ fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
buff->csum = 0;
- __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst);
+ __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
- fl.proto = IPPROTO_TCP;
- fl.oif = inet6_iif(skb);
- fl.fl_ip_dport = t1->dest;
- fl.fl_ip_sport = t1->source;
- security_skb_classify_flow(skb, &fl);
+ fl6.flowi6_proto = IPPROTO_TCP;
+ if (rt6_need_strict(&fl6.daddr) && !oif)
+ fl6.flowi6_oif = inet6_iif(skb);
+ else
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
+ fl6.fl6_dport = t1->dest;
+ fl6.fl6_sport = t1->source;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
* Underlying function will use this to retrieve the network
* namespace
*/
- if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
- if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
- skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl, NULL);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
- if (rst)
- TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
- return;
- }
+ dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+ if (!IS_ERR(dst)) {
+ skb_dst_set(buff, dst);
+ ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ if (rst)
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+ return;
}
kfree_skb(buff);
@@ -1095,9 +836,17 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+ const __u8 *hash_location = NULL;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
+#endif
+ int oif;
if (th->rst)
return;
@@ -1106,8 +855,33 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
return;
#ifdef CONFIG_TCP_MD5SIG
- if (sk)
- key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
+ hash_location = tcp_parse_md5sig_option(th);
+ if (!sk && hash_location) {
+ /*
+ * active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
+ ntohs(th->source), inet6_iif(skb));
+ if (!sk1)
+ return;
+
+ rcu_read_lock();
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!key)
+ goto release_sk1;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto release_sk1;
+ } else {
+ key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+ }
#endif
if (th->ack)
@@ -1116,13 +890,25 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
+ oif = sk ? sk->sk_bound_dev_if : 0;
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+ if (sk1) {
+ rcu_read_unlock();
+ sock_put(sk1);
+ }
+#endif
}
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
- struct tcp_md5sig_key *key)
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key, u8 tclass,
+ u32 label)
{
- tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
+ label);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1132,7 +918,9 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
+ tcp_time_stamp + tcptw->tw_ts_offset,
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tw->tw_tclass, (tw->tw_flowlabel << 12));
inet_twsk_put(tw);
}
@@ -1140,12 +928,19 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
+ tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ tcp_rsk(req)->rcv_nxt,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ 0, 0);
}
-static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
+static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
struct request_sock *req, **prev;
const struct tcphdr *th = tcp_hdr(skb);
@@ -1156,7 +951,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, inet6_iif(skb));
if (req)
- return tcp_check_req(sk, skb, req, prev);
+ return tcp_check_req(sk, skb, req, prev, false);
nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
@@ -1183,20 +978,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
- u8 *hash_location;
struct request_sock *req;
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
- int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ bool want_cookie = false, fastopen;
+ struct flowi6 fl6;
+ int err;
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -1204,19 +996,17 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- if (net_ratelimit())
- syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- want_cookie = 1;
- else
-#endif
- goto drop;
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+ if (!want_cookie)
+ goto drop;
}
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
+ }
req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
if (req == NULL)
@@ -1229,52 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
-
- if (tmp_opt.cookie_plus > 0 &&
- tmp_opt.saw_tstamp &&
- !tp->rx_opt.cookie_out_never &&
- (sysctl_tcp_cookie_size > 0 ||
- (tp->cookie_values != NULL &&
- tp->cookie_values->cookie_desired > 0))) {
- u8 *c;
- u32 *d;
- u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
- int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
- if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
- goto drop_and_free;
-
- /* Secret recipe starts with IP addresses */
- d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
-
- /* plus variable length Initiator Cookie */
- c = (u8 *)mess;
- while (l-- > 0)
- *c++ ^= *hash_location++;
-
-#ifdef CONFIG_SYN_COOKIES
- want_cookie = 0; /* not our kind of cookie */
-#endif
- tmp_ext.cookie_out_never = 0; /* false */
- tmp_ext.cookie_plus = tmp_opt.cookie_plus;
- } else if (!tp->rx_opt.cookie_in_always) {
- /* redundant indications, but ensure initialization. */
- tmp_ext.cookie_out_never = 1; /* true */
- tmp_ext.cookie_plus = 0;
- } else {
- goto drop_and_free;
- }
- tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1282,27 +1027,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
- treq = inet6_rsk(req);
- ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+ ireq = inet_rsk(req);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, tcp_hdr(skb));
+ TCP_ECN_create_request(req, skb, sock_net(sk));
- if (!isn) {
- struct inet_peer *peer = NULL;
+ ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_mark = inet_request_mark(sk, skb);
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ if (!isn) {
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
+ np->repflow) {
atomic_inc(&skb->users);
- treq->pktopts = skb;
+ ireq->pktopts = skb;
}
- treq->iif = sk->sk_bound_dev_if;
-
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
if (want_cookie) {
isn = cookie_v6_init_sequence(sk, skb, &req->mss);
@@ -1321,14 +1067,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, req)) != NULL &&
- (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
- ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
- &treq->rmt_addr)) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
+ (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+ if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -1337,8 +1077,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
+ !tcp_peer_is_proven(req, dst, false)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -1347,23 +1086,34 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+ &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
}
isn = tcp_v6_init_sequence(skb);
}
have_isn:
- tcp_rsk(req)->snt_isn = isn;
- security_inet_conn_request(sk, skb, req);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_release;
- if (tcp_v6_send_synack(sk, req,
- (struct request_values *)&tmp_ext) ||
- want_cookie)
+ if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
goto drop_and_free;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = tcp_v6_send_synack(sk, dst, &fl6, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
+ if (err || want_cookie)
+ goto drop_and_free;
+
+ tcp_rsk(req)->listener = NULL;
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ }
return 0;
drop_and_release:
@@ -1371,23 +1121,24 @@ drop_and_release:
drop_and_free:
reqsk_free(req);
drop:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0; /* don't send reset */
}
-static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
+static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
{
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -1408,11 +1159,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+ newsk->sk_v6_rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1420,10 +1171,15 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ if (np->repflow)
+ newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1440,14 +1196,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
}
- treq = inet6_rsk(req);
- opt = np->opt;
+ ireq = inet_rsk(req);
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, req);
+ dst = inet6_csk_route_req(sk, &fl6, req);
if (!dst)
goto out;
}
@@ -1464,6 +1219,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(newsk, dst, NULL, NULL);
+ inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1474,16 +1230,17 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
- ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
- newsk->sk_bound_dev_if = treq->iif;
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newnp->saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
First: no IPv4 options.
*/
- newinet->opt = NULL;
+ newinet->inet_opt = NULL;
+ newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
/* Clone RX bits */
@@ -1491,16 +1248,20 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
- kfree_skb(treq->pktopts);
- treq->pktopts = NULL;
+ if (ireq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
if (newnp->pktoptions)
skb_set_owner_r(newnp->pktoptions, newsk);
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ if (np->repflow)
+ newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Clone native IPv6 options from listening socket (if any)
@@ -1508,20 +1269,20 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
newnp->opt->opt_flen);
- tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ newtp->advmss = dst_metric_advmss(dst);
+ if (tcp_sk(sk)->rx_opt.user_mss &&
+ tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+ newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
tcp_initialize_rcv_mss(newsk);
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
@@ -1529,21 +1290,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ if (key != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
* across. Shucks.
*/
- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
- if (newkey != NULL)
- tcp_v6_md5_do_add(newsk, &newnp->daddr,
- newkey, key->keylen);
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
+ AF_INET6, key->key, key->keylen,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
}
#endif
if (__inet_inherit_port(sk, newsk) < 0) {
- sock_put(newsk);
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
goto out;
}
__inet6_hash(newsk, NULL);
@@ -1553,34 +1315,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
}
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0));
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1607,7 +1347,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash (sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard;
#endif
@@ -1633,13 +1373,21 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
- goto reset;
- TCP_CHECK_TIMER(sk);
+ struct dst_entry *dst = sk->sk_rx_dst;
+
+ sock_rps_save_rxhash(sk, skb);
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ dst_release(dst);
+ sk->sk_rx_dst = NULL;
+ }
+ }
+
+ tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1658,19 +1406,19 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
- if(nsk != sk) {
+ if (nsk != sk) {
+ sock_rps_save_rxhash(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
__kfree_skb(opt_skb);
return 0;
}
- }
+ } else
+ sock_rps_save_rxhash(sk, skb);
- TCP_CHECK_TIMER(sk);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
- TCP_CHECK_TIMER(sk);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1683,6 +1431,7 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1702,6 +1451,10 @@ ipv6_pktoptions:
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+ np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+ if (np->repflow)
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1717,8 +1470,8 @@ ipv6_pktoptions:
static int tcp_v6_rcv(struct sk_buff *skb)
{
- struct tcphdr *th;
- struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+ const struct ipv6hdr *hdr;
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1741,8 +1494,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
- goto bad_packet;
+ if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
+ goto csum_error;
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
@@ -1751,7 +1504,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
+ TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1773,6 +1526,7 @@ process:
if (sk_filter(sk, skb))
goto discard_and_relse;
+ sk_mark_napi_id(sk, skb);
skb->dev = NULL;
bh_lock_sock_nested(sk);
@@ -1781,7 +1535,7 @@ process:
#ifdef CONFIG_NET_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+ tp->ucopy.dma_chan = net_dma_find_channel();
if (tp->ucopy.dma_chan)
ret = tcp_v6_do_rcv(sk, skb);
else
@@ -1790,7 +1544,8 @@ process:
if (!tcp_prequeue(sk, skb))
ret = tcp_v6_do_rcv(sk, skb);
}
- } else if (unlikely(sk_add_backlog(sk, skb))) {
+ } else if (unlikely(sk_add_backlog(sk, skb,
+ sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
goto discard_and_relse;
@@ -1805,6 +1560,8 @@ no_tcp_socket:
goto discard_it;
if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+csum_error:
+ TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1812,11 +1569,6 @@ bad_packet:
}
discard_it:
-
- /*
- * Discard frame
- */
-
kfree_skb(skb);
return 0;
@@ -1830,10 +1582,13 @@ do_time_wait:
goto discard_it;
}
- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ if (skb->len < (th->doff<<2)) {
inet_twsk_put(inet_twsk(sk));
- goto discard_it;
+ goto bad_packet;
+ }
+ if (tcp_checksum_complete(skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
@@ -1842,6 +1597,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ &ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (sk2 != NULL) {
@@ -1858,57 +1614,64 @@ do_time_wait:
break;
case TCP_TW_RST:
goto no_tcp_socket;
- case TCP_TW_SUCCESS:;
+ case TCP_TW_SUCCESS:
+ ;
}
goto discard_it;
}
-static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+static void tcp_v6_early_demux(struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet_peer *peer;
+ const struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+ struct sock *sk;
- if (!rt ||
- !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
- peer = inet_getpeer_v6(&np->daddr, 1);
- *release_it = true;
- } else {
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
- *release_it = true;
- }
+ if (skb->pkt_type != PACKET_HOST)
+ return;
- return peer;
-}
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+ return;
-static void *tcp_v6_tw_get_peer(struct sock *sk)
-{
- struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
- struct inet_timewait_sock *tw = inet_twsk(sk);
+ hdr = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
- if (tw->tw_family == AF_INET)
- return tcp_v4_tw_get_peer(sk);
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return;
- return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ inet6_iif(skb));
+ if (sk) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ struct dst_entry *dst = sk->sk_rx_dst;
+
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst &&
+ inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+ skb_dst_set_noref(skb, dst);
+ }
+ }
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v6_tw_get_peer,
+ .twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
+ .sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
+ .net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
@@ -1924,7 +1687,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
.md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
@@ -1932,14 +1694,13 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
-
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -1956,7 +1717,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
@@ -1967,73 +1727,20 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
static int tcp_v6_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- skb_queue_head_init(&tp->out_of_order_queue);
- tcp_init_xmit_timers(sk);
- tcp_prequeue_init(tp);
-
- icsk->icsk_rto = TCP_TIMEOUT_INIT;
- tp->mdev = TCP_TIMEOUT_INIT;
-
- /* So many TCP implementations out there (incorrectly) count the
- * initial SYN frame in their delayed-ACK and congestion control
- * algorithms that we must have the following bandaid to talk
- * efficiently to them. -DaveM
- */
- tp->snd_cwnd = 2;
-
- /* See draft-stevens-tcpca-spec-01 for discussion of the
- * initialization of these values.
- */
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = TCP_MSS_DEFAULT;
-
- tp->reordering = sysctl_tcp_reordering;
- sk->sk_state = TCP_CLOSE;
+ tcp_init_sock(sk);
icsk->icsk_af_ops = &ipv6_specific;
- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
- icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_write_space = sk_stream_write_space;
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv6_specific;
+ tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif
- /* TCP Cookie Transactions */
- if (sysctl_tcp_cookie_size > 0) {
- /* Default, cookies without s_data_payload. */
- tp->cookie_values =
- kzalloc(sizeof(*tp->cookie_values),
- sk->sk_allocation);
- if (tp->cookie_values != NULL)
- kref_init(&tp->cookie_values->kref);
- }
- /* Presumed zeroed, in order of appearance:
- * cookie_in_always, cookie_out_never,
- * s_data_constant, s_data_in, s_data_out
- */
- sk->sk_sndbuf = sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
-
- local_bh_disable();
- percpu_counter_inc(&tcp_sockets_allocated);
- local_bh_enable();
-
return 0;
}
static void tcp_v6_destroy_sock(struct sock *sk)
{
-#ifdef CONFIG_TCP_MD5SIG
- /* Clean up the MD5 key list */
- if (tcp_sk(sk)->md5sig_info)
- tcp_v6_clear_md5_list(sk);
-#endif
tcp_v4_destroy_sock(sk);
inet6_destroy_sock(sk);
}
@@ -2041,31 +1748,31 @@ static void tcp_v6_destroy_sock(struct sock *sk)
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
- struct sock *sk, struct request_sock *req, int i, int uid)
+ const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
int ttd = req->expires - jiffies;
- struct in6_addr *src = &inet6_rsk(req)->loc_addr;
- struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+ const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+ const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
if (ttd < 0)
ttd = 0;
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
- ntohs(inet_rsk(req)->loc_port),
+ inet_rsk(req)->ir_num,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3],
- ntohs(inet_rsk(req)->rmt_port),
+ ntohs(inet_rsk(req)->ir_rmt_port),
TCP_SYN_RECV,
- 0,0, /* could print option size, but that is af dependent. */
+ 0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
- req->retrans,
- uid,
+ req->num_timeout,
+ from_kuid_munged(seq_user_ns(seq), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
@@ -2073,17 +1780,17 @@ static void get_openreq6(struct seq_file *seq,
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
- struct in6_addr *dest, *src;
+ const struct in6_addr *dest, *src;
__u16 destp, srcp;
int timer_active;
unsigned long timer_expires;
- struct inet_sock *inet = inet_sk(sp);
- struct tcp_sock *tp = tcp_sk(sp);
+ const struct inet_sock *inet = inet_sk(sp);
+ const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
- struct ipv6_pinfo *np = inet6_sk(sp);
+ struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
- dest = &np->daddr;
- src = &np->rcv_saddr;
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
@@ -2103,7 +1810,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -2113,52 +1820,51 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
tp->write_seq-tp->snd_una,
(sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
timer_active,
- jiffies_to_clock_t(timer_expires - jiffies),
+ jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- sock_i_uid(sp),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
icsk->icsk_probes_out,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
- (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
+ (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ sp->sk_state == TCP_LISTEN ?
+ (fastopenq ? fastopenq->max_qlen : 0) :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
static void get_timewait6_sock(struct seq_file *seq,
struct inet_timewait_sock *tw, int i)
{
- struct in6_addr *dest, *src;
+ const struct in6_addr *dest, *src;
__u16 destp, srcp;
- struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
- int ttd = tw->tw_ttd - jiffies;
-
- if (ttd < 0)
- ttd = 0;
+ s32 delta = tw->tw_ttd - inet_tw_time_stamp();
- dest = &tw6->tw_v6_daddr;
- src = &tw6->tw_v6_rcv_saddr;
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
tw->tw_substate, 0, 0,
- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
+ struct sock *sk = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -2174,25 +1880,31 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp6_sock(seq, v, st->num);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
break;
case TCP_SEQ_STATE_OPENREQ:
get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait6_sock(seq, v, st->num);
- break;
}
out:
return 0;
}
+static const struct file_operations tcp6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = tcp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
.name = "tcp6",
.family = AF_INET6,
- .seq_fops = {
- .owner = THIS_MODULE,
- },
+ .seq_fops = &tcp6_afinfo_seq_fops,
.seq_ops = {
.show = tcp6_seq_show,
},
@@ -2209,6 +1921,17 @@ void tcp6_proc_exit(struct net *net)
}
#endif
+static void tcp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups */
+ sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+ memset(&inet->pinet6 + 1, 0, size);
+}
+
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -2226,10 +1949,13 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
+ .release_cb = tcp_release_cb,
+ .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
@@ -2248,15 +1974,16 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ .proto_cgroup = tcp_proto_cgroup,
+#endif
+ .clear_sk = tcp_v6_clear_sk,
};
static const struct inet6_protocol tcpv6_protocol = {
+ .early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
- .gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -2265,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
@@ -2311,10 +2037,10 @@ int __init tcpv6_init(void)
out:
return ret;
-out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
+out_tcpv6_protocol:
+ inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
goto out;
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
new file mode 100644
index 00000000000..01b0ff9a0c2
--- /dev/null
+++ b/net/ipv6/tcpv6_offload.c
@@ -0,0 +1,93 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * TCPv6 GSO/GRO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ return 0;
+}
+
+static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+
+ /* Don't bother verifying checksum if we're going to flush anyway. */
+ if (NAPI_GRO_CB(skb)->flush)
+ goto skip_csum;
+
+ wsum = NAPI_GRO_CB(skb)->csum;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
+ wsum);
+
+ /* fall through */
+
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
+ wsum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+skip_csum:
+ return tcp_gro_receive(head, skb);
+}
+
+static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
+ &iph->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+
+ return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv6_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v6_gso_send_check,
+ .gso_segment = tcp_gso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+};
+
+int __init tcpv6_offload_init(void)
+{
+ return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 4f3cec12aa8..2c4e4c5c761 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -12,13 +12,14 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
* YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
*/
+#define pr_fmt(fmt) "IPv6: " fmt
+
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -160,11 +161,11 @@ static const struct inet6_protocol tunnel46_protocol = {
static int __init tunnel6_init(void)
{
if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
- printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
+ pr_err("%s: can't add protocol\n", __func__);
return -EAGAIN;
}
if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) {
- printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
+ pr_err("%s: can't add protocol\n", __func__);
inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
return -EAGAIN;
}
@@ -174,9 +175,9 @@ static int __init tunnel6_init(void)
static void __exit tunnel6_fini(void)
{
if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP))
- printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
+ pr_err("%s: can't remove protocol\n", __func__);
if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
- printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
+ pr_err("%s: can't remove protocol\n", __func__);
}
module_init(tunnel6_init);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b541a4e009f..7092ff78fd8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,27 +45,50 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/inet6_hashtables.h>
+#include <net/busy_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/events/skb.h>
#include "udp_impl.h"
+static unsigned int udp6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 udp6_ehash_secret __read_mostly;
+ static u32 udp_ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&udp6_ehash_secret,
+ sizeof(udp6_ehash_secret));
+ net_get_random_once(&udp_ipv6_hash_secret,
+ sizeof(udp_ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ udp_ipv6_hash_secret + net_hash_mix(net));
+}
+
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
return (!sk2_ipv6only &&
- (!sk1_rcv_saddr || !sk2_rcv_saddr ||
- sk1_rcv_saddr == sk2_rcv_saddr));
+ (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+ sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
if (addr_type2 == IPV6_ADDR_ANY &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -76,7 +99,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
return 1;
return 0;
@@ -103,8 +126,8 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
- unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+ unsigned int hash2_partial =
+ udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -114,7 +137,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
u16 new_hash = udp6_portaddr_hash(sock_net(sk),
- &inet6_sk(sk)->rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
@@ -130,7 +153,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
score = 0;
@@ -139,13 +161,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -168,10 +190,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score = 0;
if (inet->inet_dport) {
@@ -179,8 +200,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -202,7 +223,8 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
- int score, badness;
+ int score, badness, matches = 0, reuseport = 0;
+ u32 hash = 0;
begin:
result = NULL;
@@ -213,8 +235,18 @@ begin:
if (score > badness) {
result = sk;
badness = score;
- if (score == SCORE2_MAX)
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ } else if (score == SCORE2_MAX)
goto exact_match;
+ } else if (score == badness && reuseport) {
+ matches++;
+ if (((u64)hash * matches) >> 32 == 0)
+ result = sk;
+ hash = next_pseudo_random32(hash);
}
}
/*
@@ -238,7 +270,7 @@ exact_match:
return result;
}
-static struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *udptable)
@@ -248,7 +280,8 @@ static struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
- int score, badness;
+ int score, badness, matches = 0, reuseport = 0;
+ u32 hash = 0;
rcu_read_lock();
if (hslot->count > 10) {
@@ -283,6 +316,17 @@ begin:
if (score > badness) {
result = sk;
badness = score;
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
+ }
+ } else if (score == badness && reuseport) {
+ matches++;
+ if (((u64)hash * matches) >> 32 == 0)
+ result = sk;
+ hash = next_pseudo_random32(hash);
}
}
/*
@@ -305,13 +349,14 @@ begin:
rcu_read_unlock();
return result;
}
+EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
struct sock *sk;
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
if (unlikely(sk = skb_steal_sock(skb)))
return sk;
@@ -340,32 +385,30 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
- unsigned int ulen;
- int peeked;
+ unsigned int ulen, copied;
+ int peeked, off = 0;
int err;
int is_udplite = IS_UDPLITE(sk);
int is_udp4;
bool slow;
- if (addr_len)
- *addr_len=sizeof(struct sockaddr_in6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
- &peeked, &err);
+ &peeked, &off, &err);
if (!skb)
goto out;
ulen = skb->len - sizeof(struct udphdr);
- if (len > ulen)
- len = ulen;
- else if (len < ulen)
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
is_udp4 = (skb->protocol == htons(ETH_P_IP));
@@ -376,22 +419,34 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov,len);
+ msg->msg_iov, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
- if (err)
+ if (unlikely(err)) {
+ trace_kfree_skb(skb, udpv6_recvmsg);
+ if (!peeked) {
+ atomic_inc(&sk->sk_drops);
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS,
+ is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS,
+ is_udplite);
+ }
goto out_free;
-
+ }
if (!peeked) {
if (is_udp4)
UDP_INC_STATS_USER(sock_net(sk),
@@ -405,34 +460,36 @@ try_again:
/* Copy the address. */
if (msg->msg_name) {
- struct sockaddr_in6 *sin6;
-
- sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
sin6->sin6_family = AF_INET6;
sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (is_udp4)
+ if (is_udp4) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin6->sin6_addr);
- else {
- ipv6_addr_copy(&sin6->sin6_addr,
- &ipv6_hdr(skb)->saddr);
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
+ sin6->sin6_scope_id = 0;
+ } else {
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
+ sin6->sin6_scope_id =
+ ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
}
-
+ *addr_len = sizeof(*sin6);
}
+
+ if (np->rxopt.all)
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
+
if (is_udp4) {
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
} else {
if (np->rxopt.all)
- datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
}
- err = len;
+ err = copied;
if (flags & MSG_TRUNC)
err = ulen;
@@ -444,17 +501,25 @@ out:
csum_copy_err:
slow = lock_sock_fast(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
- if (is_udp4)
+ if (is_udp4) {
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
- else
+ } else {
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
+ }
}
unlock_sock_fast(sk, slow);
- if (flags & MSG_DONTWAIT)
+ if (noblock)
return -EAGAIN;
+
+ /* starting over for a new packet */
+ msg->msg_flags &= ~MSG_TRUNC;
goto try_again;
}
@@ -463,9 +528,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct udp_table *udptable)
{
struct ipv6_pinfo *np;
- struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
- struct in6_addr *saddr = &hdr->saddr;
- struct in6_addr *daddr = &hdr->daddr;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct in6_addr *saddr = &hdr->saddr;
+ const struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
@@ -475,6 +540,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk == NULL)
return;
+ if (type == ICMPV6_PKT_TOOBIG) {
+ if (!ip6_sk_accept_pmtu(sk))
+ goto out;
+ ip6_sk_update_pmtu(skb, sk, info);
+ }
+ if (type == NDISC_REDIRECT) {
+ ip6_sk_redirect(skb, sk);
+ goto out;
+ }
+
np = inet6_sk(sk);
if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
@@ -492,6 +567,30 @@ out:
sock_put(sk);
}
+static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ int rc;
+
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ }
+
+ rc = sock_queue_rcv_skb(sk, skb);
+ if (rc < 0) {
+ int is_udplite = IS_UDPLITE(sk);
+
+ /* Note that an ENOMEM error is charged twice */
+ if (rc == -ENOMEM)
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ kfree_skb(skb);
+ return -1;
+ }
+ return 0;
+}
+
static __inline__ void udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, u8 type,
u8 code, int offset, __be32 info )
@@ -499,7 +598,15 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
-int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+static struct static_key udpv6_encap_needed __read_mostly;
+void udpv6_encap_enable(void)
+{
+ if (!static_key_enabled(&udpv6_encap_needed))
+ static_key_slow_inc(&udpv6_encap_needed);
+}
+EXPORT_SYMBOL(udpv6_encap_enable);
+
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
@@ -508,6 +615,41 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto drop;
+ if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+
+ /*
+ * This is an encapsulation socket so pass the skb to
+ * the socket's udp_encap_rcv() hook. Otherwise, just
+ * fall through and pass this up the UDP socket.
+ * up->encap_rcv() returns the following value:
+ * =0 if skb was successfully passed to the encap
+ * handler or was discarded by it.
+ * >0 if skb should be passed on to UDP.
+ * <0 if skb should be resubmitted as proto -N
+ */
+
+ /* if we're overly short, let UDP handle it */
+ encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
+ int ret;
+
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
+ ret = encap_rcv(sk, skb);
+ if (ret <= 0) {
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
+ is_udplite);
+ return -ret;
+ }
+ }
+
+ /* FALLTHROUGH -- it's a UDP Packet */
+ }
+
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
@@ -527,64 +669,74 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (rcu_dereference_raw(sk->sk_filter)) {
+ if (rcu_access_pointer(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
- goto drop;
+ goto csum_error;
}
- if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
- /* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
- goto drop_no_sk_drops_inc;
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ goto drop;
}
- return 0;
+ skb_dst_drop(skb);
+
+ bh_lock_sock(sk);
+ rc = 0;
+ if (!sock_owned_by_user(sk))
+ rc = __udpv6_queue_rcv_skb(sk, skb);
+ else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+ bh_unlock_sock(sk);
+ goto drop;
+ }
+ bh_unlock_sock(sk);
+
+ return rc;
+
+csum_error:
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
- atomic_inc(&sk->sk_drops);
-drop_no_sk_drops_inc:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
}
static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, struct in6_addr *loc_addr,
- __be16 rmt_port, struct in6_addr *rmt_addr,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
int dif)
{
struct hlist_nulls_node *node;
- struct sock *s = sk;
unsigned short num = ntohs(loc_port);
- sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
+ sk_nulls_for_each_from(sk, node) {
+ struct inet_sock *inet = inet_sk(sk);
- if (!net_eq(sock_net(s), net))
+ if (!net_eq(sock_net(sk), net))
continue;
- if (udp_sk(s)->udp_port_hash == num &&
- s->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(s);
+ if (udp_sk(sk)->udp_port_hash == num &&
+ sk->sk_family == PF_INET6) {
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
}
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
continue;
}
- if (!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
continue;
- return s;
+ return sk;
}
}
return NULL;
@@ -593,44 +745,45 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
static void flush_stack(struct sock **stack, unsigned int count,
struct sk_buff *skb, unsigned int final)
{
- unsigned int i;
+ struct sk_buff *skb1 = NULL;
struct sock *sk;
- struct sk_buff *skb1;
+ unsigned int i;
for (i = 0; i < count; i++) {
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
-
sk = stack[i];
- if (skb1) {
- if (sk_rcvqueues_full(sk, skb)) {
- kfree_skb(skb1);
- goto drop;
- }
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- udpv6_queue_rcv_skb(sk, skb1);
- else if (sk_add_backlog(sk, skb1)) {
- kfree_skb(skb1);
- bh_unlock_sock(sk);
- goto drop;
- }
- bh_unlock_sock(sk);
- continue;
+ if (likely(skb1 == NULL))
+ skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+ if (!skb1) {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
}
-drop:
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INERRORS, IS_UDPLITE(sk));
+
+ if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
+ skb1 = NULL;
}
+ if (unlikely(skb1))
+ kfree_skb(skb1);
+}
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ * this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
}
+
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
- struct in6_addr *saddr, struct in6_addr *daddr,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable)
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
@@ -644,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
- stack[count++] = sk;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (uh->check || udp_sk(sk)->no_check6_rx)
+ stack[count++] = sk;
+
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) {
@@ -673,47 +831,13 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
return 0;
}
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
- int proto)
-{
- int err;
-
- UDP_SKB_CB(skb)->partial_cov = 0;
- UDP_SKB_CB(skb)->cscov = skb->len;
-
- if (proto == IPPROTO_UDPLITE) {
- err = udplite_checksum_init(skb, uh);
- if (err)
- return err;
- }
-
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- return 1;
- }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
- skb->len, proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- if (!skb_csum_unnecessary(skb))
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, proto, 0));
-
- return 0;
-}
-
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
struct net *net = dev_net(skb->dev);
struct sock *sk;
struct udphdr *uh;
- struct in6_addr *saddr, *daddr;
+ const struct in6_addr *saddr, *daddr;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -747,7 +871,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
}
if (udp6_csum_init(skb, uh, proto))
- goto discard;
+ goto csum_error;
/*
* Multicast receive code
@@ -763,39 +887,42 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* for sock caches... i'll skip this for now.
*/
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ if (sk != NULL) {
+ int ret;
- if (sk == NULL) {
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto discard;
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
- if (udp_lib_checksum_complete(skb))
- goto discard;
- UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
- proto == IPPROTO_UDPLITE);
+ ret = udpv6_queue_rcv_skb(sk, skb);
+ sock_put(sk);
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+ /* a return value > 0 means to resubmit the input, but
+ * it wants the return to be -protocol, or 0
+ */
+ if (ret > 0)
+ return -ret;
- kfree_skb(skb);
return 0;
}
- /* deliver */
-
- if (sk_rcvqueues_full(sk, skb)) {
- sock_put(sk);
- goto discard;
+ if (!uh->check) {
+ udp6_csum_zero_error(skb);
+ goto csum_error;
}
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- udpv6_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb)) {
- atomic_inc(&sk->sk_drops);
- bh_unlock_sock(sk);
- sock_put(sk);
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- }
- bh_unlock_sock(sk);
- sock_put(sk);
+
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
+ UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
return 0;
short_packet:
@@ -807,7 +934,9 @@ short_packet:
skb->len,
daddr,
ntohs(uh->dest));
-
+ goto discard;
+csum_error:
+ UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
@@ -886,11 +1015,16 @@ static int udp_v6_push_pending_frames(struct sock *sk)
struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- struct flowi *fl = &inet->cork.fl;
+ struct flowi6 *fl6;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
+ if (up->pending == AF_INET)
+ return udp_push_pending_frames(sk);
+
+ fl6 = &inet->cork.fl.u.ip6;
+
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
@@ -899,23 +1033,26 @@ static int udp_v6_push_pending_frames(struct sock *sk)
* Create a UDP header
*/
uh = udp_hdr(skb);
- uh->source = fl->fl_ip_sport;
- uh->dest = fl->fl_ip_dport;
+ uh->source = fl6->fl6_sport;
+ uh->dest = fl6->fl6_dport;
uh->len = htons(up->len);
uh->check = 0;
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
- udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst,
+ else if (up->no_check6_tx) { /* UDP csum disabled */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len);
goto send;
} else
csum = udp_csum_outgoing(sk, skb);
/* add protocol-dependent pseudo-header */
- uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
- up->len, fl->proto, csum );
+ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+ up->len, fl6->flowi6_proto, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
@@ -943,11 +1080,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
- struct flowi fl;
+ struct flowi6 fl6;
struct dst_entry *dst;
int addr_len = msg->msg_namelen;
int ulen = len;
@@ -984,7 +1121,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
} else if (!up->pending) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
} else
daddr = NULL;
@@ -1030,22 +1167,21 @@ do_udp_sendmsg:
}
ulen += sizeof(struct udphdr);
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
- fl.fl_ip_dport = sin6->sin6_port;
+ fl6.fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
- fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- daddr = &flowlabel->dst;
}
}
@@ -1054,44 +1190,44 @@ do_udp_sendmsg:
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
- ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
- fl.oif = sin6->sin6_scope_id;
+ __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
+ fl6.flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- fl.fl_ip_dport = inet->inet_dport;
- daddr = &np->daddr;
- fl.fl6_flowlabel = np->flow_label;
+ fl6.fl6_dport = inet->inet_dport;
+ daddr = &sk->sk_v6_daddr;
+ fl6.flowlabel = np->flow_label;
connected = 1;
}
- if (!fl.oif)
- fl.oif = sk->sk_bound_dev_if;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
- if (!fl.oif)
- fl.oif = np->sticky_pktinfo.ipi6_ifindex;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl.mark = sk->sk_mark;
+ fl6.flowi6_mark = sk->sk_mark;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
- err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
- &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
- if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -1105,55 +1241,40 @@ do_udp_sendmsg:
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
- fl.proto = sk->sk_protocol;
+ fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
- ipv6_addr_copy(&fl.fl6_dst, daddr);
+ fl6.daddr = *daddr;
else
- fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
- if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl_ip_sport = inet->inet_sport;
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+ fl6.saddr = np->saddr;
+ fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl, opt, &final);
+ final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
connected = 0;
- if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
- fl.oif = np->mcast_oif;
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
+ fl6.flowi6_oif = np->mcast_oif;
connected = 0;
- }
+ } else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, &fl);
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- err = ip6_sk_dst_lookup(sk, &dst, &fl);
- if (err)
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto out;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
- if (err < 0) {
- if (err == -EREMOTE)
- err = ip6_dst_blackhole(sk, &dst, &fl);
- if (err < 0)
- goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
@@ -1172,10 +1293,12 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt, &fl,
+ sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info*)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
if (err)
@@ -1188,10 +1311,10 @@ do_append_data:
if (dst) {
if (connected) {
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+ ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
#endif
NULL);
@@ -1232,10 +1355,18 @@ do_confirm:
void udpv6_destroy_sock(struct sock *sk)
{
+ struct udp_sock *up = udp_sk(sk);
lock_sock(sk);
udp_v6_flush_pending_frames(sk);
release_sock(sk);
+ if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+ void (*encap_destroy)(struct sock *sk);
+ encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ if (encap_destroy)
+ encap_destroy(sk);
+ }
+
inet6_destroy_sock(sk);
}
@@ -1280,158 +1411,41 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
- struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
-
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
- return 0;
-}
-
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- unsigned int unfrag_ip6hlen, unfrag_len;
- struct frag_hdr *fptr;
- u8 *mac_start, *prevhdr;
- u8 nexthdr;
- u8 frag_hdr_sz = sizeof(struct frag_hdr);
- int offset;
- __wsum csum;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
- int type = skb_shinfo(skb)->gso_type;
-
- if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
- !(type & (SKB_GSO_UDP))))
- goto out;
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb->csum_start - skb_headroom(skb);
- csum = skb_checksum(skb, offset, skb->len- offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Check if there is enough headroom to insert fragment header. */
- if ((skb_headroom(skb) < frag_hdr_sz) &&
- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
- goto out;
-
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
- unfrag_ip6hlen;
- mac_start = skb_mac_header(skb);
- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
-
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- ipv6_select_ident(fptr);
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
-
-out:
- return segs;
-}
-
static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
- .gso_send_check = udp6_ufo_send_check,
- .gso_segment = udp6_ufo_fragment,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
-
-static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
-{
- struct inet_sock *inet = inet_sk(sp);
- struct ipv6_pinfo *np = inet6_sk(sp);
- struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = ntohs(inet->inet_dport);
- srcp = ntohs(inet->inet_sport);
- seq_printf(seq,
- "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
- bucket,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- sock_i_uid(sp), 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
-}
-
int udp6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ int bucket = ((struct udp_iter_state *)seq->private)->bucket;
+ struct inet_sock *inet = inet_sk(v);
+ __u16 srcp = ntohs(inet->inet_sport);
+ __u16 destp = ntohs(inet->inet_dport);
+ ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+ }
return 0;
}
+static const struct file_operations udp6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = udp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
static struct udp_seq_afinfo udp6_seq_afinfo = {
.name = "udp6",
.family = AF_INET6,
.udp_table = &udp_table,
- .seq_fops = {
- .owner = THIS_MODULE,
- },
+ .seq_fops = &udp6_afinfo_seq_fops,
.seq_ops = {
.show = udp6_seq_show,
},
@@ -1447,6 +1461,17 @@ void udp6_proc_exit(struct net *net) {
}
#endif /* CONFIG_PROC_FS */
+void udp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups */
+ sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+ memset(&inet->pinet6 + 1, 0, size);
+}
+
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
@@ -1461,7 +1486,7 @@ struct proto udpv6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = udpv6_queue_rcv_skb,
+ .backlog_rcv = __udpv6_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
@@ -1477,6 +1502,7 @@ struct proto udpv6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udpv6_protosw = {
@@ -1484,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index d7571046bfc..c779c3c90b9 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -7,31 +7,32 @@
#include <net/inet_common.h>
#include <net/transp_v6.h>
-extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
-extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
- u8 , u8 , int , __be32 , struct udp_table *);
+int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
+void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+ __be32, struct udp_table *);
-extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
+int udp_v6_get_port(struct sock *sk, unsigned short snum);
-extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#ifdef CONFIG_COMPAT
-extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
#endif
-extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len);
-extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len);
-extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern void udpv6_destroy_sock(struct sock *sk);
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len);
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void udpv6_destroy_sock(struct sock *sk);
+
+void udp_v6_clear_sk(struct sock *sk, int size);
#ifdef CONFIG_PROC_FS
-extern int udp6_seq_show(struct seq_file *seq, void *v);
+int udp6_seq_show(struct seq_file *seq, void *v);
#endif
#endif /* _UDP6_IMPL_H */
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
new file mode 100644
index 00000000000..0ae3d98f83e
--- /dev/null
+++ b/net/ipv6/udp_offload.c
@@ -0,0 +1,140 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * UDPv6 GSO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int udp6_ufo_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(*uh)))
+ return -EINVAL;
+
+ if (likely(!skb->encapsulation)) {
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
+
+ return 0;
+}
+
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ unsigned int unfrag_ip6hlen, unfrag_len;
+ struct frag_hdr *fptr;
+ u8 *packet_start, *prevhdr;
+ u8 nexthdr;
+ u8 frag_hdr_sz = sizeof(struct frag_hdr);
+ int offset;
+ __wsum csum;
+ int tnl_hlen;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type & ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
+ SKB_GSO_MPLS) ||
+ !(type & (SKB_GSO_UDP))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
+ segs = skb_udp_tunnel_segment(skb, features);
+ else {
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+ }
+
+out:
+ return segs;
+}
+static const struct net_offload udpv6_offload = {
+ .callbacks = {
+ .gso_send_check = udp6_ufo_send_check,
+ .gso_segment = udp6_ufo_fragment,
+ },
+};
+
+int __init udp_offload_init(void)
+{
+ return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5f48fadc27f..9cf097e206e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -11,6 +11,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include "udp_impl.h"
static int udplitev6_rcv(struct sk_buff *skb)
@@ -55,6 +56,7 @@ struct proto udplitev6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udplite6_protosw = {
@@ -62,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
@@ -92,13 +93,20 @@ void udplitev6_exit(void)
}
#ifdef CONFIG_PROC_FS
+
+static const struct file_operations udplite6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = udp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
static struct udp_seq_afinfo udplite6_seq_afinfo = {
.name = "udplite6",
.family = AF_INET6,
.udp_table = &udplite_table,
- .seq_fops = {
- .owner = THIS_MODULE,
- },
+ .seq_fops = &udplite6_afinfo_seq_fops,
.seq_ops = {
.show = udp6_seq_show,
},
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index bbd48b101ba..9949a356d62 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -41,10 +41,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *top_iph;
struct ip_beet_phdr *ph;
- struct iphdr *iphv4;
int optlen, hdr_len;
- iphv4 = ip_hdr(skb);
hdr_len = 0;
optlen = XFRM_MODE_SKB_CB(skb)->optlen;
if (unlikely(optlen))
@@ -74,15 +72,14 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->nexthdr = IPPROTO_BEETPH;
}
- ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
- ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
}
static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *ip6h;
- const unsigned char *old_mac;
int size = sizeof(struct ipv6hdr);
int err;
@@ -92,17 +89,14 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
__skb_push(skb, size);
skb_reset_network_header(skb);
-
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_mac_header_rebuild(skb);
xfrm6_beet_make_header(skb);
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(skb->len - size);
- ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
- ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+ ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+ ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
err = 0;
out:
return err;
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 63d5d493098..0e015906f9c 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -15,8 +15,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Authors:
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index b809812c8d3..901ef6f8add 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -14,12 +14,13 @@
#include <net/dsfield.h>
#include <net/dst.h>
#include <net/inet_ecn.h>
+#include <net/ip6_route.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
- struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
@@ -48,29 +49,37 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
sizeof(top_iph->flow_lbl));
top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
- dsfield = XFRM_MODE_SKB_CB(skb)->tos;
- dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ dsfield = 0;
+ else
+ dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+ dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
- ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
- ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+ top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
}
+#define for_each_input_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next))
+
+
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
- const unsigned char *old_mac;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
goto out;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- if (skb_cloned(skb) &&
- (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
@@ -79,10 +88,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
err = 0;
out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6434bd5ce08..433672d07d0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/dst.h>
#include <net/ipv6.h>
+#include <net/ip6_route.h>
#include <net/xfrm.h>
int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -27,6 +28,47 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+static int xfrm6_local_dontfrag(struct sk_buff *skb)
+{
+ int proto;
+ struct sock *sk = skb->sk;
+
+ if (sk) {
+ if (sk->sk_family != AF_INET6)
+ return 0;
+
+ proto = sk->sk_protocol;
+ if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+ return inet6_sk(sk)->dontfrag;
+ }
+
+ return 0;
+}
+
+static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct sock *sk = skb->sk;
+
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.daddr = ipv6_hdr(skb)->daddr;
+
+ ipv6_local_rxpmtu(sk, &fl6, mtu);
+}
+
+void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ const struct ipv6hdr *hdr;
+ struct sock *sk = skb->sk;
+
+ hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+ fl6.fl6_dport = inet_sk(sk)->inet_dport;
+ fl6.daddr = hdr->daddr;
+
+ ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
+}
+
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
@@ -36,9 +78,15 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->local_df && skb->len > mtu) {
+ if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+
+ if (xfrm6_local_dontfrag(skb))
+ xfrm6_local_rxpmtu(skb, mtu);
+ else if (skb->sk)
+ xfrm_local_error(skb, mtu);
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ret = -EMSGSIZE;
}
@@ -66,30 +114,61 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->local_df = 1;
+ skb->ignore_df = 1;
return x->outer_mode->output2(x, skb);
}
EXPORT_SYMBOL(xfrm6_prepare_output);
-static int xfrm6_output_finish(struct sk_buff *skb)
+int xfrm6_output_finish(struct sk_buff *skb)
{
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ skb->protocol = htons(ETH_P_IPV6);
+
#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IPV6);
return xfrm_output(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+static int __xfrm6_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+ int mtu;
+
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ mtu = ip6_skb_dst_mtu(skb);
+ else
+ mtu = dst_mtu(skb_dst(skb));
+
+ if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ xfrm6_local_rxpmtu(skb, mtu);
+ return -EMSGSIZE;
+ } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+ xfrm_local_error(skb, mtu);
+ return -EMSGSIZE;
+ }
+
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ ((skb->len > mtu && !skb_is_gso(skb)) ||
+ dst_allfrag(skb_dst(skb)))) {
+ return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
+ }
+ return x->outer_mode->afinfo->output_finish(skb);
+}
+
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
- skb_dst(skb)->dev, xfrm6_output_finish);
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ NULL, skb_dst(skb)->dev, __xfrm6_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 7e74023ea6e..2a0bbda2c76 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,25 +20,26 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
- xfrm_address_t *saddr,
- xfrm_address_t *daddr)
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr)
{
- struct flowi fl = {};
+ struct flowi6 fl6;
struct dst_entry *dst;
int err;
- memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst));
+ memset(&fl6, 0, sizeof(fl6));
+ memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
- memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
+ memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
- dst = ip6_route_output(net, NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl6);
err = dst->error;
if (dst->error) {
@@ -67,11 +68,18 @@ static int xfrm6_get_saddr(struct net *net,
return 0;
}
-static int xfrm6_get_tos(struct flowi *fl)
+static int xfrm6_get_tos(const struct flowi *fl)
{
return 0;
}
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+ struct rt6_info *rt = (struct rt6_info *)xdst;
+
+ rt6_init_peer(rt, net->ipv6.peers);
+}
+
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -87,7 +95,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
}
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
- struct flowi *fl)
+ const struct flowi *fl)
{
struct rt6_info *rt = (struct rt6_info*)xdst->route;
@@ -95,8 +103,12 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
dev_hold(dev);
xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
- if (!xdst->u.rt6.rt6i_idev)
+ if (!xdst->u.rt6.rt6i_idev) {
+ dev_put(dev);
return -ENODEV;
+ }
+
+ rt6_transfer_peer(&xdst->u.rt6, rt);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
@@ -116,18 +128,24 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
static inline void
_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
{
+ struct flowi6 *fl6 = &fl->u.ip6;
int onlyproto = 0;
u16 offset = skb_network_header_len(skb);
- struct ipv6hdr *hdr = ipv6_hdr(skb);
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u8 nexthdr = nh[IP6CB(skb)->nhoff];
+ int oif = 0;
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
- memset(fl, 0, sizeof(struct flowi));
- fl->mark = skb->mark;
+ memset(fl6, 0, sizeof(struct flowi6));
+ fl6->flowi6_mark = skb->mark;
+ fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
- ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
- ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
+ fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+ fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
while (nh + offset + 1 < skb->data ||
pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
@@ -154,31 +172,31 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
__be16 *ports = (__be16 *)exthdr;
- fl->fl_ip_sport = ports[!!reverse];
- fl->fl_ip_dport = ports[!reverse];
+ fl6->fl6_sport = ports[!!reverse];
+ fl6->fl6_dport = ports[!reverse];
}
- fl->proto = nexthdr;
+ fl6->flowi6_proto = nexthdr;
return;
case IPPROTO_ICMPV6:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
u8 *icmp = (u8 *)exthdr;
- fl->fl_icmp_type = icmp[0];
- fl->fl_icmp_code = icmp[1];
+ fl6->fl6_icmp_type = icmp[0];
+ fl6->fl6_icmp_code = icmp[1];
}
- fl->proto = nexthdr;
+ fl6->flowi6_proto = nexthdr;
return;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
mh = (struct ip6_mh *)exthdr;
- fl->fl_mh_type = mh->ip6mh_type;
+ fl6->fl6_mh_type = mh->ip6mh_type;
}
- fl->proto = nexthdr;
+ fl6->flowi6_proto = nexthdr;
return;
#endif
@@ -187,8 +205,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_ESP:
case IPPROTO_COMP:
default:
- fl->fl_ipsec_spi = 0;
- fl->proto = nexthdr;
+ fl6->fl6_ipsec_spi = 0;
+ fl6->flowi6_proto = nexthdr;
return;
}
}
@@ -202,12 +220,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+ path->ops->update_pmtu(path, sk, skb, mtu);
+}
+
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, mtu);
+ path->ops->redirect(path, sk, skb);
}
static void xfrm6_dst_destroy(struct dst_entry *dst)
@@ -216,6 +244,11 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+ if (rt6_has_peer(&xdst->u.rt6)) {
+ struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
+ inet_putpeer(peer);
+ }
xfrm_dst_destroy(xdst);
}
@@ -251,10 +284,12 @@ static struct dst_ops xfrm6_dst_ops = {
.protocol = cpu_to_be16(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
+ .redirect = xfrm6_redirect,
+ .cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = 1024,
+ .gc_thresh = 32768,
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -264,8 +299,10 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
+ .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
+ .blackhole_route = ip6_blackhole_route,
};
static int __init xfrm6_policy_init(void)
@@ -290,27 +327,57 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static struct ctl_table_header *sysctl_hdr;
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = xfrm6_policy_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+
+ table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh;
+ }
+
+ hdr = register_net_sysctl(net, "net/ipv6", table);
+ if (!hdr)
+ goto err_reg;
+
+ net->ipv6.sysctl.xfrm6_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ if (net->ipv6.sysctl.xfrm6_hdr == NULL)
+ return;
+
+ table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct pernet_operations xfrm6_net_ops = {
+ .init = xfrm6_net_init,
+ .exit = xfrm6_net_exit,
+};
#endif
int __init xfrm6_init(void)
{
int ret;
- unsigned int gc_thresh;
-
- /*
- * We need a good default value for the xfrm6 gc threshold.
- * In ipv4 we set it to the route hash table size * 8, which
- * is half the size of the maximaum route cache for ipv4. It
- * would be good to do the same thing for v6, except the table is
- * constructed differently here. Here each table for a net namespace
- * can have FIB_TABLE_HASHSZ entries, so lets go with the same
- * computation that we used for ipv4 here. Also, lets keep the initial
- * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
- * to that as a minimum as well
- */
- gc_thresh = FIB6_TABLE_HASHSZ * 8;
- xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+
dst_entries_init(&xfrm6_dst_ops);
ret = xfrm6_policy_init();
@@ -322,12 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
- sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path,
- xfrm6_policy_table);
+ register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -336,10 +408,9 @@ out_policy:
void xfrm6_fini(void)
{
#ifdef CONFIG_SYSCTL
- if (sysctl_hdr)
- unregister_net_sysctl_table(sysctl_hdr);
+ unregister_pernet_subsys(&xfrm6_net_ops);
#endif
- //xfrm6_input_fini();
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 00000000000..54d13f8dbba
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,279 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+ struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+
+ if (!head)
+ return 0;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a67575d472a..3fc970135fc 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -15,31 +15,34 @@
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
#include <net/dsfield.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
static void
-__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
{
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
/* Initialize temporary selector matching only
* to current session. */
- ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst);
- ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src);
- sel->dport = xfrm_flowi_dport(fl);
+ *(struct in6_addr *)&sel->daddr = fl6->daddr;
+ *(struct in6_addr *)&sel->saddr = fl6->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
sel->dport_mask = htons(0xffff);
- sel->sport = xfrm_flowi_sport(fl);
+ sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
sel->sport_mask = htons(0xffff);
sel->family = AF_INET6;
sel->prefixlen_d = 128;
sel->prefixlen_s = 128;
- sel->proto = fl->proto;
- sel->ifindex = fl->oif;
+ sel->proto = fl6->flowi6_proto;
+ sel->ifindex = fl6->flowi6_oif;
}
static void
-xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
- xfrm_address_t *daddr, xfrm_address_t *saddr)
+xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+ const xfrm_address_t *daddr, const xfrm_address_t *saddr)
{
x->id = tmpl->id;
if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
@@ -98,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p)
return 1;
else
return 3;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
@@ -131,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p)
switch (v->mode) {
case XFRM_MODE_TRANSPORT:
return 1;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
@@ -176,9 +179,11 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
.tmpl_sort = __xfrm6_tmpl_sort,
.state_sort = __xfrm6_state_sort,
.output = xfrm6_output,
+ .output_finish = xfrm6_output_finish,
.extract_input = xfrm6_extract_input,
.extract_output = xfrm6_extract_output,
.transport_finish = xfrm6_transport_finish,
+ .local_error = xfrm6_local_error,
};
int __init xfrm6_state_init(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2969cad408d..1c66465a42d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
* YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
@@ -68,11 +67,11 @@ static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
-static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
+static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr)
{
- unsigned h;
+ unsigned int h;
- h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
+ h = ipv6_addr_hash((const struct in6_addr *)addr);
h ^= h >> 16;
h ^= h >> 8;
h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
@@ -80,28 +79,27 @@ static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
return h;
}
-static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi)
+static inline unsigned int xfrm6_tunnel_spi_hash_byspi(u32 spi)
{
return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
}
-static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr)
+static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
{
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
- struct hlist_node *pos;
- hlist_for_each_entry_rcu(x6spi, pos,
+ hlist_for_each_entry_rcu(x6spi,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr) {
- if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
+ if (xfrm6_addr_equal(&x6spi->addr, saddr))
return x6spi;
}
return NULL;
}
-__be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr)
+__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
{
struct xfrm6_tunnel_spi *x6spi;
u32 spi;
@@ -120,9 +118,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
int index = xfrm6_tunnel_spi_hash_byspi(spi);
- struct hlist_node *pos;
- hlist_for_each_entry(x6spi, pos,
+ hlist_for_each_entry(x6spi,
&xfrm6_tn->spi_byspi[index],
list_byspi) {
if (x6spi->spi == spi)
@@ -203,15 +200,15 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
{
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
struct xfrm6_tunnel_spi *x6spi;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
spin_lock_bh(&xfrm6_tunnel_spi_lock);
- hlist_for_each_entry_safe(x6spi, pos, n,
+ hlist_for_each_entry_safe(x6spi, n,
&xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
list_byaddr)
{
- if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+ if (xfrm6_addr_equal(&x6spi->addr, saddr)) {
if (atomic_dec_and_test(&x6spi->refcnt)) {
hlist_del_rcu(&x6spi->list_byaddr);
hlist_del_rcu(&x6spi->list_byspi);
@@ -237,11 +234,11 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
__be32 spi;
- spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0;
+ spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,