aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig30
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c977
-rw-r--r--net/ipv6/addrconf_core.c2
-rw-r--r--net/ipv6/addrlabel.c59
-rw-r--r--net/ipv6/af_inet6.c126
-rw-r--r--net/ipv6/ah6.c86
-rw-r--r--net/ipv6/anycast.c11
-rw-r--r--net/ipv6/datagram.c94
-rw-r--r--net/ipv6/esp6.c80
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/exthdrs_offload.c4
-rw-r--r--net/ipv6/fib6_rules.c8
-rw-r--r--net/ipv6/icmp.c63
-rw-r--r--net/ipv6/inet6_connection_sock.c42
-rw-r--r--net/ipv6/inet6_hashtables.c122
-rw-r--r--net/ipv6/ip6_checksum.c61
-rw-r--r--net/ipv6/ip6_fib.c347
-rw-r--r--net/ipv6/ip6_flowlabel.c110
-rw-r--r--net/ipv6/ip6_gre.c103
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/ip6_offload.c109
-rw-r--r--net/ipv6/ip6_output.c248
-rw-r--r--net/ipv6/ip6_tunnel.c84
-rw-r--r--net/ipv6/ip6_vti.c1160
-rw-r--r--net/ipv6/ip6mr.c24
-rw-r--r--net/ipv6/ipcomp6.c28
-rw-r--r--net/ipv6/ipv6_sockglue.c52
-rw-r--r--net/ipv6/mcast.c97
-rw-r--r--net/ipv6/mip6.c3
-rw-r--r--net/ipv6/ndisc.c67
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/Kconfig30
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c11
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c180
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c14
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c1
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c5
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c10
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c41
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c5
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c66
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c37
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c11
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c128
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c205
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c88
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c76
-rw-r--r--net/ipv6/output_core.c31
-rw-r--r--net/ipv6/ping.c34
-rw-r--r--net/ipv6/proc.c14
-rw-r--r--net/ipv6/protocol.c4
-rw-r--r--net/ipv6/raw.c53
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c318
-rw-r--r--net/ipv6/sit.c237
-rw-r--r--net/ipv6/syncookies.c81
-rw-r--r--net/ipv6/sysctl_net_ipv6.c23
-rw-r--r--net/ipv6/tcp_ipv6.c305
-rw-r--r--net/ipv6/tcpv6_offload.c42
-rw-r--r--net/ipv6/tunnel6.c3
-rw-r--r--net/ipv6/udp.c198
-rw-r--r--net/ipv6/udp_impl.h41
-rw-r--r--net/ipv6/udp_offload.c11
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_mode_ro.c3
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c6
-rw-r--r--net/ipv6/xfrm6_output.c30
-rw-r--r--net/ipv6/xfrm6_policy.c14
-rw-r--r--net/ipv6/xfrm6_protocol.c279
-rw-r--r--net/ipv6/xfrm6_tunnel.c3
74 files changed, 4770 insertions, 2106 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11b13ea69db..438a73aa777 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -21,24 +21,6 @@ menuconfig IPV6
if IPV6
-config IPV6_PRIVACY
- bool "IPv6: Privacy Extensions (RFC 3041) support"
- ---help---
- Privacy Extensions for Stateless Address Autoconfiguration in IPv6
- support. With this option, additional periodically-altered
- pseudo-random global-scope unicast address(es) will be assigned to
- your interface(s).
-
- We use our standard pseudo-random algorithm to generate the
- randomized interface identifier, instead of one described in RFC 3041.
-
- By default the kernel does not generate temporary addresses.
- To use temporary addresses, do
-
- echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
-
- See <file:Documentation/networking/ip-sysctl.txt> for details.
-
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
---help---
@@ -153,6 +135,18 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
---help---
Support for MIPv6 route optimization mode.
+config IPV6_VTI
+tristate "Virtual (secure) IPv6: tunneling"
+ select IPV6_TUNNEL
+ select NET_IP_TUNNEL
+ depends on INET6_XFRM_MODE_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This can be used with xfrm mode tunnel to give
+ the notion of a secure tunnel for IPSEC and then use routing protocol
+ on top.
+
config IPV6_SIT
tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
select INET_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 470a9c008e9..2fe68364bb2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
@@ -36,6 +36,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_NETFILTER) += netfilter/
+obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f3..5667b3003af 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -83,11 +83,7 @@
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
#include <linux/random.h>
-#endif
-
#include <linux/uaccess.h>
#include <asm/unaligned.h>
@@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
static void __ipv6_regen_rndid(struct inet6_dev *idev);
static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static void ipv6_regen_rndid(unsigned long data);
-#endif
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -139,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -157,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
u32 flags, u32 noflags);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
@@ -183,13 +179,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -221,13 +215,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -257,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
__in6_dev_put(idev);
}
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->dad_timer))
+ if (cancel_delayed_work(&ifp->dad_work))
__in6_ifa_put(ifp);
}
@@ -271,20 +263,29 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
mod_timer(&idev->rs_timer, jiffies + when);
}
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
- unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+ unsigned long delay)
{
- if (!timer_pending(&ifp->dad_timer))
+ if (!delayed_work_pending(&ifp->dad_work))
in6_ifa_hold(ifp);
- mod_timer(&ifp->dad_timer, jiffies + when);
+ mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
{
- if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ int i;
+
+ idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ if (!idev->stats.ipv6)
goto err_ip;
+
+ for_each_possible_cpu(i) {
+ struct ipstats_mib *addrconf_stats;
+ addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
+ u64_stats_init(&addrconf_stats->syncp);
+ }
+
+
idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
GFP_KERNEL);
if (!idev->stats.icmpv6dev)
@@ -299,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
err_icmpmsg:
kfree(idev->stats.icmpv6dev);
err_icmp:
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
err_ip:
return -ENOMEM;
}
@@ -371,7 +372,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
INIT_LIST_HEAD(&ndev->tempaddr_list);
setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
if ((dev->flags&IFF_LOOPBACK) ||
@@ -384,7 +384,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
in6_dev_hold(ndev);
ipv6_regen_rndid((unsigned long) ndev);
}
-#endif
+
ndev->token = in6addr_any;
if (netif_running(dev) && addrconf_qdisc_ok(dev))
@@ -439,6 +439,8 @@ static int inet6_netconf_msgsize_devconf(int type)
if (type == -1 || type == NETCONFA_MC_FORWARDING)
size += nla_total_size(4);
#endif
+ if (type == -1 || type == NETCONFA_PROXY_NEIGH)
+ size += nla_total_size(4);
return size;
}
@@ -472,6 +474,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
devconf->mc_forwarding) < 0)
goto nla_put_failure;
#endif
+ if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
+ nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+ goto nla_put_failure;
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -506,6 +512,7 @@ errout:
static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
[NETCONFA_IFINDEX] = { .len = sizeof(int) },
[NETCONFA_FORWARDING] = { .len = sizeof(int) },
+ [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
@@ -741,8 +748,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->dad_timer))
- pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+ if (cancel_delayed_work(&ifp->dad_work))
+ pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+ ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
pr_warn("Freeing alive inet6 address %p\n", ifp);
@@ -831,14 +839,15 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
+ neigh_parms_data_state_setall(idev->nd_parms);
+
ifa->addr = *addr;
if (peer_addr)
ifa->peer_addr = *peer_addr;
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- setup_timer(&ifa->dad_timer, addrconf_dad_timer,
- (unsigned long)ifa);
+ INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
ifa->scope = scope;
ifa->prefix_len = pfxlen;
@@ -865,12 +874,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
-#ifdef CONFIG_IPV6_PRIVACY
if (ifa->flags&IFA_F_TEMPORARY) {
list_add(&ifa->tmp_list, &idev->tempaddr_list);
in6_ifa_hold(ifa);
}
-#endif
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
@@ -890,15 +897,97 @@ out:
goto out2;
}
+enum cleanup_prefix_rt_t {
+ CLEANUP_PREFIX_RT_NOP, /* no cleanup action for prefix route */
+ CLEANUP_PREFIX_RT_DEL, /* delete the prefix route */
+ CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */
+};
+
+/*
+ * Check, whether the prefix for ifp would still need a prefix route
+ * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_*
+ * constants.
+ *
+ * 1) we don't purge prefix if address was not permanent.
+ * prefix is managed by its own lifetime.
+ * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE.
+ * 3) if there are no addresses, delete prefix.
+ * 4) if there are still other permanent address(es),
+ * corresponding prefix is still permanent.
+ * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE,
+ * don't purge the prefix, assume user space is managing it.
+ * 6) otherwise, update prefix lifetime to the
+ * longest valid lifetime among the corresponding
+ * addresses on the device.
+ * Note: subsequent RA will update lifetime.
+ **/
+static enum cleanup_prefix_rt_t
+check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
+{
+ struct inet6_ifaddr *ifa;
+ struct inet6_dev *idev = ifp->idev;
+ unsigned long lifetime;
+ enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL;
+
+ *expires = jiffies;
+
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (ifa == ifp)
+ continue;
+ if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ ifp->prefix_len))
+ continue;
+ if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
+ return CLEANUP_PREFIX_RT_NOP;
+
+ action = CLEANUP_PREFIX_RT_EXPIRE;
+
+ spin_lock(&ifa->lock);
+
+ lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+ /*
+ * Note: Because this address is
+ * not permanent, lifetime <
+ * LONG_MAX / HZ here.
+ */
+ if (time_before(*expires, ifa->tstamp + lifetime * HZ))
+ *expires = ifa->tstamp + lifetime * HZ;
+ spin_unlock(&ifa->lock);
+ }
+
+ return action;
+}
+
+static void
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+{
+ struct rt6_info *rt;
+
+ rt = addrconf_get_prefix_route(&ifp->addr,
+ ifp->prefix_len,
+ ifp->idev->dev,
+ 0, RTF_GATEWAY | RTF_DEFAULT);
+ if (rt) {
+ if (del_rt)
+ ip6_del_rt(rt);
+ else {
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ rt6_set_expires(rt, expires);
+ ip6_rt_put(rt);
+ }
+ }
+}
+
+
/* This function wants to get referenced ifp and releases it before return */
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- struct inet6_ifaddr *ifa, *ifn;
- struct inet6_dev *idev = ifp->idev;
int state;
- int deleted = 0, onlink = 0;
- unsigned long expires = jiffies;
+ enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+ unsigned long expires;
+
+ ASSERT_RTNL();
spin_lock_bh(&ifp->state_lock);
state = ifp->state;
@@ -912,8 +1001,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
hlist_del_init_rcu(&ifp->addr_lst);
spin_unlock_bh(&addrconf_hash_lock);
- write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
+ write_lock_bh(&ifp->idev->lock);
+
if (ifp->flags&IFA_F_TEMPORARY) {
list_del(&ifp->tmp_list);
if (ifp->ifpub) {
@@ -922,89 +1011,24 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
__in6_ifa_put(ifp);
}
-#endif
- list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
- if (ifa == ifp) {
- list_del_init(&ifp->if_list);
- __in6_ifa_put(ifp);
+ if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
+ action = check_cleanup_prefix_route(ifp, &expires);
- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
- break;
- deleted = 1;
- continue;
- } else if (ifp->flags & IFA_F_PERMANENT) {
- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
- ifp->prefix_len)) {
- if (ifa->flags & IFA_F_PERMANENT) {
- onlink = 1;
- if (deleted)
- break;
- } else {
- unsigned long lifetime;
-
- if (!onlink)
- onlink = -1;
-
- spin_lock(&ifa->lock);
-
- lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
- /*
- * Note: Because this address is
- * not permanent, lifetime <
- * LONG_MAX / HZ here.
- */
- if (time_before(expires,
- ifa->tstamp + lifetime * HZ))
- expires = ifa->tstamp + lifetime * HZ;
- spin_unlock(&ifa->lock);
- }
- }
- }
- }
- write_unlock_bh(&idev->lock);
+ list_del_init(&ifp->if_list);
+ __in6_ifa_put(ifp);
+
+ write_unlock_bh(&ifp->idev->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ipv6_ifa_notify(RTM_DELADDR, ifp);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifp);
- /*
- * Purge or update corresponding prefix
- *
- * 1) we don't purge prefix here if address was not permanent.
- * prefix is managed by its own lifetime.
- * 2) if there're no addresses, delete prefix.
- * 3) if there're still other permanent address(es),
- * corresponding prefix is still permanent.
- * 4) otherwise, update prefix lifetime to the
- * longest valid lifetime among the corresponding
- * addresses on the device.
- * Note: subsequent RA will update lifetime.
- *
- * --yoshfuji
- */
- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
- struct in6_addr prefix;
- struct rt6_info *rt;
-
- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
-
- rt = addrconf_get_prefix_route(&prefix,
- ifp->prefix_len,
- ifp->idev->dev,
- 0, RTF_GATEWAY | RTF_DEFAULT);
-
- if (rt) {
- if (onlink == 0) {
- ip6_del_rt(rt);
- rt = NULL;
- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
- rt6_set_expires(rt, expires);
- }
- }
- ip6_rt_put(rt);
+ if (action != CLEANUP_PREFIX_RT_NOP) {
+ cleanup_prefix_route(ifp, expires,
+ action == CLEANUP_PREFIX_RT_DEL);
}
/* clean up prefsrc entries */
@@ -1013,7 +1037,6 @@ out:
in6_ifa_put(ifp);
}
-#ifdef CONFIG_IPV6_PRIVACY
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
{
struct inet6_dev *idev = ifp->idev;
@@ -1025,7 +1048,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
u32 addr_flags;
unsigned long now = jiffies;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (ift) {
spin_lock_bh(&ift->lock);
memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
@@ -1037,7 +1060,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
retry:
in6_dev_hold(idev);
if (idev->cnf.use_tempaddr <= 0) {
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
pr_info("%s: use_tempaddr is disabled\n", __func__);
in6_dev_put(idev);
ret = -1;
@@ -1047,7 +1070,7 @@ retry:
if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
idev->cnf.use_tempaddr = -1; /*XXX*/
spin_unlock_bh(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
__func__);
in6_dev_put(idev);
@@ -1072,15 +1095,18 @@ retry:
regen_advance = idev->cnf.regen_max_retry *
idev->cnf.dad_transmits *
- idev->nd_parms->retrans_time / HZ;
- write_unlock(&idev->lock);
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+ write_unlock_bh(&idev->lock);
/* A temporary address is created only if this calculated Preferred
* Lifetime is greater than REGEN_ADVANCE time units. In particular,
* an implementation must not create a temporary address with a zero
* Preferred Lifetime.
+ * Use age calculation as in addrconf_verify to avoid unnecessary
+ * temporary addresses being generated.
*/
- if (tmp_prefered_lft <= regen_advance) {
+ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if (tmp_prefered_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -1100,7 +1126,7 @@ retry:
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
tmpaddr = &addr;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
goto retry;
}
@@ -1116,7 +1142,6 @@ retry:
out:
return ret;
}
-#endif
/*
* Choose an appropriate source address (RFC3484)
@@ -1131,9 +1156,7 @@ enum {
#endif
IPV6_SADDR_RULE_OIF,
IPV6_SADDR_RULE_LABEL,
-#ifdef CONFIG_IPV6_PRIVACY
IPV6_SADDR_RULE_PRIVACY,
-#endif
IPV6_SADDR_RULE_ORCHID,
IPV6_SADDR_RULE_PREFIX,
IPV6_SADDR_RULE_MAX
@@ -1204,7 +1227,7 @@ static int ipv6_get_saddr_eval(struct net *net,
* | d is scope of the destination.
* B-d | \
* | \ <- smaller scope is better if
- * B-15 | \ if scope is enough for destinaion.
+ * B-15 | \ if scope is enough for destination.
* | ret = B - scope (-1 <= scope >= d <= 15).
* d-C-1 | /
* |/ <- greater is better
@@ -1247,7 +1270,6 @@ static int ipv6_get_saddr_eval(struct net *net,
&score->ifa->addr, score->addr_type,
score->ifa->idev->dev->ifindex) == dst->label;
break;
-#ifdef CONFIG_IPV6_PRIVACY
case IPV6_SADDR_RULE_PRIVACY:
{
/* Rule 7: Prefer public address
@@ -1259,7 +1281,6 @@ static int ipv6_get_saddr_eval(struct net *net,
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
-#endif
case IPV6_SADDR_RULE_ORCHID:
/* Rule 8-: Prefer ORCHID vs ORCHID or
* non-ORCHID vs non-ORCHID
@@ -1413,12 +1434,14 @@ try_nextdev:
EXPORT_SYMBOL(ipv6_dev_get_saddr);
int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- unsigned char banned_flags)
+ u32 banned_flags)
{
struct inet6_ifaddr *ifp;
int err = -EADDRNOTAVAIL;
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope > IFA_LINK)
+ break;
if (ifp->scope == IFA_LINK &&
!(ifp->flags & banned_flags)) {
*addr = ifp->addr;
@@ -1430,7 +1453,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
}
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
- unsigned char banned_flags)
+ u32 banned_flags)
{
struct inet6_dev *idev;
int err = -EADDRNOTAVAIL;
@@ -1499,6 +1522,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
@@ -1553,7 +1603,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1561,7 +1611,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
} else if (ifp->flags&IFA_F_TEMPORARY) {
struct inet6_ifaddr *ifpub;
spin_lock_bh(&ifp->lock);
@@ -1575,21 +1624,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
-#endif
- } else
+ } else {
ipv6_del_addr(ifp);
+ }
}
static int addrconf_dad_end(struct inet6_ifaddr *ifp)
{
int err = -ENOENT;
- spin_lock(&ifp->state_lock);
+ spin_lock_bh(&ifp->state_lock);
if (ifp->state == INET6_IFADDR_STATE_DAD) {
ifp->state = INET6_IFADDR_STATE_POSTDAD;
err = 0;
}
- spin_unlock(&ifp->state_lock);
+ spin_unlock_bh(&ifp->state_lock);
return err;
}
@@ -1622,7 +1671,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
}
}
- addrconf_dad_stop(ifp, 1);
+ spin_lock_bh(&ifp->state_lock);
+ /* transition from _POSTDAD to _ERRDAD */
+ ifp->state = INET6_IFADDR_STATE_ERRDAD;
+ spin_unlock_bh(&ifp->state_lock);
+
+ addrconf_mod_dad_work(ifp, 0);
}
/* Join to solicited addr multicast group. */
@@ -1631,6 +1685,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1642,6 +1698,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1652,7 +1710,10 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- if (ifp->prefix_len == 127) /* RFC 6164 */
+
+ ASSERT_RTNL();
+
+ if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
@@ -1663,7 +1724,10 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- if (ifp->prefix_len == 127) /* RFC 6164 */
+
+ ASSERT_RTNL();
+
+ if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
@@ -1797,6 +1861,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_sit(eui, dev);
case ARPHRD_IPGRE:
return addrconf_ifid_gre(eui, dev);
+ case ARPHRD_6LOWPAN:
case ARPHRD_IEEE802154:
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
@@ -1813,7 +1878,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope > IFA_LINK)
+ break;
if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
memcpy(eui, ifp->addr.s6_addr+8, 8);
err = 0;
@@ -1824,7 +1891,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
return err;
}
-#ifdef CONFIG_IPV6_PRIVACY
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
static void __ipv6_regen_rndid(struct inet6_dev *idev)
{
@@ -1870,7 +1936,8 @@ static void ipv6_regen_rndid(unsigned long data)
expires = jiffies +
idev->cnf.temp_prefered_lft * HZ -
- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
+ idev->cnf.regen_max_retry * idev->cnf.dad_transmits *
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -
idev->cnf.max_desync_factor * HZ;
if (time_before(expires, jiffies)) {
pr_warn("%s: too short regeneration interval; timer disabled for %s\n",
@@ -1892,7 +1959,6 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
__ipv6_regen_rndid(idev);
}
-#endif
/*
* Add prefix route.
@@ -1979,23 +2045,6 @@ static void addrconf_add_mroute(struct net_device *dev)
ip6_route_add(&cfg);
}
-#if IS_ENABLED(CONFIG_IPV6_SIT)
-static void sit_route_add(struct net_device *dev)
-{
- struct fib6_config cfg = {
- .fc_table = RT6_TABLE_MAIN,
- .fc_metric = IP6_RT_PRIO_ADDRCONF,
- .fc_ifindex = dev->ifindex,
- .fc_dst_len = 96,
- .fc_flags = RTF_UP | RTF_NONEXTHOP,
- .fc_nlinfo.nl_net = dev_net(dev),
- };
-
- /* prefix length - 96 bits "::d.d.d.d" */
- ip6_route_add(&cfg);
-}
-#endif
-
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2016,6 +2065,73 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return idev;
}
+static void manage_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp,
+ __u32 valid_lft, __u32 prefered_lft,
+ bool create, unsigned long now)
+{
+ u32 flags;
+ struct inet6_ifaddr *ift;
+
+ read_lock_bh(&idev->lock);
+ /* update all temporary addresses in the list */
+ list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) {
+ int age, max_valid, max_prefered;
+
+ if (ifp != ift->ifpub)
+ continue;
+
+ /* RFC 4941 section 3.3:
+ * If a received option will extend the lifetime of a public
+ * address, the lifetimes of temporary addresses should
+ * be extended, subject to the overall constraint that no
+ * temporary addresses should ever remain "valid" or "preferred"
+ * for a time longer than (TEMP_VALID_LIFETIME) or
+ * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
+ */
+ age = (now - ift->cstamp) / HZ;
+ max_valid = idev->cnf.temp_valid_lft - age;
+ if (max_valid < 0)
+ max_valid = 0;
+
+ max_prefered = idev->cnf.temp_prefered_lft -
+ idev->cnf.max_desync_factor - age;
+ if (max_prefered < 0)
+ max_prefered = 0;
+
+ if (valid_lft > max_valid)
+ valid_lft = max_valid;
+
+ if (prefered_lft > max_prefered)
+ prefered_lft = max_prefered;
+
+ spin_lock(&ift->lock);
+ flags = ift->flags;
+ ift->valid_lft = valid_lft;
+ ift->prefered_lft = prefered_lft;
+ ift->tstamp = now;
+ if (prefered_lft > 0)
+ ift->flags &= ~IFA_F_DEPRECATED;
+
+ spin_unlock(&ift->lock);
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ift);
+ }
+
+ if ((create || list_empty(&idev->tempaddr_list)) &&
+ idev->cnf.use_tempaddr > 0) {
+ /* When a new public address is created as described
+ * in [ADDRCONF], also create a new temporary address.
+ * Also create a temporary address if it's enabled but
+ * no temporary address currently exists.
+ */
+ read_unlock_bh(&idev->lock);
+ ipv6_create_tempaddr(ifp, NULL);
+ } else {
+ read_unlock_bh(&idev->lock);
+ }
+}
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
@@ -2172,17 +2288,17 @@ ok:
update_lft = 0;
create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
ifp->cstamp = jiffies;
ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
addrconf_dad_start(ifp);
}
if (ifp) {
- int flags;
+ u32 flags;
unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
- struct inet6_ifaddr *ift;
-#endif
u32 stored_lft;
/* update lifetime (RFC2462 5.5.3 e) */
@@ -2193,43 +2309,21 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minimum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
}
if (update_lft) {
@@ -2245,74 +2339,11 @@ ok:
} else
spin_unlock(&ifp->lock);
-#ifdef CONFIG_IPV6_PRIVACY
- read_lock_bh(&in6_dev->lock);
- /* update all temporary addresses in the list */
- list_for_each_entry(ift, &in6_dev->tempaddr_list,
- tmp_list) {
- int age, max_valid, max_prefered;
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
- if (ifp != ift->ifpub)
- continue;
-
- /*
- * RFC 4941 section 3.3:
- * If a received option will extend the lifetime
- * of a public address, the lifetimes of
- * temporary addresses should be extended,
- * subject to the overall constraint that no
- * temporary addresses should ever remain
- * "valid" or "preferred" for a time longer than
- * (TEMP_VALID_LIFETIME) or
- * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR),
- * respectively.
- */
- age = (now - ift->cstamp) / HZ;
- max_valid = in6_dev->cnf.temp_valid_lft - age;
- if (max_valid < 0)
- max_valid = 0;
-
- max_prefered = in6_dev->cnf.temp_prefered_lft -
- in6_dev->cnf.max_desync_factor -
- age;
- if (max_prefered < 0)
- max_prefered = 0;
-
- if (valid_lft > max_valid)
- valid_lft = max_valid;
-
- if (prefered_lft > max_prefered)
- prefered_lft = max_prefered;
-
- spin_lock(&ift->lock);
- flags = ift->flags;
- ift->valid_lft = valid_lft;
- ift->prefered_lft = prefered_lft;
- ift->tstamp = now;
- if (prefered_lft > 0)
- ift->flags &= ~IFA_F_DEPRECATED;
-
- spin_unlock(&ift->lock);
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ift);
- }
-
- if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
- /*
- * When a new public address is created as
- * described in [ADDRCONF], also create a new
- * temporary address. Also create a temporary
- * address if it's enabled but no temporary
- * address currently exists.
- */
- read_unlock_bh(&in6_dev->lock);
- ipv6_create_tempaddr(ifp, NULL);
- } else {
- read_unlock_bh(&in6_dev->lock);
- }
-#endif
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify();
}
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2388,10 +2419,11 @@ err_exit:
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
+static int inet6_addr_add(struct net *net, int ifindex,
+ const struct in6_addr *pfx,
const struct in6_addr *peer_pfx,
- unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
- __u32 valid_lft)
+ unsigned int plen, __u32 ifa_flags,
+ __u32 prefered_lft, __u32 valid_lft)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2410,6 +2442,9 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
if (!valid_lft || prefered_lft > valid_lft)
return -EINVAL;
+ if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64)
+ return -EINVAL;
+
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
@@ -2442,24 +2477,30 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
valid_lft, prefered_lft);
if (!IS_ERR(ifp)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- expires, flags);
+ if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
+ expires, flags);
+ }
+
/*
* Note that section 3.1 of RFC 4429 indicates
* that the Optimistic flag should not be set for
* manually configured addresses
*/
addrconf_dad_start(ifp);
+ if (ifa_flags & IFA_F_MANAGETEMPADDR)
+ manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
+ true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
return PTR_ERR(ifp);
}
-static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
- unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+ const struct in6_addr *pfx, unsigned int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2482,7 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifa_flags & IFA_F_MANAGETEMPADDR))
+ manage_tempaddrs(idev, ifp, 0, 0, false,
+ jiffies);
ipv6_del_addr(ifp);
+ addrconf_verify_rtnl();
return 0;
}
}
@@ -2522,7 +2568,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
@@ -2534,7 +2580,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
struct inet6_ifaddr *ifp;
ifp = ipv6_add_addr(idev, addr, NULL, plen,
- scope, IFA_F_PERMANENT, 0, 0);
+ scope, IFA_F_PERMANENT,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -2550,7 +2597,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope;
+ int scope, plen;
+ u32 pflags = 0;
ASSERT_RTNL();
@@ -2560,12 +2608,16 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (idev->dev->flags&IFF_POINTOPOINT) {
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
+ plen = 64;
} else {
scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
- add_addr(idev, &addr, 128, scope);
+ add_addr(idev, &addr, plen, scope);
+ addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
return;
}
@@ -2577,7 +2629,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
int flag = scope;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
- int plen;
addr.s6_addr32[3] = ifa->ifa_local;
@@ -2588,12 +2639,10 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
continue;
flag |= IFA_HOST;
}
- if (idev->dev->flags&IFF_POINTOPOINT)
- plen = 64;
- else
- plen = 96;
add_addr(idev, &addr, plen, flag);
+ addrconf_prefix_route(&addr, plen, idev->dev, 0,
+ pflags);
}
}
}
@@ -2633,10 +2682,20 @@ static void init_loopback(struct net_device *dev)
if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
continue;
- if (sp_ifa->rt)
- continue;
+ if (sp_ifa->rt) {
+ /* This dst has been added to garbage list when
+ * lo device down, release this obsolete dst and
+ * reallocate a new router for ifa.
+ */
+ if (sp_ifa->rt->dst.obsolete > 0) {
+ ip6_rt_put(sp_ifa->rt);
+ sp_ifa->rt = NULL;
+ } else {
+ continue;
+ }
+ }
- sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
+ sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
/* Failure cases are ignored */
if (!IS_ERR(sp_rt)) {
@@ -2660,7 +2719,8 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
#endif
- ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0);
+ ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp);
@@ -2681,7 +2741,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_INFINIBAND) &&
(dev->type != ARPHRD_IEEE802154) &&
(dev->type != ARPHRD_IEEE1394) &&
- (dev->type != ARPHRD_TUNNEL6)) {
+ (dev->type != ARPHRD_TUNNEL6) &&
+ (dev->type != ARPHRD_6LOWPAN)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2719,7 +2780,6 @@ static void addrconf_sit_config(struct net_device *dev)
struct in6_addr addr;
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
return;
@@ -2729,8 +2789,6 @@ static void addrconf_sit_config(struct net_device *dev)
if (dev->flags&IFF_POINTOPOINT)
addrconf_add_mroute(dev);
- else
- sit_route_add(dev);
}
#endif
@@ -2748,25 +2806,13 @@ static void addrconf_gre_config(struct net_device *dev)
}
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
-
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
+ else
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
}
#endif
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
- struct in6_addr lladdr;
-
- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
- addrconf_add_linklocal(idev, &lladdr);
- return 0;
- }
- return -1;
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -2883,7 +2929,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
/*
- * MTU falled under IPV6_MIN_MTU.
+ * if MTU under IPV6_MIN_MTU.
* Stop IPv6 on this interface.
*/
@@ -2975,7 +3021,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
goto restart;
}
}
@@ -2990,7 +3036,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
-#ifdef CONFIG_IPV6_PRIVACY
if (how && del_timer(&idev->regen_timer))
in6_dev_put(idev);
@@ -3010,12 +3055,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)
in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
}
-#endif
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
list_del(&ifa->if_list);
@@ -3111,20 +3155,20 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
ifp->dad_probes = idev->cnf.dad_transmits;
- addrconf_mod_dad_timer(ifp, rand_num);
+ addrconf_mod_dad_work(ifp, rand_num);
}
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
addrconf_join_solict(dev, &ifp->addr);
- net_srandom(ifp->addr.s6_addr32[3]);
+ prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
read_lock_bh(&idev->lock);
spin_lock(&ifp->lock);
@@ -3169,25 +3213,68 @@ out:
read_unlock_bh(&idev->lock);
}
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ bool begin_dad = false;
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
+ begin_dad = true;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (begin_dad)
+ addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
+{
+ struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+ struct inet6_ifaddr,
+ dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
+ enum {
+ DAD_PROCESS,
+ DAD_BEGIN,
+ DAD_ABORT,
+ } action = DAD_PROCESS;
+
+ rtnl_lock();
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+ action = DAD_BEGIN;
+ ifp->state = INET6_IFADDR_STATE_DAD;
+ } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+ action = DAD_ABORT;
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (action == DAD_BEGIN) {
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+
if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
@@ -3198,7 +3285,7 @@ static void addrconf_dad_timer(unsigned long data)
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
addrconf_dad_completed(ifp);
@@ -3206,15 +3293,35 @@ static void addrconf_dad_timer(unsigned long data)
}
ifp->dad_probes--;
- addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time);
+ addrconf_mod_dad_work(ifp,
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
+ rtnl_unlock();
+}
+
+/* ifp->idev must be at least read locked */
+static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ifpiter;
+ struct inet6_dev *idev = ifp->idev;
+
+ list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) {
+ if (ifpiter->scope > IFA_LINK)
+ break;
+ if (ifp != ifpiter && ifpiter->scope == IFA_LINK &&
+ (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|
+ IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) ==
+ IFA_F_PERMANENT)
+ return false;
+ }
+ return true;
}
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
@@ -3223,7 +3330,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
struct in6_addr lladdr;
bool send_rs, send_mld;
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -3236,14 +3343,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
*/
read_lock_bh(&ifp->idev->lock);
- spin_lock(&ifp->lock);
- send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL &&
- ifp->idev->valid_ll_addr_cnt == 1;
+ send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits > 0 &&
(dev->flags&IFF_LOOPBACK) == 0;
- spin_unlock(&ifp->lock);
read_unlock_bh(&ifp->idev->lock);
/* While dad is in progress mld report's source address is in6_addrany.
@@ -3386,7 +3490,7 @@ static int if6_seq_show(struct seq_file *seq, void *v)
ifp->idev->dev->ifindex,
ifp->prefix_len,
ifp->scope,
- ifp->flags,
+ (u8) ifp->flags,
ifp->idev->dev->name);
return 0;
}
@@ -3467,26 +3571,31 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
* Periodic address status verification
*/
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
int i;
+ ASSERT_RTNL();
+
rcu_read_lock_bh();
- spin_lock(&addrconf_verify_lock);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp,
- &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
unsigned long age;
- if (ifp->flags & IFA_F_PERMANENT)
+ /* When setting preferred_lft to a value not zero or
+ * infinity, while valid_lft is infinity
+ * IFA_F_PERMANENT has a non-infinity life time.
+ */
+ if ((ifp->flags & IFA_F_PERMANENT) &&
+ (ifp->prefered_lft == INFINITY_LIFE_TIME))
continue;
spin_lock(&ifp->lock);
@@ -3511,7 +3620,8 @@ restart:
ifp->flags |= IFA_F_DEPRECATED;
}
- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+ if ((ifp->valid_lft != INFINITY_LIFE_TIME) &&
+ (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)))
next = ifp->tstamp + ifp->valid_lft * HZ;
spin_unlock(&ifp->lock);
@@ -3523,12 +3633,11 @@ restart:
in6_ifa_put(ifp);
goto restart;
}
-#ifdef CONFIG_IPV6_PRIVACY
} else if ((ifp->flags&IFA_F_TEMPORARY) &&
!(ifp->flags&IFA_F_TENTATIVE)) {
unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
ifp->idev->cnf.dad_transmits *
- ifp->idev->nd_parms->retrans_time / HZ;
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ;
if (age >= ifp->prefered_lft - regen_advance) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -3551,7 +3660,6 @@ restart:
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
spin_unlock(&ifp->lock);
-#endif
} else {
/* ifp->prefered_lft <= ifp->valid_lft */
if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -3574,13 +3682,22 @@ restart:
ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
-
- addr_chk_timer.expires = next_sched;
- add_timer(&addr_chk_timer);
- spin_unlock(&addrconf_verify_lock);
+ mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
+static void addrconf_verify_work(struct work_struct *w)
+{
+ rtnl_lock();
+ addrconf_verify_rtnl();
+ rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+ mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
struct in6_addr **peer_pfx)
{
@@ -3604,6 +3721,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
[IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
+ [IFA_FLAGS] = { .len = sizeof(u32) },
};
static int
@@ -3613,6 +3731,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx, *peer_pfx;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3624,19 +3743,33 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+ /* We ignore other flags so far. */
+ ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+ return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen);
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
u32 prefered_lft, u32 valid_lft)
{
u32 flags;
clock_t expires;
unsigned long timeout;
+ bool was_managetempaddr;
+ bool had_prefixroute;
+
+ ASSERT_RTNL();
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
+ if (ifa_flags & IFA_F_MANAGETEMPADDR &&
+ (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
+ return -EINVAL;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -3656,7 +3789,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
}
spin_lock_bh(&ifp->lock);
- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
+ was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
+ had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
+ !(ifp->flags & IFA_F_NOPREFIXROUTE);
+ ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD |
+ IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+ IFA_F_NOPREFIXROUTE);
+ ifp->flags |= ifa_flags;
ifp->tstamp = jiffies;
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
@@ -3665,9 +3804,31 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
- expires, flags);
- addrconf_verify(0);
+ if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
+ expires, flags);
+ } else if (had_prefixroute) {
+ enum cleanup_prefix_rt_t action;
+ unsigned long rt_expires;
+
+ write_lock_bh(&ifp->idev->lock);
+ action = check_cleanup_prefix_route(ifp, &rt_expires);
+ write_unlock_bh(&ifp->idev->lock);
+
+ if (action != CLEANUP_PREFIX_RT_NOP) {
+ cleanup_prefix_route(ifp, rt_expires,
+ action == CLEANUP_PREFIX_RT_DEL);
+ }
+ }
+
+ if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ valid_lft = prefered_lft = 0;
+ manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft,
+ !was_managetempaddr, jiffies);
+ }
+
+ addrconf_verify_rtnl();
return 0;
}
@@ -3682,7 +3843,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct inet6_ifaddr *ifa;
struct net_device *dev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
- u8 ifa_flags;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3709,14 +3870,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (dev == NULL)
return -ENODEV;
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
/* We ignore other flags so far. */
- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
+ IFA_F_NOPREFIXROUTE;
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (ifa == NULL) {
/*
* It would be best to check for !NLM_F_CREATE here but
- * userspace alreay relies on not having to provide this.
+ * userspace already relies on not having to provide this.
*/
return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
@@ -3734,7 +3898,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
-static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags,
u8 scope, int ifindex)
{
struct ifaddrmsg *ifm;
@@ -3777,7 +3941,8 @@ static inline int inet6_ifaddr_msgsize(void)
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ nla_total_size(16) /* IFA_LOCAL */
+ nla_total_size(16) /* IFA_ADDRESS */
- + nla_total_size(sizeof(struct ifa_cacheinfo));
+ + nla_total_size(sizeof(struct ifa_cacheinfo))
+ + nla_total_size(4) /* IFA_FLAGS */;
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
@@ -3793,7 +3958,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
- if (!(ifa->flags&IFA_F_PERMANENT)) {
+ if (!((ifa->flags&IFA_F_PERMANENT) &&
+ (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
valid = ifa->valid_lft;
if (preferred != INFINITY_LIFE_TIME) {
@@ -3825,6 +3991,9 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
goto error;
+ if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+ goto error;
+
return nlmsg_end(skb, nlh);
error:
@@ -4123,13 +4292,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
-#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
@@ -4191,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
memset(&stats[items], 0, pad);
}
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
int items, int bytes, size_t syncpoff)
{
int i;
@@ -4211,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
{
switch (attrtype) {
case IFLA_INET6_STATS:
- __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+ __snmp6_fill_stats64(stats, idev->stats.ipv6,
IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
@@ -4230,7 +4397,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
ci.max_reasm_len = IPV6_MAXPLEN;
ci.tstamp = cstamp_delta(idev->tstamp);
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
- ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
+ ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
@@ -4291,6 +4458,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
bool update_rs = false;
struct in6_addr ll_addr;
+ ASSERT_RTNL();
+
if (token == NULL)
return -EINVAL;
if (ipv6_addr_any(token))
@@ -4339,7 +4508,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}
write_unlock_bh(&idev->lock);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -4537,29 +4706,17 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
-static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count)
-{
- write_lock_bh(&ifp->idev->lock);
- spin_lock(&ifp->lock);
- if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|
- IFA_F_DADFAILED)) == IFA_F_PERMANENT) &&
- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL))
- ifp->idev->valid_ll_addr_cnt += count;
- WARN_ON(ifp->idev->valid_ll_addr_cnt < 0);
- spin_unlock(&ifp->lock);
- write_unlock_bh(&ifp->idev->lock);
-}
-
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
+ if (event)
+ ASSERT_RTNL();
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
case RTM_NEWADDR:
- update_valid_ll_addr_cnt(ifp, 1);
-
/*
* If the address was optimistic
* we inserted the route at the start of
@@ -4575,8 +4732,6 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
ifp->idev->dev, 0, 0);
break;
case RTM_DELADDR:
- update_valid_ll_addr_cnt(ifp, -1);
-
if (ifp->idev->cnf.forwarding)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
@@ -4723,6 +4878,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
return ret;
}
+static
+int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int ret;
+ int old, new;
+
+ old = *valp;
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ new = *valp;
+
+ if (write && old != new) {
+ struct net *net = ctl->extra2;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+ else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
+ else {
+ struct inet6_dev *idev = ctl->extra1;
+
+ inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ idev->dev->ifindex,
+ &idev->cnf);
+ }
+ rtnl_unlock();
+ }
+
+ return ret;
+}
+
+
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
@@ -4823,7 +5018,6 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
-#ifdef CONFIG_IPV6_PRIVACY
{
.procname = "use_tempaddr",
.data = &ipv6_devconf.use_tempaddr,
@@ -4859,7 +5053,6 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "max_addresses",
.data = &ipv6_devconf.max_addresses,
@@ -4911,7 +5104,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.proxy_ndp,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_sysctl_proxy_ndp,
},
{
.procname = "accept_source_route",
@@ -5027,7 +5220,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
static void addrconf_sysctl_register(struct inet6_dev *idev)
{
- neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
+ neigh_sysctl_register(idev->dev, idev->nd_parms,
&ndisc_ifinfo_sysctl_change);
__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev, &idev->cnf);
@@ -5128,6 +5321,12 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
+ addrconf_wq = create_workqueue("ipv6_addrconf");
+ if (!addrconf_wq) {
+ err = -ENOMEM;
+ goto out_nowq;
+ }
+
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
@@ -5158,11 +5357,9 @@ int __init addrconf_init(void)
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify(0);
+ addrconf_verify();
- err = rtnl_af_register(&inet6_ops);
- if (err < 0)
- goto errout_af;
+ rtnl_af_register(&inet6_ops);
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
NULL);
@@ -5186,9 +5383,10 @@ int __init addrconf_init(void)
return 0;
errout:
rtnl_af_unregister(&inet6_ops);
-errout_af:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
+ destroy_workqueue(addrconf_wq);
+out_nowq:
unregister_pernet_subsys(&addrconf_ops);
out_addrlabel:
ipv6_addr_label_cleanup();
@@ -5224,7 +5422,8 @@ void addrconf_cleanup(void)
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
-
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
+
+ destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbcf830..e6960457f62 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev)
{
kfree(idev->stats.icmpv6msgdev);
kfree(idev->stats.icmpv6dev);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
}
/* Nobody refers to this device, we may destroy it. */
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b30ad3741b4..731e1e1722d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* fec0::/10 */
- .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
.prefixlen = 10,
.label = 11,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 3ffe::/16 */
- .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
.prefixlen = 16,
.label = 12,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7c96100b021..7cb4392690d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -106,15 +106,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
- if (sock->type != SOCK_RAW &&
- sock->type != SOCK_DGRAM &&
- !inet_ehash_secret)
- build_ehash_secret();
-
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -167,7 +161,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -181,7 +174,6 @@ lookup_protocol:
sock_init_data(sock, sk);
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
@@ -218,7 +210,7 @@ lookup_protocol:
inet->mc_list = NULL;
inet->rcv_tos = 0;
- if (ipv4_config.no_pmtu_disc)
+ if (net->ipv4.sysctl_ip_no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -364,7 +356,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = v4addr;
inet->inet_saddr = v4addr;
- np->rcv_saddr = addr->sin6_addr;
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
@@ -461,14 +453,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
peer == 1)
return -ENOTCONN;
sin->sin6_port = inet->inet_dport;
- sin->sin6_addr = np->daddr;
+ sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
} else {
- if (ipv6_addr_any(&np->rcv_saddr))
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
sin->sin6_addr = np->saddr;
else
- sin->sin6_addr = np->rcv_saddr;
+ sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
}
@@ -655,7 +647,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = np->saddr;
fl6.flowlabel = np->flow_label;
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -666,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
final_p = fl6_update_dst(&fl6, np->opt, &final);
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
sk->sk_route_caps = 0;
sk->sk_err_soft = -PTR_ERR(dst);
@@ -688,8 +680,7 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK &
- *(__be32 *)skb_network_header(skb)) &&
+ (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
np->rxopt.bits.osrcrt)) ||
@@ -719,21 +710,27 @@ static void ipv6_packet_cleanup(void)
static int __net_init ipv6_init_mibs(struct net *net)
{
- if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ int i;
+
+ net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udp_stats_in6)
return -ENOMEM;
- if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udplite_stats_in6)
goto err_udplite_mib;
- if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+ if (!net->mib.ipv6_statistics)
goto err_ip_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
- sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+
+ for_each_possible_cpu(i) {
+ struct ipstats_mib *af_inet6_stats;
+ af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
+ u64_stats_init(&af_inet6_stats->syncp);
+ }
+
+
+ net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+ if (!net->mib.icmpv6_statistics)
goto err_icmp_mib;
net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
GFP_KERNEL);
@@ -742,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net)
return 0;
err_icmpmsg_mib:
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
err_icmp_mib:
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+ free_percpu(net->mib.ipv6_statistics);
err_ip_mib:
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
err_udplite_mib:
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ free_percpu(net->mib.udp_stats_in6);
return -ENOMEM;
}
static void ipv6_cleanup_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.udp_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
+ free_percpu(net->mib.ipv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
kfree(net->mib.icmpv6msg_statistics);
}
@@ -767,6 +764,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
+ net->ipv6.sysctl.flowlabel_consistency = 1;
atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
@@ -870,8 +868,6 @@ static int __init inet6_init(void)
if (err)
goto out_sock_register_fail;
- tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
-
/*
* ipngwg API draft makes clear that the correct semantics
* for TCP and UDP is to consider one TCP and UDP instance
@@ -965,10 +961,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- ipv6_packet_cleanup();
+ pingv6_exit();
#endif
pingv6_fail:
- pingv6_exit();
+ ipv6_packet_cleanup();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -1028,52 +1024,4 @@ out_unregister_tcp_proto:
}
module_init(inet6_init);
-static void __exit inet6_exit(void)
-{
- if (disable_ipv6_mod)
- return;
-
- /* First of all disallow new sockets creation. */
- sock_unregister(PF_INET6);
- /* Disallow any further netlink messages */
- rtnl_unregister_all(PF_INET6);
-
- udpv6_exit();
- udplitev6_exit();
- tcpv6_exit();
-
- /* Cleanup code parts. */
- ipv6_packet_cleanup();
- ipv6_frag_exit();
- ipv6_exthdrs_exit();
- addrconf_cleanup();
- ip6_flowlabel_cleanup();
- ndisc_late_cleanup();
- ip6_route_cleanup();
-#ifdef CONFIG_PROC_FS
-
- /* Cleanup code parts. */
- if6_proc_exit();
- ipv6_misc_proc_exit();
- udplite6_proc_exit();
- raw6_proc_exit();
-#endif
- ipv6_netfilter_fini();
- ipv6_stub = NULL;
- igmp6_cleanup();
- ndisc_cleanup();
- ip6_mr_cleanup();
- icmpv6_cleanup();
- rawv6_exit();
-
- unregister_pernet_subsys(&inet6_net_ops);
- proto_unregister(&rawv6_prot);
- proto_unregister(&udplitev6_prot);
- proto_unregister(&udpv6_prot);
- proto_unregister(&tcpv6_prot);
-
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
-}
-module_exit(inet6_exit);
-
MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 73784c3d464..72a4930bdc0 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors
*
@@ -347,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -360,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -412,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -515,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -551,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -573,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -610,28 +643,29 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -716,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -729,10 +768,11 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
@@ -742,7 +782,7 @@ static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
@@ -753,7 +793,7 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 5a80f15a9de..21018324468 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -383,6 +383,17 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
return found;
}
+/* check if this anycast address is link-local on given interface or
+ * is global
+ */
+bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
+{
+ return ipv6_chk_acast_addr(net,
+ (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
+ dev : NULL),
+ addr);
+}
#ifdef CONFIG_PROC_FS
struct ac6_iter_state {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 48b6bd2a9a1..c3bf2d2e519 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- usin->sin6_addr = flowlabel->dst;
}
}
@@ -107,16 +106,16 @@ ipv4_connected:
if (err)
goto out;
- ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+ ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
if (ipv6_addr_any(&np->saddr) ||
ipv6_mapped_addr_any(&np->saddr))
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- if (ipv6_addr_any(&np->rcv_saddr) ||
- ipv6_mapped_addr_any(&np->rcv_saddr)) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+ ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
@@ -145,7 +144,7 @@ ipv4_connected:
}
}
- np->daddr = *daddr;
+ sk->sk_v6_daddr = *daddr;
np->flow_label = fl6.flowlabel;
inet->inet_dport = usin->sin6_port;
@@ -156,7 +155,7 @@ ipv4_connected:
*/
fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -171,7 +170,7 @@ ipv4_connected:
opt = flowlabel ? flowlabel->opt : np->opt;
final_p = fl6_update_dst(&fl6, opt, &final);
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -183,16 +182,16 @@ ipv4_connected:
if (ipv6_addr_any(&np->saddr))
np->saddr = fl6.saddr;
- if (ipv6_addr_any(&np->rcv_saddr)) {
- np->rcv_saddr = fl6.saddr;
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ sk->sk_v6_rcv_saddr = fl6.saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
@@ -206,6 +205,16 @@ out:
}
EXPORT_SYMBOL_GPL(ip6_datagram_connect);
+int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr);
+ if (sin6->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+ return ip6_datagram_connect(sk, uaddr, addr_len);
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
+
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
@@ -318,12 +327,12 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
/*
* Handle MSG_ERRQUEUE
*/
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
- struct sockaddr_in6 *sin;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
struct sockaddr_in6 offender;
@@ -349,7 +358,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
serr = SKB_EXT_ERR(skb);
- sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
@@ -369,6 +377,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
&sin->sin6_addr);
sin->sin6_scope_id = 0;
}
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -377,10 +386,13 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
+ sin->sin6_port = 0;
+ if (np->rxopt.all)
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
- ip6_datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
sin->sin6_scope_id =
ipv6_iface_scope_id(&sin->sin6_addr,
IP6CB(skb)->iif);
@@ -423,12 +435,13 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error);
/*
* Handle IPV6_RECVPATHMTU
*/
-int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
- struct sockaddr_in6 *sin;
struct ip6_mtuinfo mtu_info;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
int err;
int copied;
@@ -450,13 +463,13 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
- sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+ *addr_len = sizeof(*sin);
}
put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -470,20 +483,34 @@ out:
}
-int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
- struct sk_buff *skb)
+void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_skb_parm *opt = IP6CB(skb);
- unsigned char *nh = skb_network_header(skb);
+ bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6);
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = opt->iif;
- src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+ if (is_ipv6) {
+ src_info.ipi6_ifindex = IP6CB(skb)->iif;
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+ } else {
+ src_info.ipi6_ifindex =
+ PKTINFO_SKB_CB(skb)->ipi_ifindex;
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
+ &src_info.ipi6_addr);
+ }
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
+}
+
+void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ unsigned char *nh = skb_network_header(skb);
if (np->rxopt.bits.rxhlim) {
int hlim = ipv6_hdr(skb)->hop_limit;
@@ -601,7 +628,13 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
}
- return 0;
+}
+
+void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
}
EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
@@ -666,7 +699,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
!ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0))
+ strict ? dev : NULL, 0) &&
+ !ipv6_chk_acast_addr_src(net, dev,
+ &src_info->ipi6_addr))
err = -EINVAL;
else
fl6->saddr = src_info->ipi6_addr;
@@ -883,11 +918,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
__u16 srcp, __u16 destp, int bucket)
{
- struct ipv6_pinfo *np = inet6_sk(sp);
const struct in6_addr *dest, *src;
- dest = &np->daddr;
- src = &np->rcv_saddr;
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
seq_printf(seq,
"%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d3618a78fca..d15da137714 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors
*
@@ -164,10 +163,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
u8 *iv;
u8 *tail;
__be32 *seqhi;
- struct esp_data *esp = x->data;
/* skb is pure payload to encrypt */
- aead = esp->aead;
+ aead = x->data;
alen = crypto_aead_authsize(aead);
tfclen = 0;
@@ -181,8 +179,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
}
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
- if (esp->padlen)
- clen = ALIGN(clen, esp->padlen);
plen = clen - skb->len - tfclen;
err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
@@ -271,8 +267,7 @@ error:
static int esp_input_done2(struct sk_buff *skb, int err)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int elen = skb->len - hlen;
@@ -325,8 +320,7 @@ static void esp_input_done(struct crypto_async_request *base, int err)
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
@@ -414,9 +408,8 @@ out:
static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
- struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
- u32 align = max_t(u32, blksize, esp->padlen);
+ struct crypto_aead *aead = x->data;
+ u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
unsigned int net_adj;
if (x->props.mode != XFRM_MODE_TUNNEL)
@@ -424,49 +417,48 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
else
net_adj = 0;
- return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
- net_adj) & ~(align - 1)) + net_adj - 2;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
+ struct crypto_aead *aead = x->data;
- if (!esp)
+ if (!aead)
return;
- crypto_free_aead(esp->aead);
- kfree(esp);
+ crypto_free_aead(aead);
}
static int esp_init_aead(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
int err;
@@ -475,7 +467,7 @@ static int esp_init_aead(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
err = crypto_aead_setkey(aead, x->aead->alg_key,
(x->aead->alg_key_len + 7) / 8);
@@ -492,7 +484,6 @@ error:
static int esp_init_authenc(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
struct rtattr *rta;
@@ -527,7 +518,7 @@ static int esp_init_authenc(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
(x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
@@ -582,7 +573,6 @@ error:
static int esp6_init_state(struct xfrm_state *x)
{
- struct esp_data *esp;
struct crypto_aead *aead;
u32 align;
int err;
@@ -590,11 +580,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (x->encap)
return -EINVAL;
- esp = kzalloc(sizeof(*esp), GFP_KERNEL);
- if (esp == NULL)
- return -ENOMEM;
-
- x->data = esp;
+ x->data = NULL;
if (x->aead)
err = esp_init_aead(x);
@@ -604,9 +590,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (err)
goto error;
- aead = esp->aead;
-
- esp->padlen = 0;
+ aead = x->data;
x->props.header_len = sizeof(struct ip_esp_hdr) +
crypto_aead_ivsize(aead);
@@ -626,14 +610,17 @@ static int esp6_init_state(struct xfrm_state *x)
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
- if (esp->padlen)
- align = max_t(u32, align, esp->padlen);
- x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
error:
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -648,10 +635,11 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
@@ -660,7 +648,7 @@ static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
@@ -671,7 +659,7 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 140748debc4..8af3eb57f43 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
found = (nexthdr == target);
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
+ if (target < 0 || found)
break;
return -ENOENT;
}
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index cf77f3abfd0..447a7fbd1bb 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)
int ret;
ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
- if (!ret)
+ if (ret)
goto out;
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
- if (!ret)
+ if (ret)
goto out_rt;
out:
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index e27591635f9..b4d5e1d97c1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -122,7 +122,11 @@ out:
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
struct rt6_info *rt = (struct rt6_info *) arg->result;
- struct net_device *dev = rt->rt6i_idev->dev;
+ struct net_device *dev = NULL;
+
+ if (rt->rt6i_idev)
+ dev = rt->rt6i_idev->dev;
+
/* do not accept result if the route does
* not meet the required prefix length
*/
@@ -165,7 +169,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
return 0;
}
- if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff))
+ if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
return 1;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index eef8d945b36..f6c84a6eb23 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -67,6 +67,7 @@
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
+#include <net/dsfield.h>
#include <asm/uaccess.h>
@@ -315,8 +316,10 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
-struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
- struct sock *sk, struct flowi6 *fl6)
+static struct dst_entry *icmpv6_route_lookup(struct net *net,
+ struct sk_buff *skb,
+ struct sock *sk,
+ struct flowi6 *fl6)
{
struct dst_entry *dst, *dst2;
struct flowi6 fl2;
@@ -397,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int len;
int hlimit;
int err = 0;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -410,7 +414,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
*/
addr_type = ipv6_addr_type(&hdr->daddr);
- if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
+ if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
+ ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
saddr = &hdr->daddr;
/*
@@ -462,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.daddr = hdr->saddr;
if (saddr)
fl6.saddr = *saddr;
+ fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
@@ -470,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -489,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -516,7 +518,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -551,10 +553,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct dst_entry *dst;
int err = 0;
int hlimit;
+ u8 tclass;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr;
- if (!ipv6_unicast_destination(skb))
+ if (!ipv6_unicast_destination(skb) &&
+ !(net->ipv6.sysctl.anycast_src_echo_reply &&
+ ipv6_anycast_destination(skb)))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
@@ -567,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -586,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
idev = __in6_dev_get(skb->dev);
@@ -599,8 +602,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
msg.offset = 0;
msg.type = ICMPV6_ECHO_REPLY;
+ tclass = ipv6_get_dsfield(ipv6_hdr(skb));
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
+ sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
np->dontfrag);
@@ -694,22 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
- /* Perform checksum. */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
- skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
- saddr, daddr);
- goto csum_error;
- }
+ if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
+ goto csum_error;
}
if (!pskb_pull(skb, sizeof(*hdr)))
@@ -984,7 +977,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
EXPORT_SYMBOL(icmpv6_err_convert);
#ifdef CONFIG_SYSCTL
-struct ctl_table ipv6_icmp_table_template[] = {
+static struct ctl_table ipv6_icmp_table_template[] = {
{
.procname = "ratelimit",
.data = &init_net.ipv6.sysctl.icmpv6_time,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e4311cbc8b4..a245e5ddffb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -70,23 +70,23 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
struct flowi6 *fl6,
const struct request_sock *req)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = IPPROTO_TCP;
- fl6->daddr = treq->rmt_addr;
+ fl6->daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(fl6, np->opt, &final);
- fl6->saddr = treq->loc_addr;
- fl6->flowi6_oif = treq->iif;
- fl6->flowi6_mark = sk->sk_mark;
- fl6->fl6_dport = inet_rsk(req)->rmt_port;
- fl6->fl6_sport = inet_rsk(req)->loc_port;
+ fl6->saddr = ireq->ir_v6_loc_addr;
+ fl6->flowi6_oif = ireq->ir_iif;
+ fl6->flowi6_mark = ireq->ir_mark;
+ fl6->fl6_dport = ireq->ir_rmt_port;
+ fl6->fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
- dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (IS_ERR(dst))
return NULL;
@@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
lopt->nr_table_entries)];
(req = *prev) != NULL;
prev = &req->dl_next) {
- const struct inet6_request_sock *treq = inet6_rsk(req);
+ const struct inet_request_sock *ireq = inet_rsk(req);
- if (inet_rsk(req)->rmt_port == rport &&
+ if (ireq->ir_rmt_port == rport &&
req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&treq->rmt_addr, raddr) &&
- ipv6_addr_equal(&treq->loc_addr, laddr) &&
- (!treq->iif || treq->iif == iif)) {
+ ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+ ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+ (!ireq->ir_iif || ireq->ir_iif == iif)) {
WARN_ON(req->sk != NULL);
*prevp = prev;
return req;
@@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
- inet_rsk(req)->rmt_port,
+ const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+ inet_rsk(req)->ir_rmt_port,
lopt->hash_rnd, lopt->nr_table_entries);
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = np->daddr;
+ sin6->sin6_addr = sk->sk_v6_daddr;
sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
@@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = sk->sk_protocol;
- fl6->daddr = np->daddr;
+ fl6->daddr = sk->sk_v6_daddr;
fl6->saddr = np->saddr;
fl6->flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl6->flowlabel);
@@ -217,7 +216,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
- dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!IS_ERR(dst))
__inet6_csk_dst_store(sk, dst, NULL, NULL);
@@ -225,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
- struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 fl6;
struct dst_entry *dst;
@@ -245,7 +243,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
skb_dst_set_noref(skb, dst);
/* Restore final destination back after routing done */
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
rcu_read_unlock();
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a1675d8..262e13c02ec 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,6 +23,39 @@
#include <net/secure_seq.h>
#include <net/ip.h>
+static unsigned int inet6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 inet6_ehash_secret __read_mostly;
+ static u32 ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+ net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ inet6_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *faddr = &sk->sk_v6_daddr;
+ const __u16 lport = inet->inet_num;
+ const __be16 fport = inet->inet_dport;
+ struct net *net = sock_net(sk);
+
+ return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
@@ -89,43 +122,22 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
- goto begintw;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- sock_put(sk);
- goto begin;
- }
- goto out;
- }
- }
- if (get_nulls_value(node) != slot)
- goto begin;
-
-begintw:
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (sk->sk_hash != hash)
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
continue;
- if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
- sk = NULL;
- goto out;
- }
- if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- sock_put(sk);
- goto begintw;
- }
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto out;
+
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ sock_gen_put(sk);
+ goto begin;
}
+ goto found;
}
if (get_nulls_value(node) != slot)
- goto begintw;
- sk = NULL;
+ goto begin;
out:
+ sk = NULL;
+found:
rcu_read_unlock();
return sk;
}
@@ -140,11 +152,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
score = 1;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
@@ -236,9 +247,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct in6_addr *daddr = &np->rcv_saddr;
- const struct in6_addr *saddr = &np->daddr;
+ const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
@@ -248,38 +258,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
+ struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock);
- /* Check TIME-WAIT sockets first. */
- sk_nulls_for_each(sk2, node, &head->twchain) {
- if (sk2->sk_hash != hash)
- continue;
-
- if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
- ports, dif))) {
- tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
- }
- }
- tw = NULL;
-
- /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) {
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
+
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ break;
+ }
goto not_unique;
+ }
}
-unique:
/* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
+ * in hash table socket with a funny identity.
+ */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
sk->sk_hash = hash;
@@ -312,9 +312,9 @@ not_unique:
static inline u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
- np->daddr.s6_addr32,
+
+ return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_dport);
}
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 72d198b8e4d..9a4d7322fb2 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -75,23 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
return err;
}
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- return 1;
- }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
- skb->len, proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+ * we accept a checksum of zero here. When we find the socket
+ * for the UDP packet we'll check if that socket allows zero checksum
+ * for IPv6 (set by socket option).
+ */
+ return skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+}
+EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
- if (!skb_csum_unnecessary(skb))
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, proto, 0));
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
- return 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v6_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
}
-EXPORT_SYMBOL(udp6_csum_init);
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5bec666aba6..cb4459bd1d2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -9,14 +9,12 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- */
-
-/*
- * Changes:
- * Yuji SEKIYA @USAGI: Support default route on router node;
- * remove ip6_null_entry from the top of
- * routing table.
- * Ville Nuorvala: Fixed routing subtrees.
+ *
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -46,10 +44,9 @@
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-static struct kmem_cache * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
#endif
@@ -59,8 +56,7 @@ enum fib_walk_state_t
FWS_U
};
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
struct fib6_walker_t w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
@@ -75,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker_t *w);
@@ -138,7 +133,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
const __be32 *addr = token;
/*
* Here,
- * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
* is optimized version of
* htonl(1 << ((~fn_bit)&0x1F))
* See include/asm-generic/bitops/le.h.
@@ -147,7 +142,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node * node_alloc(void)
+static __inline__ struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -156,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node * fn)
+static __inline__ void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
@@ -292,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void*)cb->args[2];
+ struct fib6_walker_t *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -302,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
cb->args[2] = 0;
kfree(w);
}
- cb->done = (void*)cb->args[3];
+ cb->done = (void *)cb->args[3];
cb->args[1] = 3;
}
@@ -485,7 +480,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right: fn->left;
+ fn = dir ? fn->right : fn->left;
} while (fn);
if (!allow_create) {
@@ -638,12 +633,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
RTF_GATEWAY;
}
+static int fib6_commit_metrics(struct dst_entry *dst,
+ struct nlattr *mx, int mx_len)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ if (dst->flags & DST_HOST) {
+ mp = dst_metrics_write_ptr(dst);
+ } else {
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
+ dst_init_metrics(dst, mp, 0);
+ }
+
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ mp[type - 1] = nla_get_u32(nla);
+ }
+ }
+ return 0;
+}
+
/*
* Insert routing information in a node.
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info)
+ struct nl_info *info, struct nlattr *mx, int mx_len)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -653,6 +677,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ int err;
ins = &fn->leaf;
@@ -751,6 +776,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
rt->dst.rt6_next = iter;
*ins = rt;
rt->rt6i_node = fn;
@@ -770,6 +800,11 @@ add:
pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
return -ENOENT;
}
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
*ins = rt;
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
@@ -806,7 +841,8 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -900,11 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
}
#endif
- err = fib6_add_rt2node(fn, rt, info);
+ err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
if (!err) {
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -955,8 +991,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -1018,8 +1054,8 @@ backtrack:
return NULL;
}
-struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -1051,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
*/
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
- const struct in6_addr *addr,
- int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
{
struct fib6_node *fn;
@@ -1081,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_locate(struct fib6_node *root,
- const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
{
struct fib6_node *fn;
@@ -1151,8 +1187,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn->right) child = fn->right, children |= 1;
- if (fn->left) child = fn->left, children |= 2;
+ if (fn->right)
+ child = fn->right, children |= 1;
+ if (fn->left)
+ child = fn->left, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1180,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn) pn->right = child;
- else if (pn->left == fn) pn->left = child;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
#if RT6_DEBUG >= 2
else
WARN_ON(1);
@@ -1213,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
w->node = child;
if (children&2) {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
}
@@ -1314,7 +1354,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
- if (rt->dst.obsolete>0) {
+ if (rt->dst.obsolete > 0) {
WARN_ON(fn != NULL);
return -ENOENT;
}
@@ -1334,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
pn = pn->parent;
}
#endif
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
/*
@@ -1418,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
if (w->skip) {
w->skip--;
- continue;
+ goto skip;
}
err = w->func(w);
@@ -1428,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->count++;
continue;
}
+skip:
w->state = FWS_U;
case FWS_U:
if (fn == w->root)
@@ -1529,27 +1570,8 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
fib6_walk(&c.w);
}
-void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
-{
- struct fib6_table *table;
- struct hlist_head *head;
- unsigned int h;
-
- rcu_read_lock();
- for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- read_lock_bh(&table->tb6_lock);
- fib6_clean_tree(net, &table->tb6_root,
- func, prune, arg);
- read_unlock_bh(&table->tb6_lock);
- }
- }
- rcu_read_unlock();
-}
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
+ void *arg)
{
struct fib6_table *table;
struct hlist_head *head;
@@ -1561,7 +1583,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, prune, arg);
+ func, 0, arg);
write_unlock_bh(&table->tb6_lock);
}
}
@@ -1578,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
return 0;
}
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
{
- fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
}
/*
@@ -1655,7 +1676,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
gc_args.more = icmp6_dst_gc();
- fib6_clean_all(net, fib6_age, 0, NULL);
+ fib6_clean_all(net, fib6_age, NULL);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -1726,7 +1747,7 @@ out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
return -ENOMEM;
- }
+}
static void fib6_net_exit(struct net *net)
{
@@ -1782,3 +1803,189 @@ void fib6_gc_cleanup(void)
unregister_pernet_subsys(&fib6_net_ops);
kmem_cache_destroy(fib6_node_kmem);
}
+
+#ifdef CONFIG_PROC_FS
+
+struct ipv6_route_iter {
+ struct seq_net_private p;
+ struct fib6_walker_t w;
+ loff_t skip;
+ struct fib6_table *tbl;
+ __u32 sernum;
+};
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+ struct rt6_info *rt = v;
+ struct ipv6_route_iter *iter = seq->private;
+
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+#else
+ seq_puts(seq, "00000000000000000000000000000000 00 ");
+#endif
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+ else
+ seq_puts(seq, "00000000000000000000000000000000");
+
+ seq_printf(seq, " %08x %08x %08x %08x %8s\n",
+ rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+ rt->dst.__use, rt->rt6i_flags,
+ rt->dst.dev ? rt->dst.dev->name : "");
+ iter->w.leaf = NULL;
+ return 0;
+}
+
+static int ipv6_route_yield(struct fib6_walker_t *w)
+{
+ struct ipv6_route_iter *iter = w->args;
+
+ if (!iter->skip)
+ return 1;
+
+ do {
+ iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->skip--;
+ if (!iter->skip && iter->w.leaf)
+ return 1;
+ } while (iter->w.leaf);
+
+ return 0;
+}
+
+static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
+{
+ memset(&iter->w, 0, sizeof(iter->w));
+ iter->w.func = ipv6_route_yield;
+ iter->w.root = &iter->tbl->tb6_root;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+ iter->sernum = iter->w.root->fn_sernum;
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(&iter->w);
+}
+
+static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+ struct net *net)
+{
+ unsigned int h;
+ struct hlist_node *node;
+
+ if (tbl) {
+ h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
+ node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ } else {
+ h = 0;
+ node = NULL;
+ }
+
+ while (!node && h < FIB6_TABLE_HASHSZ) {
+ node = rcu_dereference_bh(
+ hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
+ }
+ return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
+}
+
+static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+{
+ if (iter->sernum != iter->w.root->fn_sernum) {
+ iter->sernum = iter->w.root->fn_sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+ iter->w.skip = iter->w.count;
+ }
+}
+
+static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int r;
+ struct rt6_info *n;
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (!v)
+ goto iter_table;
+
+ n = ((struct rt6_info *)v)->dst.rt6_next;
+ if (n) {
+ ++*pos;
+ return n;
+ }
+
+iter_table:
+ ipv6_route_check_sernum(iter);
+ read_lock(&iter->tbl->tb6_lock);
+ r = fib6_walk_continue(&iter->w);
+ read_unlock(&iter->tbl->tb6_lock);
+ if (r > 0) {
+ if (v)
+ ++*pos;
+ return iter->w.leaf;
+ } else if (r < 0) {
+ fib6_walker_unlink(&iter->w);
+ return NULL;
+ }
+ fib6_walker_unlink(&iter->w);
+
+ iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
+ if (!iter->tbl)
+ return NULL;
+
+ ipv6_route_seq_setup_walk(iter);
+ goto iter_table;
+}
+
+static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU_BH)
+{
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ rcu_read_lock_bh();
+ iter->tbl = ipv6_route_seq_next_table(NULL, net);
+ iter->skip = *pos;
+
+ if (iter->tbl) {
+ ipv6_route_seq_setup_walk(iter);
+ return ipv6_route_seq_next(seq, NULL, pos);
+ } else {
+ return NULL;
+ }
+}
+
+static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
+{
+ struct fib6_walker_t *w = &iter->w;
+ return w->node && !(w->state == FWS_U && w->node == w->root);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU_BH)
+{
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (ipv6_route_iter_active(iter))
+ fib6_walker_unlink(&iter->w);
+
+ rcu_read_unlock_bh();
+}
+
+static const struct seq_operations ipv6_route_seq_ops = {
+ .start = ipv6_route_seq_start,
+ .next = ipv6_route_seq_next,
+ .stop = ipv6_route_seq_stop,
+ .show = ipv6_route_seq_show
+};
+
+int ipv6_route_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter));
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 46e88433ec7..4052694c6f2 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,9 +15,7 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,12 +26,7 @@
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
@@ -41,7 +34,7 @@
#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
in old IPv6 RFC. Well, it was reasonable value.
*/
-#define FL_MAX_LINGER 60 /* Maximal linger timeout */
+#define FL_MAX_LINGER 150 /* Maximal linger timeout */
/* FL hash table */
@@ -210,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
- fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+ fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
if (fl->label) {
lfl = __fl_lookup(net, fl->label);
if (lfl == NULL)
@@ -345,6 +338,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
expires = check_linger(expires);
if (!expires)
return -EPERM;
+
+ spin_lock_bh(&ip6_fl_lock);
fl->lastuse = jiffies;
if (time_before(fl->linger, linger))
fl->linger = linger;
@@ -352,6 +347,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
expires = fl->linger;
if (time_before(fl->expires, fl->lastuse + expires))
fl->expires = fl->lastuse + expires;
+ spin_unlock_bh(&ip6_fl_lock);
+
return 0;
}
@@ -453,8 +450,10 @@ static int mem_check(struct sock *sk)
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
+ rcu_read_lock_bh();
for_each_sk_fl_rcu(np, sfl)
count++;
+ rcu_read_unlock_bh();
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
@@ -465,34 +464,6 @@ static int mem_check(struct sock *sk)
return 0;
}
-static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
-{
- if (h1 == h2)
- return false;
- if (h1 == NULL || h2 == NULL)
- return true;
- if (h1->hdrlen != h2->hdrlen)
- return true;
- return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
-}
-
-static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
-{
- if (o1 == o2)
- return false;
- if (o1 == NULL || o2 == NULL)
- return true;
- if (o1->opt_nflen != o2->opt_nflen)
- return true;
- if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
- return true;
- if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
- return true;
- if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
- return true;
- return false;
-}
-
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
struct ip6_flowlabel *fl)
{
@@ -503,6 +474,43 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
spin_unlock_bh(&ip6_sk_fl_lock);
}
+int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
+ int flags)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_fl_socklist *sfl;
+
+ if (flags & IPV6_FL_F_REMOTE) {
+ freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
+ return 0;
+ }
+
+ if (np->repflow) {
+ freq->flr_label = np->flow_label;
+ return 0;
+ }
+
+ rcu_read_lock_bh();
+
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ spin_lock_bh(&ip6_fl_lock);
+ freq->flr_label = sfl->fl->label;
+ freq->flr_dst = sfl->fl->dst;
+ freq->flr_share = sfl->fl->share;
+ freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
+ freq->flr_linger = sfl->fl->linger / HZ;
+
+ spin_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock_bh();
+ return 0;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return -ENOENT;
+}
+
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
int uninitialized_var(err);
@@ -523,6 +531,15 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
switch (freq.flr_action) {
case IPV6_FL_A_PUT:
+ if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ if (!np->repflow)
+ return -ESRCH;
+ np->flow_label = 0;
+ np->repflow = 0;
+ return 0;
+ }
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
(sfl = rcu_dereference(*sflp))!=NULL;
@@ -563,6 +580,20 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
return -ESRCH;
case IPV6_FL_A_GET:
+ if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+ struct net *net = sock_net(sk);
+ if (net->ipv6.sysctl.flowlabel_consistency) {
+ net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
+ return -EPERM;
+ }
+
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+
+ np->repflow = 1;
+ return 0;
+ }
+
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
@@ -603,11 +634,6 @@ recheck:
uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
- err = -EINVAL;
- if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
- ipv6_opt_cmp(fl1->opt, fl->opt))
- goto release;
-
err = -ENOMEM;
if (sfl1 == NULL)
goto release;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9feafb..3873181ed85 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -61,9 +61,6 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
-#define IPV6_TCLASS_SHIFT 20
-
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
@@ -75,6 +72,7 @@ struct ip6gre_net {
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -356,10 +354,10 @@ failed_free:
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
- ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+ ip6gre_tunnel_unlink(ign, t);
dev_put(dev);
}
@@ -470,17 +468,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
goto drop;
if (flags&GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
- if (!csum)
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ csum = skb_checksum_simple_validate(skb);
offset += 4;
}
if (flags&GRE_KEY) {
@@ -499,7 +487,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
&ipv6h->saddr, &ipv6h->daddr, key,
gre_proto);
if (tunnel) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
@@ -614,11 +602,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +681,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -846,7 +834,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
@@ -976,12 +964,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
if (t->parms.o_flags&GRE_SEQ)
addend += 4;
}
+ t->hlen = addend;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
@@ -1002,8 +991,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
}
ip6_rt_put(rt);
}
-
- t->hlen = addend;
}
static int ip6gre_tnl_change(struct ip6_tnl *t,
@@ -1067,13 +1054,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
int err = 0;
struct ip6_tnl_parm2 p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
- t = NULL;
if (dev == ign->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
@@ -1081,9 +1067,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
ip6gre_tnl_parm_from_user(&p1, &p);
t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
@@ -1173,9 +1159,8 @@ done:
static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ip6_tnl *tunnel = netdev_priv(dev);
if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+ new_mtu > 0xFFF8 - dev->hard_header_len)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1247,13 +1232,13 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->iflink = 0;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
static int ip6gre_tunnel_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
+ int i;
tunnel = netdev_priv(dev);
@@ -1267,10 +1252,17 @@ static int ip6gre_tunnel_init(struct net_device *dev)
if (ipv6_addr_any(&tunnel->parms.raddr))
dev->header_ops = &ip6gre_header_ops;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ for_each_possible_cpu(i) {
+ struct pcpu_sw_netstats *ip6gre_tunnel_stats;
+ ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i);
+ u64_stats_init(&ip6gre_tunnel_stats->syncp);
+ }
+
+
return 0;
}
@@ -1294,11 +1286,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
- struct list_head *head)
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *dev, *aux;
int prio;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+ dev->rtnl_link_ops == &ip6gre_tap_ops)
+ unregister_netdevice_queue(dev, head);
+
for (prio = 0; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
@@ -1307,7 +1305,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
t = rtnl_dereference(ign->tunnels[prio][h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
t = rtnl_dereference(t->next);
}
}
@@ -1326,6 +1329,11 @@ static int __net_init ip6gre_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(ign->fb_tunnel_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1346,12 +1354,10 @@ err_alloc_dev:
static void __net_exit ip6gre_exit_net(struct net *net)
{
- struct ip6gre_net *ign;
LIST_HEAD(list);
- ign = net_generic(net, ip6gre_net_id);
rtnl_lock();
- ip6gre_destroy_tunnels(ign, &list);
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1460,7 +1466,7 @@ static int ip6gre_tap_init(struct net_device *dev)
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
@@ -1528,15 +1534,14 @@ out:
static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip6_tnl *t, *nt;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t, *nt = netdev_priv(dev);
+ struct net *net = nt->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct __ip6_tnl_parm p;
if (dev == ign->fb_tunnel_dev)
return -EINVAL;
- nt = netdev_priv(dev);
ip6gre_netlink_parms(data, &p);
t = ip6gre_tunnel_locate(net, &p, 0);
@@ -1556,6 +1561,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return 0;
}
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1633,6 +1647,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 302d6fb1ff2..51d54dc376f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,7 +49,7 @@
int ip6_rcv_finish(struct sk_buff *skb)
{
- if (sysctl_ip_early_demux && !skb_dst(skb)) {
+ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d82de722810..65eda2a8af4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -66,7 +66,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
__skb_pull(skb, sizeof(*ipv6h));
err = -EPROTONOSUPPORT;
- rcu_read_lock();
ops = rcu_dereference(inet6_offloads[
ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
@@ -74,7 +73,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
skb_reset_transport_header(skb);
err = ops->callbacks.gso_send_check(skb);
}
- rcu_read_unlock();
out:
return err;
@@ -91,59 +89,103 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
- bool tunnel;
+ bool encap, udpfrag;
+ int nhoff;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
goto out;
+ skb_reset_network_header(skb);
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- tunnel = skb->encapsulation;
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
+ features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
+
ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- rcu_read_lock();
+
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
}
- rcu_read_unlock();
if (IS_ERR(segs))
goto out;
for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
- if (!tunnel && proto == IPPROTO_UDP) {
+ ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+ ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+ skb->network_header = (u8 *)ipv6h - skb->head;
+
+ if (udpfrag) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)(skb_network_header(skb) +
- unfrag_ip6hlen);
+ fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
fptr->frag_off = htons(offset);
if (skb->next != NULL)
fptr->frag_off |= htons(IP6_MF);
offset += (ntohs(ipv6h->payload_len) -
sizeof(struct frag_hdr));
}
+ if (encap)
+ skb_reset_inner_headers(skb);
}
out:
return segs;
}
+/* Return the total length of all the extension hdrs, following the same
+ * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
+ */
+static int ipv6_exthdrs_len(struct ipv6hdr *iph,
+ const struct net_offload **opps)
+{
+ struct ipv6_opt_hdr *opth = (void *)iph;
+ int len = 0, proto, optlen = sizeof(*iph);
+
+ proto = iph->nexthdr;
+ for (;;) {
+ if (proto != NEXTHDR_HOP) {
+ *opps = rcu_dereference(inet6_offloads[proto]);
+ if (unlikely(!(*opps)))
+ break;
+ if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+ opth = (void *)opth + optlen;
+ optlen = ipv6_optlen(opth);
+ len += optlen;
+ proto = opth->nexthdr;
+ }
+ return len;
+}
+
static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -154,9 +196,8 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
unsigned int nlen;
unsigned int hlen;
unsigned int off;
- int flush = 1;
+ u16 flush = 1;
int proto;
- __wsum csum;
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
@@ -167,6 +208,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
goto out;
}
+ skb_set_network_header(skb, off);
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
@@ -201,12 +243,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
if (!NAPI_GRO_CB(p)->same_flow)
continue;
- iph2 = ipv6_hdr(p);
+ iph2 = (struct ipv6hdr *)(p->data + off);
first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
- /* All fields must match except length and Traffic Class. */
- if (nlen != skb_network_header_len(p) ||
- (first_word & htonl(0xF00FFFFF)) ||
+ /* All fields must match except length and Traffic Class.
+ * XXX skbs on the gro_list have all been parsed and pulled
+ * already so we don't need to compare nlen
+ * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
+ * memcmp() alone below is suffcient, right?
+ */
+ if ((first_word & htonl(0xF00FFFFF)) ||
memcmp(&iph->nexthdr, &iph2->nexthdr,
nlen - offsetof(struct ipv6hdr, nexthdr))) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -219,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(skb)->flush |= flush;
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+ skb_gro_postpull_rcsum(skb, iph, nlen);
pp = ops->callbacks.gro_receive(head, skb);
- skb->csum = csum;
-
out_unlock:
rcu_read_unlock();
@@ -235,21 +278,21 @@ out:
return pp;
}
-static int ipv6_gro_complete(struct sk_buff *skb)
+static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
int err = -ENOSYS;
- iph->payload_len = htons(skb->len - skb_network_offset(skb) -
- sizeof(*iph));
+ iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
rcu_read_lock();
- ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
+
+ nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->callbacks.gro_complete(skb);
+ err = ops->callbacks.gro_complete(skb, nhoff);
out_unlock:
rcu_read_unlock();
@@ -267,6 +310,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
},
};
+static const struct net_offload sit_offload = {
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ },
+};
+
static int __init ipv6_offload_init(void)
{
@@ -278,6 +328,9 @@ static int __init ipv6_offload_init(void)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
dev_add_offload(&ipv6_packet_offload);
+
+ inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d52916..45702b8cd14 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ nexthop = rt6_nexthop((struct rt6_info *)dst);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_unlock_bh();
- IP6_INC_STATS_BH(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
@@ -125,13 +125,14 @@ static int ip6_finish_output2(struct sk_buff *skb)
static int ip6_finish_output(struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))
+ dst_allfrag(skb_dst(skb)) ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(skb, ip6_finish_output2);
else
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -218,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->mark = sk->sk_mark;
mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+ if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -320,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
return dst_output(skb);
}
+static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+ unsigned int mtu;
+ struct inet6_dev *idev;
+
+ if (dst_metric_locked(dst, RTAX_MTU)) {
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ return mtu;
+ }
+
+ mtu = IPV6_MIN_MTU;
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb->ignore_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -331,17 +371,18 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -369,8 +410,8 @@ int ip6_forward(struct sk_buff *skb)
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -383,14 +424,15 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb_dst(skb);
@@ -438,25 +480,25 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = dst_mtu(dst);
+ mtu = ip6_dst_mtu_forward(dst);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INTOOBIGERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
@@ -492,12 +534,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
skb_copy_secmark(to, from);
}
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 hash, id;
+
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
+ id = ip_idents_reserve(hash, 1);
+ fhdr->identification = htonl(id);
+}
+
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -520,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->local_df && skb->len > mtu) ||
+ if (unlikely(!skb->ignore_df && skb->len > mtu) ||
(IP6CB(skb)->frag_max_size &&
IP6CB(skb)->frag_max_size > mtu)) {
if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -874,7 +927,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -909,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
out_err_release:
if (err == -ENETUNREACH)
- IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst);
*dst = NULL;
return err;
@@ -937,7 +990,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* @sk: socket which provides route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
- * @can_sleep: we are in a sleepable context
*
* This function performs a route lookup on the given flow.
*
@@ -945,8 +997,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* error code.
*/
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
- const struct in6_addr *final_dst,
- bool can_sleep)
+ const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
int err;
@@ -956,8 +1007,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
- if (can_sleep)
- fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
@@ -968,7 +1017,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
* @sk: socket which provides the dst cache and route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
- * @can_sleep: we are in a sleepable context
*
* This function performs a route lookup on the given flow with the
* possibility of using the cached route in the socket if it is valid.
@@ -979,8 +1027,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
* error code.
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
- const struct in6_addr *final_dst,
- bool can_sleep)
+ const struct in6_addr *final_dst)
{
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
int err;
@@ -992,8 +1039,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
- if (can_sleep)
- fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
@@ -1008,6 +1053,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
{
struct sk_buff *skb;
+ struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1034,33 +1080,26 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->transport_header = skb->network_header + fragheaderlen;
skb->protocol = htons(ETH_P_IPV6);
- skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- }
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
-
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(&fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* Specify the length of each IPv6 datagram fragment.
+ * It has to be a multiple of 8.
+ */
+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+ sizeof(struct frag_hdr)) & ~7;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ ipv6_select_ident(&fhdr, rt);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1080,21 +1119,19 @@ static void ip6_append_data_mtu(unsigned int *mtu,
unsigned int fragheaderlen,
struct sk_buff *skb,
struct rt6_info *rt,
- bool pmtuprobe)
+ unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (skb == NULL) {
/* first fragment, reserve header_len */
- *mtu = *mtu - rt->dst.header_len;
+ *mtu = orig_mtu - rt->dst.header_len;
} else {
/*
* this fragment is not first, the headers
* space is regarded as data space.
*/
- *mtu = min(*mtu, pmtuprobe ?
- rt->dst.dev->mtu :
- dst_mtu(rt->dst.path));
+ *mtu = orig_mtu;
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
@@ -1111,7 +1148,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen, mtu;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen;
int dst_exthdrlen;
int hh_len;
@@ -1167,10 +1204,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
- mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
else
- mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
@@ -1193,16 +1230,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
dst_exthdrlen = 0;
mtu = cork->fragsize;
}
+ orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
- if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
- ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
+ unsigned int maxnonfragsize, headersize;
+
+ headersize = sizeof(struct ipv6hdr) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+ (dst_allfrag(&rt->dst) ?
+ sizeof(struct frag_hdr) : 0) +
+ rt->rt6i_nfheader_len;
+
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
+
+ /* dontfrag active */
+ if ((cork->length + length > mtu - headersize) && dontfrag &&
+ (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+ sizeof(struct ipv6hdr));
+ goto emsgsize;
+ }
+
+ if (cork->length + length > maxnonfragsize - headersize) {
+emsgsize:
+ ipv6_local_error(sk, EMSGSIZE, fl6,
+ mtu - headersize +
+ sizeof(struct ipv6hdr));
return -EMSGSIZE;
}
}
@@ -1227,27 +1291,21 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
+ skb = skb_peek_tail(&sk->sk_write_queue);
cork->length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
-
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
- if (err)
- goto error;
- return 0;
- }
+ if (((length > mtu) ||
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
@@ -1272,8 +1330,7 @@ alloc_new_skb:
if (skb == NULL || skb_prev == NULL)
ip6_append_data_mtu(&mtu, &maxfraglen,
fragheaderlen, skb, rt,
- np->pmtudisc ==
- IPV6_PMTUDISC_PROBE);
+ orig_mtu);
skb_prev = skb;
@@ -1501,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->ignore_df = ip6_sk_ignore_df(sk);
*final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
@@ -1529,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61355f7f4da..afa08245836 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -29,7 +29,6 @@
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
-#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
@@ -62,6 +61,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
@@ -70,9 +70,6 @@ MODULE_ALIAS_NETDEV("ip6tnl0");
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif
-#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
-#define IPV6_TCLASS_SHIFT 20
-
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
@@ -103,16 +100,26 @@ struct ip6_tnl_net {
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
- struct pcpu_tstats sum = { 0 };
+ struct pcpu_sw_netstats tmp, sum = { 0 };
int i;
for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
- sum.rx_packets += tstats->rx_packets;
- sum.rx_bytes += tstats->rx_bytes;
- sum.tx_packets += tstats->tx_packets;
- sum.tx_bytes += tstats->tx_bytes;
+ unsigned int start;
+ const struct pcpu_sw_netstats *tstats =
+ per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ tmp.rx_packets = tstats->rx_packets;
+ tmp.rx_bytes = tstats->rx_bytes;
+ tmp.tx_packets = tstats->tx_packets;
+ tmp.tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ sum.rx_packets += tmp.rx_packets;
+ sum.rx_bytes += tmp.rx_bytes;
+ sum.tx_packets += tmp.tx_packets;
+ sum.tx_bytes += tmp.tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
@@ -785,7 +792,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
if (t->parms.proto != ipproto && t->parms.proto != 0) {
rcu_read_unlock();
@@ -824,8 +831,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
}
tstats = this_cpu_ptr(t->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
netif_rx(skb);
@@ -1131,7 +1140,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
@@ -1332,8 +1341,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip6_tnl_parm p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t = NULL;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
@@ -1345,11 +1354,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
ip6_tnl_parm_from_user(&p1, &p);
t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
} else {
memset(&p, 0, sizeof(p));
}
- if (t == NULL)
- t = netdev_priv(dev);
ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
@@ -1430,9 +1439,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU) {
- return -EINVAL;
+ struct ip6_tnl *tnl = netdev_priv(dev);
+
+ if (tnl->parms.proto == IPPROTO_IPIP) {
+ if (new_mtu < 68)
+ return -EINVAL;
+ } else {
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
}
+ if (new_mtu > 0xFFF8 - dev->hard_header_len)
+ return -EINVAL;
dev->mtu = new_mtu;
return 0;
}
@@ -1489,7 +1506,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
return 0;
@@ -1541,7 +1558,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
u8 proto;
- if (!data)
+ if (!data || !data[IFLA_IPTUN_PROTO])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
@@ -1627,6 +1644,15 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
return ip6_tnl_update(t, &p);
}
+static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev != ip6n->fb_tnl_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
return
@@ -1656,9 +1682,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
- &parm->raddr) ||
- nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
&parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1691,6 +1717,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = {
.validate = ip6_tnl_validate,
.newlink = ip6_tnl_newlink,
.changelink = ip6_tnl_changelink,
+ .dellink = ip6_tnl_dellink,
.get_size = ip6_tnl_get_size,
.fill_info = ip6_tnl_fill_info,
};
@@ -1707,9 +1734,9 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
{
- struct net *net = dev_net(ip6n->fb_tnl_dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
@@ -1731,8 +1758,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
unregister_netdevice_many(&list);
}
@@ -1752,6 +1777,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1778,10 +1804,8 @@ err_alloc_dev:
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
rtnl_lock();
- ip6_tnl_destroy_tunnels(ip6n);
+ ip6_tnl_destroy_tunnels(net);
rtnl_unlock();
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
new file mode 100644
index 00000000000..9aaa6bb229e
--- /dev/null
+++ b/net/ipv6/ip6_vti.c
@@ -0,0 +1,1160 @@
+/*
+ * IPv6 virtual tunneling interface
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv6/ip6_tunnel.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+static int vti6_dev_init(struct net_device *dev);
+static void vti6_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops vti6_link_ops __read_mostly;
+
+static int vti6_net_id __read_mostly;
+struct vti6_net {
+ /* the vti6 tunnel fallback device */
+ struct net_device *fb_tnl_dev;
+ /* lists for storing tunnels in use */
+ struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_wc[1];
+ struct ip6_tnl __rcu **tnls[2];
+};
+
+#define for_each_vti6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/**
+ * vti6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @net: network namespace
+ * @remote: the address of the tunnel exit-point
+ * @local: the address of the tunnel entry-point
+ *
+ * Return:
+ * tunnel matching given end-points if found,
+ * else fallback tunnel if its device is up,
+ * else %NULL
+ **/
+static struct ip6_tnl *
+vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
+ const struct in6_addr *local)
+{
+ unsigned int hash = HASH(remote, local);
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ t = rcu_dereference(ip6n->tnls_wc[0]);
+ if (t && (t->dev->flags & IFF_UP))
+ return t;
+
+ return NULL;
+}
+
+/**
+ * vti6_tnl_bucket - get head of list matching given tunnel parameters
+ * @p: parameters containing tunnel end-points
+ *
+ * Description:
+ * vti6_tnl_bucket() returns the head of the list matching the
+ * &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list
+ **/
+static struct ip6_tnl __rcu **
+vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = 0;
+ int prio = 0;
+
+ if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+ prio = 1;
+ h = HASH(remote, local);
+ }
+ return &ip6n->tnls[prio][h];
+}
+
+static void
+vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
+
+ rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void
+vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = vti6_tnl_bucket(ip6n, &t->parms);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static void vti6_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static int vti6_tnl_create2(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ err = vti6_dev_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &vti6_link_ops;
+
+ dev_hold(dev);
+ vti6_tnl_link(ip6n, t);
+
+ return 0;
+
+out:
+ return err;
+}
+
+static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
+{
+ struct net_device *dev;
+ struct ip6_tnl *t;
+ char name[IFNAMSIZ];
+ int err;
+
+ if (p->name[0])
+ strlcpy(name, p->name, IFNAMSIZ);
+ else
+ sprintf(name, "ip6_vti%%d");
+
+ dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ if (dev == NULL)
+ goto failed;
+
+ dev_net_set(dev, net);
+
+ t = netdev_priv(dev);
+ t->parms = *p;
+ t->net = dev_net(dev);
+
+ err = vti6_tnl_create2(dev);
+ if (err < 0)
+ goto failed_free;
+
+ return t;
+
+failed_free:
+ vti6_dev_free(dev);
+failed:
+ return NULL;
+}
+
+/**
+ * vti6_locate - find or create tunnel matching given parameters
+ * @net: network namespace
+ * @p: tunnel parameters
+ * @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ * vti6_locate() first tries to locate an existing tunnel
+ * based on @parms. If this is unsuccessful, but @create is set a new
+ * tunnel device is created and registered for use.
+ *
+ * Return:
+ * matching tunnel or NULL
+ **/
+static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
+ int create)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for (tp = vti6_tnl_bucket(ip6n, p);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr))
+ return t;
+ }
+ if (!create)
+ return NULL;
+ return vti6_tnl_create(net, p);
+}
+
+/**
+ * vti6_dev_uninit - tunnel device uninitializer
+ * @dev: the device to be destroyed
+ *
+ * Description:
+ * vti6_dev_uninit() removes tunnel from its list
+ **/
+static void vti6_dev_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+ else
+ vti6_tnl_unlink(ip6n, t);
+ dev_put(dev);
+}
+
+static int vti6_rcv(struct sk_buff *skb)
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ rcu_read_lock();
+ if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+ &ipv6h->daddr)) != NULL) {
+ if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
+ t->dev->stats.rx_dropped++;
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
+
+ rcu_read_unlock();
+
+ return xfrm6_rcv(skb);
+ }
+ rcu_read_unlock();
+ return -EINVAL;
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
+/**
+ * vti6_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+static inline bool
+vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
+/**
+ * vti6_xmit - send a packet
+ * @skb: the outgoing socket buffer
+ * @dev: the outgoing tunnel device
+ * @fl: the flow informations for the xfrm_lookup
+ **/
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *tdev;
+ int err = -1;
+
+ if (!dst)
+ goto tx_err_link_failure;
+
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
+ goto tx_err_link_failure;
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ t->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
+
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(dst);
+ return err;
+}
+
+static netdev_tx_t
+vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
+ int ret;
+
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IPV6):
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ break;
+ default:
+ goto tx_err;
+ }
+
+ ret = vti6_xmit(skb, dev, &fl);
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ __u32 mark;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ mark = be32_to_cpu(t->parms.o_key);
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
+static void vti6_link_config(struct ip6_tnl *t)
+{
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
+ IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+}
+
+/**
+ * vti6_tnl_change - update the tunnel parameters
+ * @t: tunnel to be changed
+ * @p: tunnel configuration parameters
+ *
+ * Description:
+ * vti6_tnl_change() updates the tunnel parameters
+ **/
+static int
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.link = p->link;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.proto = p->proto;
+ ip6_tnl_dst_reset(t);
+ vti6_link_config(t);
+ return 0;
+}
+
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ vti6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = vti6_tnl_change(t, p);
+ vti6_tnl_link(ip6n, t);
+ netdev_state_change(t->dev);
+ return err;
+}
+
+static void
+vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->proto = u->proto;
+
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->proto = p->proto;
+
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+/**
+ * vti6_tnl_ioctl - configure vti6 tunnels from userspace
+ * @dev: virtual device associated with tunnel
+ * @ifr: parameters passed from userspace
+ * @cmd: command to be performed
+ *
+ * Description:
+ * vti6_ioctl() is used for managing vti6 tunnels
+ * from userspace.
+ *
+ * The possible commands are the following:
+ * %SIOCGETTUNNEL: get tunnel parameters for device
+ * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ * %SIOCCHGTUNNEL: change tunnel parameters to those given
+ * %SIOCDELTUNNEL: delete tunnel
+ *
+ * The fallback device "ip6_vti0", created during module
+ * initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ * 0 on success,
+ * %-EFAULT if unable to copy data to or from userspace,
+ * %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ * %-EINVAL if passed tunnel parameters are invalid,
+ * %-EEXIST if changing a tunnel's parameters would cause a conflict
+ * %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+static int
+vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t = NULL;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6n->fb_tnl_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ } else {
+ memset(&p, 0, sizeof(p));
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -EINVAL;
+ if (p.proto != IPPROTO_IPV6 && p.proto != 0)
+ break;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
+ if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else
+ t = netdev_priv(dev);
+
+ err = vti6_update(t, &p1);
+ }
+ if (t) {
+ err = 0;
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+
+ if (dev == ip6n->fb_tnl_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -ENOENT;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ if (t == NULL)
+ break;
+ err = -EPERM;
+ if (t->dev == ip6n->fb_tnl_dev)
+ break;
+ dev = t->dev;
+ }
+ err = 0;
+ unregister_netdevice(dev);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ return err;
+}
+
+/**
+ * vti6_tnl_change_mtu - change mtu manually for tunnel device
+ * @dev: virtual device associated with tunnel
+ * @new_mtu: the new mtu
+ *
+ * Return:
+ * 0 on success,
+ * %-EINVAL if mtu too small
+ **/
+static int vti6_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops vti6_netdev_ops = {
+ .ndo_uninit = vti6_dev_uninit,
+ .ndo_start_xmit = vti6_tnl_xmit,
+ .ndo_do_ioctl = vti6_ioctl,
+ .ndo_change_mtu = vti6_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+};
+
+/**
+ * vti6_dev_setup - setup virtual tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Initialize function pointers and device parameters
+ **/
+static void vti6_dev_setup(struct net_device *dev)
+{
+ dev->netdev_ops = &vti6_netdev_ops;
+ dev->destructor = vti6_dev_free;
+
+ dev->type = ARPHRD_TUNNEL6;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
+ dev->mtu = ETH_DATA_LEN;
+ dev->flags |= IFF_NOARP;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+/**
+ * vti6_dev_init_gen - general initializer for all tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static inline int vti6_dev_init_gen(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ t->dev = dev;
+ t->net = dev_net(dev);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * vti6_dev_init - initializer for all non fallback tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static int vti6_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+ vti6_link_config(t);
+ return 0;
+}
+
+/**
+ * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * @dev: fallback device
+ *
+ * Return: 0
+ **/
+static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+
+ t->parms.proto = IPPROTO_IPV6;
+ dev_hold(dev);
+
+ vti6_link_config(t);
+
+ rcu_assign_pointer(ip6n->tnls_wc[0], t);
+ return 0;
+}
+
+static int vti6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ return 0;
+}
+
+static void vti6_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_VTI_LINK])
+ parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
+
+ if (data[IFLA_VTI_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
+
+ if (data[IFLA_VTI_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+}
+
+static int vti6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *nt;
+
+ nt = netdev_priv(dev);
+ vti6_netlink_parms(data, &nt->parms);
+
+ nt->parms.proto = IPPROTO_IPV6;
+
+ if (vti6_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return vti6_tnl_create2(dev);
+}
+
+static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t;
+ struct __ip6_tnl_parm p;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ return -EINVAL;
+
+ vti6_netlink_parms(data, &p);
+
+ t = vti6_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ return vti6_update(t, &p);
+}
+
+static size_t vti6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_VTI_LINK */
+ nla_total_size(4) +
+ /* IFLA_VTI_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_IKEY */
+ nla_total_size(4) +
+ /* IFLA_VTI_OKEY */
+ nla_total_size(4) +
+ 0;
+}
+
+static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
+ nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
+ &parm->laddr) ||
+ nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
+ nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
+ nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
+ [IFLA_VTI_LINK] = { .type = NLA_U32 },
+ [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_IKEY] = { .type = NLA_U32 },
+ [IFLA_VTI_OKEY] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops vti6_link_ops __read_mostly = {
+ .kind = "vti6",
+ .maxtype = IFLA_VTI_MAX,
+ .policy = vti6_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = vti6_dev_setup,
+ .validate = vti6_validate,
+ .newlink = vti6_newlink,
+ .changelink = vti6_changelink,
+ .get_size = vti6_get_size,
+ .fill_info = vti6_fill_info,
+};
+
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+{
+ int h;
+ struct ip6_tnl *t;
+ LIST_HEAD(list);
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, &list);
+ t = rtnl_dereference(t->next);
+ }
+ }
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_many(&list);
+}
+
+static int __net_init vti6_init_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct ip6_tnl *t = NULL;
+ int err;
+
+ ip6n->tnls[0] = ip6n->tnls_wc;
+ ip6n->tnls[1] = ip6n->tnls_r_l;
+
+ err = -ENOMEM;
+ ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
+ vti6_dev_setup);
+
+ if (!ip6n->fb_tnl_dev)
+ goto err_alloc_dev;
+ dev_net_set(ip6n->fb_tnl_dev, net);
+
+ err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ err = register_netdev(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+ return 0;
+
+err_register:
+ vti6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit vti6_exit_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ rtnl_lock();
+ vti6_destroy_tunnels(ip6n);
+ rtnl_unlock();
+}
+
+static struct pernet_operations vti6_net_ops = {
+ .init = vti6_init_net,
+ .exit = vti6_exit_net,
+ .id = &vti6_net_id,
+ .size = sizeof(struct vti6_net),
+};
+
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+/**
+ * vti6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+static int __init vti6_tunnel_init(void)
+{
+ int err;
+
+ err = register_pernet_device(&vti6_net_ops);
+ if (err < 0)
+ goto out_pernet;
+
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
+ if (err < 0) {
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = rtnl_link_register(&vti6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ return 0;
+
+rtnl_link_failed:
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+out:
+ unregister_pernet_device(&vti6_net_ops);
+out_pernet:
+ return err;
+}
+
+/**
+ * vti6_tunnel_cleanup - free resources and unregister protocol
+ **/
+static void __exit vti6_tunnel_cleanup(void)
+{
+ rtnl_link_unregister(&vti6_link_ops);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+
+ unregister_pernet_device(&vti6_net_ops);
+}
+
+module_init(vti6_tunnel_init);
+module_exit(vti6_tunnel_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("vti6");
+MODULE_ALIAS_NETDEV("ip6_vti0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f365310bfcc..8250474ab7d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr6_table **mrt)
{
- struct ip6mr_result res;
- struct fib_lookup_arg arg = { .result = &res, };
int err;
+ struct ip6mr_result res;
+ struct fib_lookup_arg arg = {
+ .result = &res,
+ .flags = FIB_LOOKUP_NOREF,
+ };
err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
flowi6_to_flowi(flp6), 0, &arg);
@@ -697,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct mr6_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
int err;
@@ -1630,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
struct mr6_table *mrt;
struct flowi6 fl6 = {
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
};
@@ -2346,13 +2349,14 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
int err;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2420,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
if (skb == NULL)
goto errout;
- err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
if (err < 0)
goto errout;
@@ -2459,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
@@ -2473,7 +2478,8 @@ next_entry:
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0) {
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
spin_unlock_bh(&mfc_unres_lock);
goto done;
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5636a912074..d1c793cffcb 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -16,8 +16,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* [Memo]
@@ -54,7 +53,7 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -64,22 +63,23 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -176,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -188,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
@@ -201,7 +207,7 @@ static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
@@ -211,7 +217,7 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1e2e8ef29c..edb58aff4ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
if (ipv6_only_sock(sk) ||
- !ipv6_addr_v4mapped(&np->daddr)) {
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
retv = -EADDRNOTAVAIL;
break;
}
@@ -722,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
@@ -1002,16 +1002,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
release_sock(sk);
if (skb) {
- int err = ip6_datagram_recv_ctl(sk, &msg, skb);
+ ip6_datagram_recv_ctl(sk, &msg, skb);
kfree_skb(skb);
- if (err)
- return err;
} else {
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
@@ -1019,20 +1017,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
- int tclass = np->rcv_tclass;
+ int tclass = (int)ip6_tclass(np->rcv_flowinfo);
+
put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
if (np->rxopt.bits.rxoinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
int hlim = np->mcast_hops;
put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
+ if (np->rxopt.bits.rxflow) {
+ __be32 flowinfo = np->rcv_flowinfo;
+
+ put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+ }
}
len -= msg.msg_controllen;
return put_user(len, optlen);
@@ -1211,6 +1216,37 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->sndflow;
break;
+ case IPV6_FLOWLABEL_MGR:
+ {
+ struct in6_flowlabel_req freq;
+ int flags;
+
+ if (len < sizeof(freq))
+ return -EINVAL;
+
+ if (copy_from_user(&freq, optval, sizeof(freq)))
+ return -EFAULT;
+
+ if (freq.flr_action != IPV6_FL_A_GET)
+ return -EINVAL;
+
+ len = sizeof(freq);
+ flags = freq.flr_flags;
+
+ memset(&freq, 0, sizeof(freq));
+
+ val = ipv6_flowlabel_opt_get(sk, &freq, flags);
+ if (val < 0)
+ return val;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &freq, len))
+ return -EFAULT;
+
+ return 0;
+ }
+
case IPV6_ADDR_PREFERENCES:
val = 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67b737..617f0958e16 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -999,7 +999,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- unsigned long tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = prandom_u32() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
@@ -1015,7 +1015,7 @@ static void mld_gq_stop_timer(struct inet6_dev *idev)
static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- unsigned long tv = net_random() % delay;
+ unsigned long tv = prandom_u32() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
@@ -1030,7 +1030,7 @@ static void mld_ifc_stop_timer(struct inet6_dev *idev)
static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- unsigned long tv = net_random() % delay;
+ unsigned long tv = prandom_u32() % delay;
if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
in6_dev_hold(idev);
@@ -1061,7 +1061,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
}
if (delay >= resptime)
- delay = net_random() % resptime;
+ delay = prandom_u32() % resptime;
ma->mca_timer.expires = jiffies + delay;
if (!mod_timer(&ma->mca_timer, jiffies + delay))
@@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
@@ -1620,11 +1629,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
@@ -1665,7 +1675,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
skb_tailroom(skb)) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, int gdeleted, int sdeleted)
+ int type, int gdeleted, int sdeleted, int crsend)
{
struct inet6_dev *idev = pmc->idev;
struct net_device *dev = idev->dev;
@@ -1757,7 +1767,7 @@ empty_source:
if (type == MLD2_ALLOW_NEW_SOURCES ||
type == MLD2_BLOCK_OLD_SOURCES)
return skb;
- if (pmc->mca_crcount || isquery) {
+ if (pmc->mca_crcount || isquery || crsend) {
/* make sure we have room for group header */
if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
mld_sendpack(skb);
@@ -1789,7 +1799,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
type = MLD2_MODE_IS_EXCLUDE;
else
type = MLD2_MODE_IS_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
} else {
@@ -1798,7 +1808,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
type = MLD2_MODE_IS_EXCLUDE;
else
type = MLD2_MODE_IS_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
read_unlock_bh(&idev->lock);
@@ -1843,13 +1853,13 @@ static void mld_send_cr(struct inet6_dev *idev)
if (pmc->mca_sfmode == MCAST_INCLUDE) {
type = MLD2_BLOCK_OLD_SOURCES;
dtype = MLD2_BLOCK_OLD_SOURCES;
- skb = add_grec(skb, pmc, type, 1, 0);
- skb = add_grec(skb, pmc, dtype, 1, 1);
+ skb = add_grec(skb, pmc, type, 1, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 1, 1, 0);
}
if (pmc->mca_crcount) {
if (pmc->mca_sfmode == MCAST_EXCLUDE) {
type = MLD2_CHANGE_TO_INCLUDE;
- skb = add_grec(skb, pmc, type, 1, 0);
+ skb = add_grec(skb, pmc, type, 1, 0, 0);
}
pmc->mca_crcount--;
if (pmc->mca_crcount == 0) {
@@ -1880,8 +1890,8 @@ static void mld_send_cr(struct inet6_dev *idev)
type = MLD2_ALLOW_NEW_SOURCES;
dtype = MLD2_BLOCK_OLD_SOURCES;
}
- skb = add_grec(skb, pmc, type, 0, 0);
- skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 0, 1, 0); /* deleted sources */
/* filter mode changes */
if (pmc->mca_crcount) {
@@ -1889,7 +1899,7 @@ static void mld_send_cr(struct inet6_dev *idev)
type = MLD2_CHANGE_TO_EXCLUDE;
else
type = MLD2_CHANGE_TO_INCLUDE;
- skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, type, 0, 0, 0);
pmc->mca_crcount--;
}
spin_unlock_bh(&pmc->mca_lock);
@@ -1997,27 +2007,36 @@ err_out:
goto out;
}
-static void mld_resend_report(struct inet6_dev *idev)
+static void mld_send_initial_cr(struct inet6_dev *idev)
{
- if (mld_in_v1_mode(idev)) {
- struct ifmcaddr6 *mcaddr;
- read_lock_bh(&idev->lock);
- for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) {
- if (!(mcaddr->mca_flags & MAF_NOREPORT))
- igmp6_send(&mcaddr->mca_addr, idev->dev,
- ICMPV6_MGM_REPORT);
- }
- read_unlock_bh(&idev->lock);
- } else {
- mld_send_report(idev, NULL);
+ struct sk_buff *skb;
+ struct ifmcaddr6 *pmc;
+ int type;
+
+ if (mld_in_v1_mode(idev))
+ return;
+
+ skb = NULL;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ spin_lock_bh(&pmc->mca_lock);
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ type = MLD2_CHANGE_TO_EXCLUDE;
+ else
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0, 1);
+ spin_unlock_bh(&pmc->mca_lock);
}
+ read_unlock_bh(&idev->lock);
+ if (skb)
+ mld_sendpack(skb);
}
void ipv6_mc_dad_complete(struct inet6_dev *idev)
{
idev->mc_dad_count = idev->mc_qrv;
if (idev->mc_dad_count) {
- mld_resend_report(idev);
+ mld_send_initial_cr(idev);
idev->mc_dad_count--;
if (idev->mc_dad_count)
mld_dad_start_timer(idev, idev->mc_maxdelay);
@@ -2028,13 +2047,13 @@ static void mld_dad_timer_expire(unsigned long data)
{
struct inet6_dev *idev = (struct inet6_dev *)data;
- mld_resend_report(idev);
+ mld_send_initial_cr(idev);
if (idev->mc_dad_count) {
idev->mc_dad_count--;
if (idev->mc_dad_count)
mld_dad_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2328,7 +2347,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
- delay = net_random() % unsolicited_report_interval(ma->idev);
+ delay = prandom_u32() % unsolicited_report_interval(ma->idev);
spin_lock_bh(&ma->mca_lock);
if (del_timer(&ma->mca_timer)) {
@@ -2379,7 +2398,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2411,7 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 9ac01dc9402..db9b6cbc9db 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -13,8 +13,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Authors:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f8a55ff1971..ca8d4ea48a5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -125,17 +125,19 @@ struct neigh_table nd_tbl = {
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
- .base_reachable_time = ND_REACHABLE_TIME,
- .retrans_time = ND_RETRANS_TIMER,
- .gc_staletime = 60 * HZ,
.reachable_time = ND_REACHABLE_TIME,
- .delay_probe_time = 5 * HZ,
- .queue_len_bytes = 64*1024,
- .ucast_probes = 3,
- .mcast_probes = 3,
- .anycast_delay = 1 * HZ,
- .proxy_delay = (8 * HZ) / 10,
- .proxy_qlen = 64,
+ .data = {
+ [NEIGH_VAR_MCAST_PROBES] = 3,
+ [NEIGH_VAR_UCAST_PROBES] = 3,
+ [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
+ [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
+ [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+ [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_PROXY_QLEN] = 64,
+ [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
+ [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
+ },
},
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
@@ -656,14 +658,14 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
saddr = &ipv6_hdr(skb)->saddr;
- if ((probes -= neigh->parms->ucast_probes) < 0) {
+ if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) {
if (!(neigh->nud_state & NUD_VALID)) {
ND_PRINTK(1, dbg,
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
ndisc_send_ns(dev, neigh, target, target, saddr);
- } else if ((probes -= neigh->parms->app_probes) < 0) {
+ } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
@@ -790,7 +792,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
inc &&
- idev->nd_parms->proxy_delay != 0) {
+ NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
/*
* for anycast or proxy,
* sender should delay its response
@@ -849,7 +851,7 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
- const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
@@ -942,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/*
* Change: router to host
*/
- struct rt6_info *rt;
- rt = rt6_get_dflt_router(saddr, dev);
- if (rt)
- ip6_del_rt(rt);
+ rt6_clean_tohost(dev_net(dev), saddr);
}
out:
@@ -1210,7 +1209,7 @@ skip_defrtr:
rtime = (rtime*HZ)/1000;
if (rtime < HZ/10)
rtime = HZ/10;
- in6_dev->nd_parms->retrans_time = rtime;
+ NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
in6_dev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
}
@@ -1222,9 +1221,11 @@ skip_defrtr:
if (rtime < HZ/10)
rtime = HZ/10;
- if (rtime != in6_dev->nd_parms->base_reachable_time) {
- in6_dev->nd_parms->base_reachable_time = rtime;
- in6_dev->nd_parms->gc_staletime = 3 * rtime;
+ if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
+ NEIGH_VAR_SET(in6_dev->nd_parms,
+ BASE_REACHABLE_TIME, rtime);
+ NEIGH_VAR_SET(in6_dev->nd_parms,
+ GC_STALETIME, 3 * rtime);
in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
in6_dev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
@@ -1277,6 +1278,9 @@ skip_linkparms:
ri->prefix_len == 0)
continue;
#endif
+ if (ri->prefix_len == 0 &&
+ !in6_dev->cnf.accept_ra_defrtr)
+ continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
@@ -1648,22 +1652,23 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu
ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
if (strcmp(ctl->procname, "retrans_time") == 0)
- ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
else if (strcmp(ctl->procname, "base_reachable_time") == 0)
- ret = proc_dointvec_jiffies(ctl, write,
- buffer, lenp, ppos);
+ ret = neigh_proc_dointvec_jiffies(ctl, write,
+ buffer, lenp, ppos);
else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
(strcmp(ctl->procname, "base_reachable_time_ms") == 0))
- ret = proc_dointvec_ms_jiffies(ctl, write,
- buffer, lenp, ppos);
+ ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
+ buffer, lenp, ppos);
else
ret = -1;
if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
- if (ctl->data == &idev->nd_parms->base_reachable_time)
- idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+ if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
+ idev->nd_parms->reachable_time =
+ neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
idev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
@@ -1722,12 +1727,12 @@ int __init ndisc_init(void)
neigh_table_init(&nd_tbl);
#ifdef CONFIG_SYSCTL
- err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
+ err = neigh_sysctl_register(NULL, &nd_tbl.parms,
&ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
-#endif
out:
+#endif
return err;
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 95f3f1da0d7..d38e6a8d8b9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
.daddr = iph->daddr,
.saddr = iph->saddr,
};
+ int err;
dst = ip6_route_output(net, skb->sk, &fl6);
- if (dst->error) {
+ err = dst->error;
+ if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return dst->error;
+ return err;
}
/* Drop old route. */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b29b6..4bff1f297e3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,36 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
+config NF_TABLES_IPV6
+ depends on NF_TABLES
+ tristate "IPv6 nf_tables support"
+ help
+ This option enables the IPv6 support for nf_tables.
+
+config NFT_CHAIN_ROUTE_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv6 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, flowlabel, hop-limit and
+ the packet mark.
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ depends on NF_NAT_IPV6 && NFT_NAT
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ default NFT_REJECT
+ tristate
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f798..70d3dd66f2c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,12 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 44400c216dc..e080fbbbc0e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -1236,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 56eef30ee5f..544b0a9da1b 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -23,182 +23,18 @@
#include <linux/skbuff.h>
#include <linux/icmpv6.h>
#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/tcp.h>
#include <net/icmp.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
#include <net/flow.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_ipv6/ip6t_REJECT.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-/* Send RST reply */
-static void send_reset(struct net *net, struct sk_buff *oldskb)
-{
- struct sk_buff *nskb;
- struct tcphdr otcph, *tcph;
- unsigned int otcplen, hh_len;
- int tcphoff, needs_ack;
- const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
- struct ipv6hdr *ip6h;
-#define DEFAULT_TOS_VALUE 0x0U
- const __u8 tclass = DEFAULT_TOS_VALUE;
- struct dst_entry *dst = NULL;
- u8 proto;
- __be16 frag_off;
- struct flowi6 fl6;
-
- if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
- (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
- pr_debug("addr is not unicast.\n");
- return;
- }
-
- proto = oip6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
-
- if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
- pr_debug("Cannot get TCP header.\n");
- return;
- }
-
- otcplen = oldskb->len - tcphoff;
-
- /* IP header checks: fragment, too short. */
- if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
- pr_debug("proto(%d) != IPPROTO_TCP, "
- "or too short. otcplen = %d\n",
- proto, otcplen);
- return;
- }
-
- if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
- BUG();
-
- /* No RST for RST. */
- if (otcph.rst) {
- pr_debug("RST is set\n");
- return;
- }
-
- /* Check checksum. */
- if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
- skb_checksum(oldskb, tcphoff, otcplen, 0))) {
- pr_debug("TCP checksum is invalid\n");
- return;
- }
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.saddr = oip6h->daddr;
- fl6.daddr = oip6h->saddr;
- fl6.fl6_sport = otcph.dest;
- fl6.fl6_dport = otcph.source;
- security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst == NULL || dst->error) {
- dst_release(dst);
- return;
- }
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(dst))
- return;
-
- hh_len = (dst->dev->hard_header_len + 15)&~15;
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
- + sizeof(struct tcphdr) + dst->trailer_len,
- GFP_ATOMIC);
-
- if (!nskb) {
- net_dbg_ratelimited("cannot alloc skb\n");
- dst_release(dst);
- return;
- }
-
- skb_dst_set(nskb, dst);
-
- skb_reserve(nskb, hh_len + dst->header_len);
-
- skb_put(nskb, sizeof(struct ipv6hdr));
- skb_reset_network_header(nskb);
- ip6h = ipv6_hdr(nskb);
- ip6_flow_hdr(ip6h, tclass, 0);
- ip6h->hop_limit = ip6_dst_hoplimit(dst);
- ip6h->nexthdr = IPPROTO_TCP;
- ip6h->saddr = oip6h->daddr;
- ip6h->daddr = oip6h->saddr;
-
- skb_reset_transport_header(nskb);
- tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
- /* Truncate to length (no data) */
- tcph->doff = sizeof(struct tcphdr)/4;
- tcph->source = otcph.dest;
- tcph->dest = otcph.source;
-
- if (otcph.ack) {
- needs_ack = 0;
- tcph->seq = otcph.ack_seq;
- tcph->ack_seq = 0;
- } else {
- needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
- + otcplen - (otcph.doff<<2));
- tcph->seq = 0;
- }
-
- /* Reset flags */
- ((u_int8_t *)tcph)[13] = 0;
- tcph->rst = 1;
- tcph->ack = needs_ack;
- tcph->window = 0;
- tcph->urg_ptr = 0;
- tcph->check = 0;
-
- /* Adjust TCP checksum */
- tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
- &ipv6_hdr(nskb)->daddr,
- sizeof(struct tcphdr), IPPROTO_TCP,
- csum_partial(tcph,
- sizeof(struct tcphdr), 0));
-
- nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
- /* If we use ip6_local_out for bridged traffic, the MAC source on
- * the RST will be ours, instead of the destination's. This confuses
- * some routers/firewalls, and they drop the packet. So we need to
- * build the eth header using the original destination's MAC as the
- * source, and send the RST packet directly.
- */
- if (oldskb->nf_bridge) {
- struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
- nskb->protocol = htons(ETH_P_IPV6);
- ip6h->payload_len = htons(sizeof(struct tcphdr));
- if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
- oeth->h_source, oeth->h_dest, nskb->len) < 0)
- return;
- dev_queue_xmit(nskb);
- } else
-#endif
- ip6_local_out(nskb);
-}
-
-static inline void
-send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
- unsigned int hooknum)
-{
- if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
- skb_in->dev = net->loopback_dev;
-
- icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -209,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
- send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
break;
case IP6T_ICMP6_ADM_PROHIBITED:
- send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
- send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
break;
case IP6T_ICMP6_ADDR_UNREACH:
- send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_PORT_UNREACH:
- send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+ nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
- send_reset(net, skb);
+ nf_send_reset6(net, skb, par->hooknum);
break;
default:
net_info_ratelimited("case %u not handled yet\n", reject->with);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8dbca..a0d17270117 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet,
this_cpu_inc(snet->stats->cookie_valid);
opts->mss = mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_check_timestamp_cookie(opts);
@@ -282,7 +283,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -311,7 +313,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -372,7 +374,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +398,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
@@ -441,6 +446,7 @@ static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
static struct xt_target synproxy_tg6_reg __read_mostly = {
.name = "SYNPROXY",
.family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
.target = synproxy_tg6,
.targetsize = sizeof(struct xt_synproxy_info),
.checkentry = synproxy_tg6_check,
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index e0983f3648a..790e0c6b19e 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
struct ipv6hdr *iph = ipv6_hdr(skb);
bool ret = false;
struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
.flowi6_proto = iph->nexthdr,
.daddr = iph->saddr,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b14c5e..ca7f6c12808 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907ae6a..307bbb782d1 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, hook, in, out,
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2d3a7..387d8b8fc18 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_fn(unsigned int hooknum,
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
__be16 frag_off;
int hdrlen;
u8 nexthdr;
@@ -90,17 +90,9 @@ nf_nat_ipv6_fn(unsigned int hooknum,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
@@ -111,7 +103,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- hooknum, hdrlen))
+ ops->hooknum,
+ hdrlen))
return NF_DROP;
else
return NF_ACCEPT;
@@ -124,14 +117,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
@@ -140,11 +133,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +145,7 @@ oif_changed:
}
static unsigned int
-nf_nat_ipv6_in(unsigned int hooknum,
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -161,7 +154,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -170,7 +163,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_out(unsigned int hooknum,
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -187,7 +180,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +202,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_local_fn(unsigned int hooknum,
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -224,7 +217,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d86720..5274740acec 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d7e52..ab3b0219ecf 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
};
static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d6e4dd8b58d..4cbc6b290dd 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_helper(unsigned int hooknum,
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -169,66 +169,16 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int __ipv6_conntrack_in(struct net *net,
- unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct sk_buff *reasm = skb->nfct_reasm;
- const struct nf_conn_help *help;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This packet is fragmented and has reassembled packet. */
- if (reasm) {
- /* Reassembled packet isn't parsed yet ? */
- if (!reasm->nfct) {
- unsigned int ret;
-
- ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
- if (ret != NF_ACCEPT)
- return ret;
- }
-
- /* Conntrack helpers need the entire reassembled packet in the
- * POST_ROUTING hook. In case of unconfirmed connections NAT
- * might reassign a helper, so the entire packet is also
- * required.
- */
- ct = nf_ct_get(reasm, &ctinfo);
- if (ct != NULL && !nf_ct_is_untracked(ct)) {
- help = nfct_help(ct);
- if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
- nf_conntrack_get_reasm(reasm);
- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
- (struct net_device *)in,
- (struct net_device *)out,
- okfn, NF_IP6_PRI_CONNTRACK + 1);
- return NF_DROP_ERR(-ECANCELED);
- }
- }
-
- nf_conntrack_get(reasm->nfct);
- skb->nfct = reasm->nfct;
- skb->nfctinfo = reasm->nfctinfo;
- return NF_ACCEPT;
- }
-
- return nf_conntrack_in(net, PF_INET6, hooknum, skb);
-}
-
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+ return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
}
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -239,7 +189,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+ return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
@@ -297,9 +247,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
struct nf_conn *ct;
- tuple.src.u3.in6 = inet6->rcv_saddr;
+ tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
- tuple.dst.u3.in6 = inet6->daddr;
+ tuple.dst.u3.in6 = sk->sk_v6_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.dst.protonum = sk->sk_protocol;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index dffdc1a389c..0d5279fd852 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -144,12 +144,24 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
+static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ u32 c;
+
+ net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
+ c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
+ return c & (INETFRAGS_HASHSZ - 1);
+}
+
+
static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
const struct frag_queue *nq;
nq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+ return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
static void nf_skb_free(struct sk_buff *skb)
@@ -185,7 +197,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.ecn = ecn;
read_lock_bh(&nf_frags.lock);
- hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
+ hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
@@ -439,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
}
sub_frag_mem_limit(&fq->q, head->truesize);
- head->local_df = 1;
+ head->ignore_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
@@ -621,31 +633,16 @@ ret_orig:
return skb;
}
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
- struct net_device *in, struct net_device *out,
- int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
- unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
- nf_conntrack_put_reasm(s->nfct_reasm);
- nf_conntrack_get_reasm(skb);
- s->nfct_reasm = skb;
-
s2 = s->next;
s->next = NULL;
-
- if (ret != -ECANCELED)
- ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
- in, out, okfn,
- NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
- else
- kfree_skb(s);
-
+ consume_skb(s);
s = s2;
}
- nf_conntrack_put_reasm(skb);
}
static int nf_ct_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c..7b9a748c6ba 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
}
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+ reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
if (reasm == skb)
return NF_ACCEPT;
- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
- (struct net_device *)out, okfn);
+ nf_ct_frag6_consume_orig(reasm);
+
+ NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+ (struct net_device *) in, (struct net_device *) out,
+ okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
return NF_STOLEN;
}
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 61aaf70f376..2205e8eeeac 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -69,8 +69,8 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
- if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, 0);
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 00000000000..0d812b31277
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain_ipv6(ops, skb, in, out, okfn);
+}
+
+struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv6);
+
+static int nf_tables_ipv6_init_net(struct net *net)
+{
+ net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv6 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
+
+ if (nft_register_afinfo(net, net->nft.ipv6) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv6);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv6_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv6);
+ kfree(net->nft.ipv6);
+}
+
+static struct pernet_operations nf_tables_ipv6_net_ops = {
+ .init = nf_tables_ipv6_init_net,
+ .exit = nf_tables_ipv6_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv6 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_ipv6);
+ ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_ipv6);
+
+ return ret;
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+ nft_unregister_chain_type(&filter_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 00000000000..d189fcb437f
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ipv6.h>
+
+/*
+ * IPv6 NAT chains
+ */
+
+static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv6 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
+ [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ },
+};
+
+static int __init nft_chain_nat_ipv6_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv6);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_ipv6_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+}
+
+module_init(nft_chain_nat_ipv6_init);
+module_exit(nft_chain_nat_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 00000000000..42031299585
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ struct in6_addr saddr, daddr;
+ u_int8_t hop_limit;
+ u32 mark, flowlabel;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+ /* flowlabel and prio (includes version, which shouldn't change either */
+ flowlabel = *((u32 *)ipv6_hdr(skb));
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv6 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv6);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv6);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 00000000000..0bc19fa8782
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+ .type = &nft_reject_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv6_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "reject",
+ .ops = &nft_reject_ipv6_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795209c..5ec867e4a8b 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -6,35 +6,7 @@
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
-
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
- }
-#endif
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
-}
-EXPORT_SYMBOL(ipv6_select_ident);
+#include <net/secure_seq.h>
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
@@ -106,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)
if (len > IPV6_MAXPLEN)
len = 0;
ipv6_hdr(skb)->payload_len = htons(len);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
skb_dst(skb)->dev, dst_output);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 18f19df4189..5b7a1ed2aba 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -31,7 +31,7 @@ struct proto pingv6_prot = {
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
- .connect = ip6_datagram_connect,
+ .connect = ip6_datagram_connect_v6_only,
.disconnect = udp_disconnect,
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -51,20 +51,19 @@ static struct inet_protosw pingv6_protosw = {
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
-static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
{
return -EAFNOSUPPORT;
}
-static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
struct sk_buff *skb)
{
- return -EAFNOSUPPORT;
}
static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
{
@@ -102,7 +101,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return err;
if (msg->msg_name) {
- struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
u->sin6_family != AF_INET6) {
return -EINVAL;
@@ -116,7 +115,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
}
if (!iif)
@@ -135,6 +134,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -144,7 +144,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
if (IS_ERR(dst))
return PTR_ERR(dst);
rt = (struct rt6_info *) dst;
@@ -167,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
pfh.wcheck = 0;
pfh.family = AF_INET6;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
@@ -181,8 +176,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
- ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6,
@@ -253,7 +248,9 @@ int __init pingv6_init(void)
return ret;
#endif
pingv6_ops.ipv6_recv_error = ipv6_recv_error;
- pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+ pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl;
+ pingv6_ops.ip6_datagram_recv_specific_ctl =
+ ip6_datagram_recv_specific_ctl;
pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
@@ -266,7 +263,8 @@ int __init pingv6_init(void)
void pingv6_exit(void)
{
pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
- pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl;
pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 091d066a57b..3317440ea34 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
/* can be called either with percpu mib (pcpumib != NULL),
* or shared one (smib != NULL)
*/
-static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
@@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
}
}
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
int i;
@@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
- snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+ snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
+ snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
NULL, snmp6_icmp6_list);
snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
NULL, snmp6_udp6_list);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
NULL, snmp6_udplite6_list);
return 0;
}
@@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
struct inet6_dev *idev = (struct inet6_dev *)seq->private;
seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
- snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+ snmp6_seq_show_item64(seq, idev->stats.ipv6,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
snmp6_icmp6_list);
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 22d1bd4670d..e048cf1bb6a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol
}
EXPORT_SYMBOL(inet6_add_protocol);
-/*
- * Remove a protocol from the hash tables.
- */
-
int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
int ret;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bbb172..b2dc60b0c76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
sk_for_each_from(sk)
if (inet_sk(sk)->inet_num == num) {
- struct ipv6_pinfo *np = inet6_sk(sk);
if (!net_eq(sock_net(sk), net))
continue;
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
goto found;
if (is_multicast &&
inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -251,6 +250,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+
+ if (addr->sin6_family != AF_INET6)
+ return -EINVAL;
+
addr_type = ipv6_addr_type(&addr->sin6_addr);
/* Raw sockets are IPv6 only */
@@ -302,7 +305,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
- np->rcv_saddr = addr->sin6_addr;
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
err = 0;
@@ -335,8 +338,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
}
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ return;
+ }
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
@@ -456,7 +461,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct sk_buff *skb;
size_t copied;
int err;
@@ -464,14 +469,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (addr_len)
- *addr_len=sizeof(*sin6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -505,6 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
IP6CB(skb)->iif);
+ *addr_len = sizeof(*sin6);
}
sock_recv_ts_and_drops(msg, sk, skb);
@@ -735,7 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
- struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -793,7 +796,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- daddr = &flowlabel->dst;
}
}
@@ -802,8 +804,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
@@ -814,7 +816,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
return -EDESTADDRREQ;
proto = inet->inet_num;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
}
@@ -866,19 +868,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -1211,7 +1207,7 @@ struct proto rawv6_prot = {
.owner = THIS_MODULE,
.close = rawv6_close,
.destroy = raw6_destroy,
- .connect = ip6_datagram_connect,
+ .connect = ip6_datagram_connect_v6_only,
.disconnect = udp_disconnect,
.ioctl = rawv6_ioctl,
.init = rawv6_init_sk,
@@ -1326,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1aeb473b2cc..cc85a9ba501 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -82,24 +82,24 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
*/
-unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr, u32 rnd)
+static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
u32 c;
+ net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, rnd);
+ (__force u32)id, ip6_frags.rnd);
return c & (INETFRAGS_HASHSZ - 1);
}
-EXPORT_SYMBOL_GPL(inet6_hash_frag);
static unsigned int ip6_hashfn(struct inet_frag_queue *q)
{
struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
+ return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
bool ip6_frag_match(struct inet_frag_queue *q, void *a)
@@ -193,7 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.ecn = ecn;
read_lock(&ip6_frags.lock);
- hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
+ hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
if (IS_ERR_OR_NULL(q)) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c979dd96d82..f23fbd28a50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -66,8 +66,9 @@
#endif
enum rt6_nud_state {
- RT6_NUD_FAIL_HARD = -2,
- RT6_NUD_FAIL_SOFT = -1,
+ RT6_NUD_FAIL_HARD = -3,
+ RT6_NUD_FAIL_PROBE = -2,
+ RT6_NUD_FAIL_DO_RR = -1,
RT6_NUD_SUCCEED = 1
};
@@ -83,7 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -101,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *gwaddr, int ifindex);
#endif
+static void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+ struct inet_peer_base *base;
+ struct inet_peer *peer;
+
+ base = inetpeer_base_ptr(rt->_rt6i_peer);
+ if (!base)
+ return;
+
+ peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+ if (peer) {
+ if (!rt6_set_peer(rt, peer))
+ inet_putpeer(peer);
+ }
+}
+
+static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
+{
+ if (rt6_has_peer(rt))
+ return rt6_peer_ptr(rt);
+
+ rt6_bind_peer(rt, create);
+ return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
+}
+
+static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+{
+ return __rt6_get_peer(rt, 1);
+}
+
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
struct rt6_info *rt = (struct rt6_info *) dst;
@@ -116,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
unsigned long prev, new;
p = peer->metrics;
- if (inet_metrics_new(peer))
+ if (inet_metrics_new(peer) ||
+ (old & DST_METRICS_FORCE_OVERWRITE))
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
new = (unsigned long) p;
@@ -234,9 +268,6 @@ static const struct rt6_info ip6_null_entry_template = {
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-
static const struct rt6_info ip6_prohibit_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -259,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -312,22 +343,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
}
}
-void rt6_bind_peer(struct rt6_info *rt, int create)
-{
- struct inet_peer_base *base;
- struct inet_peer *peer;
-
- base = inetpeer_base_ptr(rt->_rt6i_peer);
- if (!base)
- return;
-
- peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
- if (peer) {
- if (!rt6_set_peer(rt, peer))
- inet_putpeer(peer);
- }
-}
-
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
{
@@ -359,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static bool rt6_need_strict(const struct in6_addr *daddr)
-{
- return ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
-}
-
/* Multipath route selection:
* Hash based function using packet header and flowlabel.
* Adapted from fib_info_hashfn()
@@ -476,6 +485,24 @@ out:
}
#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+ struct work_struct work;
+ struct in6_addr target;
+ struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+ struct in6_addr mcaddr;
+ struct __rt6_probe_work *work =
+ container_of(w, struct __rt6_probe_work, work);
+
+ addrconf_addr_solict_mult(&work->target, &mcaddr);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ dev_put(work->dev);
+ kfree(w);
+}
+
static void rt6_probe(struct rt6_info *rt)
{
struct neighbour *neigh;
@@ -499,17 +526,23 @@ static void rt6_probe(struct rt6_info *rt)
if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct in6_addr mcaddr;
- struct in6_addr *target;
+ struct __rt6_probe_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (neigh) {
- neigh->updated = jiffies;
+ if (neigh && work)
+ __neigh_set_probe_once(neigh);
+
+ if (neigh)
write_unlock(&neigh->lock);
- }
- target = (struct in6_addr *)&rt->rt6i_gateway;
- addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
} else {
out:
write_unlock(&neigh->lock);
@@ -554,11 +587,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
#ifdef CONFIG_IPV6_ROUTER_PREF
else if (!(neigh->nud_state & NUD_FAILED))
ret = RT6_NUD_SUCCEED;
+ else
+ ret = RT6_NUD_FAIL_PROBE;
#endif
read_unlock(&neigh->lock);
} else {
ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
- RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
}
rcu_read_unlock_bh();
@@ -595,16 +630,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
goto out;
m = rt6_score_route(rt, oif, strict);
- if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
+ if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true;
m = 0; /* lowest valid score */
- } else if (m < 0) {
+ } else if (m == RT6_NUD_FAIL_HARD) {
goto out;
}
if (strict & RT6_LOOKUP_F_REACHABLE)
rt6_probe(rt);
+ /* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
*do_rr = match_do_rr;
*mpri = m;
@@ -707,8 +743,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
prefix = &prefix_buf;
}
- rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
- dev->ifindex);
+ if (rinfo->prefix_len == 0)
+ rt = rt6_get_dflt_router(gwaddr, dev);
+ else
+ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
+ gwaddr, dev->ifindex);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -813,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup);
be destroyed.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info);
+ err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -831,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = {
.nl_net = dev_net(rt->dst.dev),
};
- return __ip6_ins_rt(rt, &info);
+ return __ip6_ins_rt(rt, &info, NULL, 0);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
@@ -847,12 +887,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
rt = ip6_rt_copy(ort, daddr);
if (rt) {
- if (!(rt->rt6i_flags & RTF_GATEWAY)) {
- if (ort->rt6i_dst.plen != 128 &&
- ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
- rt->rt6i_flags |= RTF_ANYCAST;
- rt->rt6i_gateway = *daddr;
- }
+ if (ort->rt6i_dst.plen != 128 &&
+ ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+ rt->rt6i_flags |= RTF_ANYCAST;
rt->rt6i_flags |= RTF_CACHE;
@@ -1021,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
@@ -1064,10 +1101,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
return NULL;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
- return dst;
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
- return NULL;
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return dst;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1136,8 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
+ fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1234,9 +1273,9 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1256,9 +1295,9 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
fl6.daddr = msg->dest;
fl6.saddr = iph->daddr;
@@ -1301,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1311,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -1338,6 +1378,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
@@ -1414,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1471,7 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1501,17 +1542,11 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags |= DST_HOST;
-
- if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
- u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!metrics) {
- err = -ENOMEM;
- goto out;
- }
- dst_init_metrics(&rt->dst, metrics, 0);
+ if (rt->rt6i_dst.plen == 128) {
+ rt->dst.flags |= DST_HOST;
+ dst_metrics_set_force_overwrite(&rt->dst);
}
+
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1540,21 +1575,24 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
}
}
- rt->dst.output = ip6_pkt_discard_out;
- rt->dst.input = ip6_pkt_discard;
rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
+ rt->dst.output = dst_discard_sk;
+ rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
rt->dst.error = -EACCES;
+ rt->dst.output = ip6_pkt_prohibit_out;
+ rt->dst.input = ip6_pkt_prohibit;
break;
case RTN_THROW:
- rt->dst.error = -EAGAIN;
- break;
default:
- rt->dst.error = -ENETUNREACH;
+ rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
+ : -ENETUNREACH;
+ rt->dst.output = ip6_pkt_discard_out;
+ rt->dst.input = ip6_pkt_discard;
break;
}
goto install_route;
@@ -1627,31 +1665,13 @@ int ip6_route_add(struct fib6_config *cfg)
rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- if (type > RTAX_MAX) {
- err = -EINVAL;
- goto out;
- }
-
- dst_metric_set(&rt->dst, type, nla_get_u32(nla));
- }
- }
- }
-
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
out:
if (dev)
@@ -1873,11 +1893,12 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_gateway = ort->rt6i_gateway;
+ if (ort->rt6i_flags & RTF_GATEWAY)
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ else
+ rt->rt6i_gateway = *dest;
rt->rt6i_flags = ort->rt6i_flags;
- if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
- (RTF_DEFAULT | RTF_ADDRCONF))
- rt6_set_from(rt, ort);
+ rt6_set_from(rt, ort);
rt->rt6i_metric = 0;
#ifdef CONFIG_IPV6_SUBTREES
@@ -2110,27 +2131,23 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
-#endif
-
/*
* Allocate a dst for local (unicast / anycast) address.
*/
@@ -2140,12 +2157,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
-
- if (!rt) {
- net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
+ DST_NOCOUNT, NULL);
+ if (!rt)
return ERR_PTR(-ENOMEM);
- }
in6_dev_hold(idev);
@@ -2160,6 +2175,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
else
rt->rt6i_flags |= RTF_LOCAL;
+ rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
@@ -2215,7 +2231,28 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
.net = net,
.addr = &ifp->addr,
};
- fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
+ fib6_clean_all(net, fib6_remove_prefsrc, &adni);
+}
+
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when gateway turn into host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+{
+ struct in6_addr *gateway = (struct in6_addr *)arg;
+
+ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+ ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ return -1;
+ }
+ return 0;
+}
+
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+ fib6_clean_all(net, fib6_clean_tohost, gateway);
}
struct arg_dev_net {
@@ -2242,7 +2279,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
.net = net,
};
- fib6_clean_all(net, fib6_ifdown, 0, &adn);
+ fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
}
@@ -2297,7 +2334,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
.mtu = mtu,
};
- fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
+ fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
@@ -2693,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
if (tb[RTA_OIF])
oif = nla_get_u32(tb[RTA_OIF]);
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+
if (iif) {
struct net_device *dev;
int flags = 0;
@@ -2800,56 +2840,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
-struct rt6_proc_arg
-{
- char *buffer;
- int offset;
- int length;
- int skip;
- int len;
-};
-
-static int rt6_info_route(struct rt6_info *rt, void *p_arg)
-{
- struct seq_file *m = p_arg;
-
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
-
-#ifdef CONFIG_IPV6_SUBTREES
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
-#else
- seq_puts(m, "00000000000000000000000000000000 00 ");
-#endif
- if (rt->rt6i_flags & RTF_GATEWAY) {
- seq_printf(m, "%pi6", &rt->rt6i_gateway);
- } else {
- seq_puts(m, "00000000000000000000000000000000");
- }
- seq_printf(m, " %08x %08x %08x %08x %8s\n",
- rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, rt->rt6i_flags,
- rt->dst.dev ? rt->dst.dev->name : "");
- return 0;
-}
-
-static int ipv6_route_show(struct seq_file *m, void *v)
-{
- struct net *net = (struct net *)m->private;
- fib6_clean_all_ro(net, rt6_info_route, 0, m);
- return 0;
-}
-
-static int ipv6_route_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, ipv6_route_show);
-}
-
static const struct file_operations ipv6_route_proc_fops = {
.owner = THIS_MODULE,
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release_net,
+ .release = seq_release_net,
};
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db3..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,17 +475,48 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available
+ */
+static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct rt6_info *rt;
+ struct sk_buff *skb2;
+
+ if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8))
+ return 1;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 1;
+
+ skb_dst_drop(skb2);
+ skb_pull(skb2, iph->ihl * 4);
+ skb_reset_network_header(skb2);
+
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+
+ if (rt)
+ ip6_rt_put(rt);
+
+ kfree_skb(skb2);
+
+ return 0;
+}
static int ipip6_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
@@ -500,7 +531,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
/* Impossible event. */
return 0;
default:
@@ -530,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -545,6 +575,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
goto out;
err = 0;
+ if (!ipip6_err_gen_icmpv6_unreach(skb))
+ goto out;
+
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
@@ -566,6 +599,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -575,7 +672,7 @@ static int ipip6_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel != NULL) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
tunnel->parms.iph.protocol != 0)
@@ -586,19 +683,9 @@ static int ipip6_rcv(struct sk_buff *skb)
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -616,8 +703,10 @@ static int ipip6_rcv(struct sk_buff *skb)
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
netif_rx(skb);
@@ -748,7 +837,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -777,7 +866,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -838,7 +927,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (tunnel->parms.iph.daddr && skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ip_rt_put(rt);
goto tx_error;
@@ -865,7 +954,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -879,21 +968,24 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
+ kfree_skb(skb);
+out:
dev->stats.tx_errors++;
- dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -902,13 +994,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto out;
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
+out:
+ dev->stats.tx_errors++;
+ return NETDEV_TX_OK;
}
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
@@ -929,7 +1023,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
tx_err:
dev->stats.tx_errors++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -990,6 +1084,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
}
@@ -1020,6 +1115,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
return 0;
}
@@ -1031,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1043,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -1148,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1166,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1179,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1195,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
err = ipip6_tunnel_update_6rd(t, &ip6rd);
if (err < 0)
@@ -1234,10 +1322,19 @@ static const struct net_device_ops ipip6_netdev_ops = {
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
+#define SIT_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static void ipip6_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip6_netdev_ops;
@@ -1251,6 +1348,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
+ dev->features |= SIT_FEATURES;
+ dev->hw_features |= SIT_FEATURES;
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1264,10 +1363,16 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -1287,9 +1392,16 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;
@@ -1540,6 +1652,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
#endif
};
+static void ipip6_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev != sitn->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static struct rtnl_link_ops sit_link_ops __read_mostly = {
.kind = "sit",
.maxtype = IFLA_IPTUN_MAX,
@@ -1551,6 +1672,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
.changelink = ipip6_changelink,
.get_size = ipip6_get_size,
.fill_info = ipip6_fill_info,
+ .dellink = ipip6_dellink,
};
static struct xfrm_tunnel sit_handler __read_mostly = {
@@ -1565,9 +1687,10 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
-static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
+static void __net_exit sit_destroy_tunnels(struct net *net,
+ struct list_head *head)
{
- struct net *net = dev_net(sitn->fb_tunnel_dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
struct net_device *dev, *aux;
int prio;
@@ -1612,6 +1735,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1641,12 +1765,10 @@ err_alloc_dev:
static void __net_exit sit_exit_net(struct net *net)
{
- struct sit_net *sitn = net_generic(net, sit_net_id);
LIST_HEAD(list);
rtnl_lock();
- sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+ sit_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1706,4 +1828,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bf63ac8a49b..a822b880689 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,26 +24,23 @@
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-/* Table must be sorted. */
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+
+/* RFC 2460, Section 8.3:
+ * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
+ *
+ * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows
+ * using higher values than ipv4 tcp syncookies.
+ * The other values are chosen based on ethernet (1500 and 9k MTU), plus
+ * one that accounts for common encap (PPPoe) overhead. Table must be sorted.
+ */
static __u16 const msstab[] = {
- 64,
- 512,
- 536,
- 1280 - 60,
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
1480 - 60,
1500 - 60,
- 4460 - 60,
9000 - 60,
};
-/*
- * This (misnamed) value is the age of syncookie which is permitted.
- * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
- * sysctl_tcp_retries1. It's a rather complicated formula (exponential
- * backoff) to compute at runtime so it's currently hardcoded here.
- */
-#define COUNTER_TRIES 4
-
static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
@@ -66,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
+ __u32 *tmp;
+
+ net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
+
+ tmp = __get_cpu_var(ipv6_cookie_scratch);
/*
* we have 320 bits of information to hash, copy in the remaining
- * 192 bits required for sha_transform, from the syncookie_secret
+ * 192 bits required for sha_transform, from the syncookie6_secret
* and overwrite the digest with the secret
*/
- memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp + 10, syncookie6_secret[c], 44);
memcpy(tmp, saddr, 16);
memcpy(tmp + 4, daddr, 16);
tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
@@ -86,8 +87,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd
static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__be16 sport, __be16 dport, __u32 sseq,
- __u32 count, __u32 data)
+ __u32 data)
{
+ u32 count = tcp_cookie_time();
return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
sseq + (count << COOKIEBITS) +
((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
@@ -96,15 +98,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
const struct in6_addr *daddr, __be16 sport,
- __be16 dport, __u32 sseq, __u32 count,
- __u32 maxdiff)
+ __be16 dport, __u32 sseq)
{
- __u32 diff;
+ __u32 diff, count = tcp_cookie_time();
cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
- if (diff >= maxdiff)
+ if (diff >= MAX_SYNCOOKIE_AGE)
return (__u32)-1;
return (cookie -
@@ -125,8 +126,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
*mssp = msstab[mssind];
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
- th->dest, ntohl(th->seq),
- jiffies / (HZ * 60), mssind);
+ th->dest, ntohl(th->seq), mssind);
}
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
@@ -146,8 +146,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq,
- jiffies / (HZ * 60), COUNTER_TRIES);
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -157,7 +156,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
struct inet_request_sock *ireq;
- struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -194,7 +192,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ireq = inet_rsk(req);
- ireq6 = inet6_rsk(req);
treq = tcp_rsk(req);
treq->listener = NULL;
@@ -202,22 +199,24 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
req->mss = mss;
- ireq->rmt_port = th->source;
- ireq->loc_port = th->dest;
- ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
- ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- ireq6->pktopts = skb;
+ ireq->pktopts = skb;
}
- ireq6->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq6->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ ireq->ir_mark = inet_request_mark(sk, skb);
req->expires = 0UL;
req->num_retrans = 0;
@@ -241,16 +240,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = ireq6->rmt_addr;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = ireq6->loc_addr;
+ fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
+ fl6.flowi6_mark = ireq->ir_mark;
+ fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 107b2f1d90a..058f3eca2e5 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -24,6 +24,27 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "anycast_src_echo_reply",
+ .data = &init_net.ipv6.sysctl.anycast_src_echo_reply,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "flowlabel_consistency",
+ .data = &init_net.ipv6.sysctl.flowlabel_consistency,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv6.sysctl.fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -51,6 +72,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_table)
goto out;
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
+ ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
+ ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5c71501fc91..229239ad96b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -39,7 +39,7 @@
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
-
+#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
@@ -65,8 +65,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <asm/uaccess.h>
-
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -156,7 +154,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- usin->sin6_addr = flowlabel->dst;
fl6_sock_release(flowlabel);
}
}
@@ -165,12 +162,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* connect() to INADDR_ANY means loopback (BSD'ism).
*/
- if(ipv6_addr_any(&usin->sin6_addr))
+ if (ipv6_addr_any(&usin->sin6_addr))
usin->sin6_addr.s6_addr[15] = 0x1;
addr_type = ipv6_addr_type(&usin->sin6_addr);
- if(addr_type & IPV6_ADDR_MULTICAST)
+ if (addr_type & IPV6_ADDR_MULTICAST)
return -ENETUNREACH;
if (addr_type&IPV6_ADDR_LINKLOCAL) {
@@ -192,13 +189,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (tp->rx_opt.ts_recent_stamp &&
- !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+ !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
- np->daddr = usin->sin6_addr;
+ sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
@@ -237,17 +234,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
} else {
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
}
return err;
}
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ saddr = &sk->sk_v6_rcv_saddr;
fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -258,7 +255,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
@@ -266,7 +263,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (saddr == NULL) {
saddr = &fl6.saddr;
- np->rcv_saddr = *saddr;
+ sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
@@ -279,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
@@ -298,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
@@ -337,13 +334,14 @@ static void tcp_v6_mtu_reduced(struct sock *sk)
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
struct ipv6_pinfo *np;
struct sock *sk;
int err;
struct tcp_sock *tp;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
struct net *net = dev_net(skb->dev);
sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -374,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt)) {
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -398,6 +399,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_LISTEN)
goto out;
+ if (!ip6_sk_accept_pmtu(sk))
+ goto out;
+
tp->mtu_info = ntohl(info);
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
@@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can, it SYNs are crossed. --ANK */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * is already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -463,23 +472,28 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
struct flowi6 *fl6,
struct request_sock *req,
- u16 queue_mapping)
+ u16 queue_mapping,
+ struct tcp_fastopen_cookie *foc)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sk_buff * skb;
+ struct sk_buff *skb;
int err = -ENOMEM;
/* First, grab a route. */
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, NULL);
+ skb = tcp_make_synack(sk, dst, req, foc);
if (skb) {
- __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+ __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+ &ireq->ir_v6_rmt_addr);
+
+ fl6->daddr = ireq->ir_v6_rmt_addr;
+ if (np->repflow && (ireq->pktopts != NULL))
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
@@ -494,15 +508,17 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
struct flowi6 fl6;
int res;
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
- if (!res)
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
- kfree_skb(inet6_rsk(req)->pktopts);
+ kfree_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -515,17 +531,17 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
struct sock *addr_sk)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
struct request_sock *req)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
+ return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
-static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
- int optlen)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+ int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -621,10 +637,10 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
if (sk) {
saddr = &inet6_sk(sk)->saddr;
- daddr = &inet6_sk(sk)->daddr;
+ daddr = &sk->sk_v6_daddr;
} else if (req) {
- saddr = &inet6_rsk(req)->loc_addr;
- daddr = &inet6_rsk(req)->rmt_addr;
+ saddr = &inet_rsk(req)->ir_v6_loc_addr;
+ daddr = &inet_rsk(req)->ir_v6_rmt_addr;
} else {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
saddr = &ip6h->saddr;
@@ -709,7 +725,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -720,8 +736,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
#endif
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 tsval, u32 tsecr,
- struct tcp_md5sig_key *key, int rst, u8 tclass)
+ u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key, int rst, u8 tclass,
+ u32 label)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -783,6 +800,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
fl6.saddr = ipv6_hdr(skb)->daddr;
+ fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
buff->csum = 0;
@@ -790,8 +808,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = inet6_iif(skb);
+ else
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -800,7 +821,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
* Underlying function will use this to retrieve the network
* namespace
*/
- dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
+ dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
@@ -825,6 +846,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
+ int oif;
if (th->rst)
return;
@@ -868,7 +890,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
+ oif = sk ? sk->sk_bound_dev_if : 0;
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
release_sk1:
@@ -880,10 +903,12 @@ release_sk1:
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 tsval, u32 tsecr,
- struct tcp_md5sig_key *key, u8 tclass)
+ u32 win, u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key, u8 tclass,
+ u32 label)
{
- tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
+ label);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -894,8 +919,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass);
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tw->tw_tclass, (tw->tw_flowlabel << 12));
inet_twsk_put(tw);
}
@@ -903,13 +928,19 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
- req->rcv_wnd, tcp_time_stamp, req->ts_recent,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
+ tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ tcp_rsk(req)->rcv_nxt,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ 0, 0);
}
-static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
+static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
struct request_sock *req, **prev;
const struct tcphdr *th = tcp_hdr(skb);
@@ -949,13 +980,15 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tmp_opt;
struct request_sock *req;
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ bool want_cookie = false, fastopen;
struct flowi6 fl6;
- bool want_cookie = false;
+ int err;
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -986,7 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -994,25 +1027,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
- treq = inet6_rsk(req);
- treq->rmt_addr = ipv6_hdr(skb)->saddr;
- treq->loc_addr = ipv6_hdr(skb)->daddr;
+ ireq = inet_rsk(req);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (!want_cookie || tmp_opt.tstamp_ok)
TCP_ECN_create_request(req, skb, sock_net(sk));
- treq->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_mark = inet_request_mark(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
if (!isn) {
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
+ np->repflow) {
atomic_inc(&skb->users);
- treq->pktopts = skb;
+ ireq->pktopts = skb;
}
if (want_cookie) {
@@ -1051,26 +1086,34 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+ &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
}
isn = tcp_v6_init_sequence(skb);
}
have_isn:
- tcp_rsk(req)->snt_isn = isn;
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, dst, &fl6, req,
- skb_get_queue_mapping(skb)) ||
- want_cookie)
+ if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
goto drop_and_free;
+ tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_rsk(req)->listener = NULL;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = tcp_v6_send_synack(sk, dst, &fl6, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
+ if (err || want_cookie)
+ goto drop_and_free;
+
+ tcp_rsk(req)->listener = NULL;
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ }
return 0;
drop_and_release:
@@ -1082,11 +1125,11 @@ drop:
return 0; /* don't send reset */
}
-static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
+static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
{
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
@@ -1116,11 +1159,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- newnp->rcv_saddr = newnp->saddr;
+ newsk->sk_v6_rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1134,7 +1177,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ if (np->repflow)
+ newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1151,7 +1196,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
}
- treq = inet6_rsk(req);
+ ireq = inet_rsk(req);
if (sk_acceptq_is_full(sk))
goto out_overflow;
@@ -1185,10 +1230,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- newnp->daddr = treq->rmt_addr;
- newnp->saddr = treq->loc_addr;
- newnp->rcv_saddr = treq->loc_addr;
- newsk->sk_bound_dev_if = treq->iif;
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newnp->saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -1203,18 +1248,20 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts,
+ if (ireq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
sk_gfp_atomic(sk, GFP_ATOMIC));
- consume_skb(treq->pktopts);
- treq->pktopts = NULL;
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
if (newnp->pktoptions)
skb_set_owner_r(newnp->pktoptions, newsk);
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ if (np->repflow)
+ newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Clone native IPv6 options from listening socket (if any)
@@ -1230,7 +1277,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
newnp->opt->opt_flen);
- tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric_advmss(dst);
if (tcp_sk(sk)->rx_opt.user_mss &&
@@ -1244,13 +1290,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ if (key != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
AF_INET6, key->key, key->keylen,
sk_gfp_atomic(sk, GFP_ATOMIC));
}
@@ -1274,26 +1321,6 @@ out:
return NULL;
}
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0));
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1320,7 +1347,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash (sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard;
#endif
@@ -1379,7 +1406,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
- if(nsk != sk) {
+ if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
@@ -1424,8 +1451,10 @@ ipv6_pktoptions:
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
- if (np->rxopt.bits.rxtclass)
- np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
+ if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+ np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+ if (np->repflow)
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1465,7 +1494,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+ if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error;
th = tcp_hdr(skb);
@@ -1585,7 +1614,8 @@ do_time_wait:
break;
case TCP_TW_RST:
goto no_tcp_socket;
- case TCP_TW_SUCCESS:;
+ case TCP_TW_SUCCESS:
+ ;
}
goto discard_it;
}
@@ -1630,7 +1660,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
+ .twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1664,7 +1694,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
-
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
@@ -1722,8 +1751,8 @@ static void get_openreq6(struct seq_file *seq,
const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
int ttd = req->expires - jiffies;
- const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
- const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+ const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+ const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
if (ttd < 0)
ttd = 0;
@@ -1734,12 +1763,12 @@ static void get_openreq6(struct seq_file *seq,
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
- ntohs(inet_rsk(req)->loc_port),
+ inet_rsk(req)->ir_num,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3],
- ntohs(inet_rsk(req)->rmt_port),
+ ntohs(inet_rsk(req)->ir_rmt_port),
TCP_SYN_RECV,
- 0,0, /* could print option size, but that is af dependent. */
+ 0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
req->num_timeout,
@@ -1758,10 +1787,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
- const struct ipv6_pinfo *np = inet6_sk(sp);
+ struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
- dest = &np->daddr;
- src = &np->rcv_saddr;
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
@@ -1799,9 +1828,11 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
atomic_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
- (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
+ (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ sp->sk_state == TCP_LISTEN ?
+ (fastopenq ? fastopenq->max_qlen : 0) :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
@@ -1810,11 +1841,10 @@ static void get_timewait6_sock(struct seq_file *seq,
{
const struct in6_addr *dest, *src;
__u16 destp, srcp;
- const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
- long delta = tw->tw_ttd - jiffies;
+ s32 delta = tw->tw_ttd - inet_tw_time_stamp();
- dest = &tw6->tw_v6_daddr;
- src = &tw6->tw_v6_rcv_saddr;
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
@@ -1834,6 +1864,7 @@ static void get_timewait6_sock(struct seq_file *seq,
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
+ struct sock *sk = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -1849,14 +1880,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp6_sock(seq, v, st->num);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
break;
case TCP_SEQ_STATE_OPENREQ:
get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait6_sock(seq, v, st->num);
- break;
}
out:
return 0;
@@ -1929,6 +1960,7 @@ struct proto tcpv6_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
+ .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
@@ -1960,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 2ec6bf6a0aa..01b0ff9a0c2 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -37,45 +37,43 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
__wsum wsum;
- __sum16 sum;
+
+ /* Don't bother verifying checksum if we're going to flush anyway. */
+ if (NAPI_GRO_CB(skb)->flush)
+ goto skip_csum;
+
+ wsum = NAPI_GRO_CB(skb)->csum;
switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
+ wsum);
+
+ /* fall through */
+
case CHECKSUM_COMPLETE:
if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
- skb->csum)) {
+ wsum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
-flush:
+
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
-
- case CHECKSUM_NONE:
- wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
- skb_gro_len(skb),
- IPPROTO_TCP, 0));
- sum = csum_fold(skb_checksum(skb,
- skb_gro_offset(skb),
- skb_gro_len(skb),
- wsum));
- if (sum)
- goto flush;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
}
+skip_csum:
return tcp_gro_receive(head, skb);
}
-static int tcp6_gro_complete(struct sk_buff *skb)
+static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
- th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
- &iph->saddr, &iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
+ &iph->daddr, 0);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
return tcp_gro_complete(skb);
}
@@ -83,7 +81,7 @@ static int tcp6_gro_complete(struct sk_buff *skb)
static const struct net_offload tcpv6_offload = {
.callbacks = {
.gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_tso_segment,
+ .gso_segment = tcp_gso_segment,
.gro_receive = tcp6_gro_receive,
.gro_complete = tcp6_gro_complete,
},
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 4b0f50d9a96..2c4e4c5c761 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
* YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262..7092ff78fd8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,22 +53,42 @@
#include <trace/events/skb.h>
#include "udp_impl.h"
+static unsigned int udp6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 udp6_ehash_secret __read_mostly;
+ static u32 udp_ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&udp6_ehash_secret,
+ sizeof(udp6_ehash_secret));
+ net_get_random_once(&udp_ipv6_hash_secret,
+ sizeof(udp_ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ udp_ipv6_hash_secret + net_hash_mix(net));
+}
+
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
- __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
return (!sk2_ipv6only &&
- (!sk1_rcv_saddr || !sk2_rcv_saddr ||
- sk1_rcv_saddr == sk2_rcv_saddr));
+ (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+ sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
if (addr_type2 == IPV6_ADDR_ANY &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -79,7 +99,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
return 1;
return 0;
@@ -107,7 +127,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
unsigned int hash2_nulladdr =
udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+ udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -117,7 +137,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
u16 new_hash = udp6_portaddr_hash(sock_net(sk),
- &inet6_sk(sk)->rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
@@ -133,7 +153,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
score = 0;
@@ -142,13 +161,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -171,10 +190,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score = 0;
if (inet->inet_dport) {
@@ -182,8 +200,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -219,8 +237,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
} else if (score == SCORE2_MAX)
goto exact_match;
@@ -300,8 +318,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -374,14 +392,11 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int is_udp4;
bool slow;
- if (addr_len)
- *addr_len = sizeof(struct sockaddr_in6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
@@ -445,9 +460,7 @@ try_again:
/* Copy the address. */
if (msg->msg_name) {
- struct sockaddr_in6 *sin6;
-
- sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
sin6->sin6_family = AF_INET6;
sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
@@ -462,14 +475,18 @@ try_again:
ipv6_iface_scope_id(&sin6->sin6_addr,
IP6CB(skb)->iif);
}
-
+ *addr_len = sizeof(*sin6);
}
+
+ if (np->rxopt.all)
+ ip6_datagram_recv_common_ctl(sk, msg, skb);
+
if (is_udp4) {
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
} else {
if (np->rxopt.all)
- ip6_datagram_recv_ctl(sk, msg, skb);
+ ip6_datagram_recv_specific_ctl(sk, msg, skb);
}
err = copied;
@@ -523,10 +540,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk == NULL)
return;
- if (type == ICMPV6_PKT_TOOBIG)
+ if (type == ICMPV6_PKT_TOOBIG) {
+ if (!ip6_sk_accept_pmtu(sk))
+ goto out;
ip6_sk_update_pmtu(skb, sk, info);
- if (type == NDISC_REDIRECT)
+ }
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ goto out;
+ }
np = inet6_sk(sk);
@@ -549,8 +571,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
- if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ }
rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
@@ -610,6 +634,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
int ret;
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
ret = encap_rcv(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(sock_net(sk),
@@ -646,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
+ }
skb_dst_drop(skb);
@@ -662,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+
csum_error:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
@@ -677,36 +709,34 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
int dif)
{
struct hlist_nulls_node *node;
- struct sock *s = sk;
unsigned short num = ntohs(loc_port);
- sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
+ sk_nulls_for_each_from(sk, node) {
+ struct inet_sock *inet = inet_sk(sk);
- if (!net_eq(sock_net(s), net))
+ if (!net_eq(sock_net(sk), net))
continue;
- if (udp_sk(s)->udp_port_hash == num &&
- s->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(s);
+ if (udp_sk(sk)->udp_port_hash == num &&
+ sk->sk_family == PF_INET6) {
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
}
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
continue;
}
- if (!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
continue;
- return s;
+ return sk;
}
}
return NULL;
@@ -737,6 +767,17 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (unlikely(skb1))
kfree_skb(skb1);
}
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ * this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
@@ -756,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
- stack[count++] = sk;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (uh->check || udp_sk(sk)->no_check6_rx)
+ stack[count++] = sk;
+
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) {
@@ -844,7 +890,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
- sk_mark_napi_id(sk, skb);
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -857,6 +908,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return 0;
}
+ if (!uh->check) {
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
@@ -984,7 +1040,10 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ else if (up->no_check6_tx) { /* UDP csum disabled */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len);
goto send;
@@ -1021,7 +1080,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
@@ -1062,7 +1121,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
} else if (!up->pending) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
} else
daddr = NULL;
@@ -1123,7 +1182,6 @@ do_udp_sendmsg:
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- daddr = &flowlabel->dst;
}
}
@@ -1132,8 +1190,8 @@ do_udp_sendmsg:
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
@@ -1144,7 +1202,7 @@ do_udp_sendmsg:
return -EDESTADDRREQ;
fl6.fl6_dport = inet->inet_dport;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
connected = 1;
}
@@ -1204,28 +1262,19 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true);
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
@@ -1244,6 +1293,8 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
@@ -1260,8 +1311,8 @@ do_append_data:
if (dst) {
if (connected) {
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
@@ -1459,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 4691ed50a92..c779c3c90b9 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -7,33 +7,32 @@
#include <net/inet_common.h>
#include <net/transp_v6.h>
-extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
-extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
- u8 , u8 , int , __be32 , struct udp_table *);
+int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
+void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+ __be32, struct udp_table *);
-extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
+int udp_v6_get_port(struct sock *sk, unsigned short snum);
-extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#ifdef CONFIG_COMPAT
-extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
#endif
-extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len);
-extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len);
-extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern void udpv6_destroy_sock(struct sock *sk);
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len);
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void udpv6_destroy_sock(struct sock *sk);
-extern void udp_v6_clear_sk(struct sock *sk, int size);
+void udp_v6_clear_sk(struct sock *sk, int size);
#ifdef CONFIG_PROC_FS
-extern int udp6_seq_show(struct seq_file *seq, void *v);
+int udp6_seq_show(struct seq_file *seq, void *v);
#endif
#endif /* _UDP6_IMPL_H */
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 60559511bd9..0ae3d98f83e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,7 +63,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -74,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features);
else {
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
@@ -88,7 +93,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Check if there is enough headroom to insert fragment header. */
tnl_hlen = skb_tnl_header_len(skb);
- if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
goto out;
}
@@ -111,7 +116,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be4689..9cf097e206e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 63d5d493098..0e015906f9c 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -15,8 +15,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Authors:
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4770d515c2c..901ef6f8add 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -63,6 +63,12 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+#define for_each_input_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next))
+
+
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e3770..433672d07d0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->local_df && skb->len > mtu) {
+ if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
if (xfrm6_local_dontfrag(skb))
@@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->local_df = 1;
+ skb->ignore_df = 1;
return x->outer_mode->output2(x, skb);
}
@@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
int xfrm6_output_finish(struct sk_buff *skb)
{
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ skb->protocol = htons(ETH_P_IPV6);
+
#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IPV6);
return xfrm_output(skb);
}
@@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int mtu;
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
@@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb)
if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->local_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
@@ -163,8 +166,9 @@ static int __xfrm6_output(struct sk_buff *skb)
return x->outer_mode->afinfo->output_finish(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
- skb_dst(skb)->dev, __xfrm6_output);
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ NULL, skb_dst(skb)->dev, __xfrm6_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 23ed03d786c..2a0bbda2c76 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u8 nexthdr = nh[IP6CB(skb)->nhoff];
+ int oif = 0;
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
+ fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
@@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = {
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = 1024,
+ .gc_thresh = 32768,
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -384,11 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -399,6 +410,7 @@ void xfrm6_fini(void)
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 00000000000..54d13f8dbba
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,279 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+ struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+
+ if (!head)
+ return 0;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index de2bcfaaf75..1c66465a42d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -12,8 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
* YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>