aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6/sit.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/sit.c')
-rw-r--r--net/ipv6/sit.c237
1 files changed, 180 insertions, 57 deletions
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db3..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,17 +475,48 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available
+ */
+static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct rt6_info *rt;
+ struct sk_buff *skb2;
+
+ if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8))
+ return 1;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 1;
+
+ skb_dst_drop(skb2);
+ skb_pull(skb2, iph->ihl * 4);
+ skb_reset_network_header(skb2);
+
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+
+ if (rt)
+ ip6_rt_put(rt);
+
+ kfree_skb(skb2);
+
+ return 0;
+}
static int ipip6_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
@@ -500,7 +531,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
/* Impossible event. */
return 0;
default:
@@ -530,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -545,6 +575,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
goto out;
err = 0;
+ if (!ipip6_err_gen_icmpv6_unreach(skb))
+ goto out;
+
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
@@ -566,6 +599,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -575,7 +672,7 @@ static int ipip6_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel != NULL) {
- struct pcpu_tstats *tstats;
+ struct pcpu_sw_netstats *tstats;
if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
tunnel->parms.iph.protocol != 0)
@@ -586,19 +683,9 @@ static int ipip6_rcv(struct sk_buff *skb)
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -616,8 +703,10 @@ static int ipip6_rcv(struct sk_buff *skb)
}
tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
netif_rx(skb);
@@ -748,7 +837,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -777,7 +866,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -838,7 +927,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (tunnel->parms.iph.daddr && skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ip_rt_put(rt);
goto tx_error;
@@ -865,7 +954,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
if (skb->sk)
@@ -879,21 +968,24 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
+ kfree_skb(skb);
+out:
dev->stats.tx_errors++;
- dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -902,13 +994,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto out;
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
+out:
+ dev->stats.tx_errors++;
+ return NETDEV_TX_OK;
}
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
@@ -929,7 +1023,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
tx_err:
dev->stats.tx_errors++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -990,6 +1084,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
}
@@ -1020,6 +1115,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
return 0;
}
@@ -1031,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1043,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -1148,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1166,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1179,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1195,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
err = ipip6_tunnel_update_6rd(t, &ip6rd);
if (err < 0)
@@ -1234,10 +1322,19 @@ static const struct net_device_ops ipip6_netdev_ops = {
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
+#define SIT_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static void ipip6_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip6_netdev_ops;
@@ -1251,6 +1348,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
+ dev->features |= SIT_FEATURES;
+ dev->hw_features |= SIT_FEATURES;
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1264,10 +1363,16 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -1287,9 +1392,16 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;
@@ -1540,6 +1652,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
#endif
};
+static void ipip6_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev != sitn->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static struct rtnl_link_ops sit_link_ops __read_mostly = {
.kind = "sit",
.maxtype = IFLA_IPTUN_MAX,
@@ -1551,6 +1672,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
.changelink = ipip6_changelink,
.get_size = ipip6_get_size,
.fill_info = ipip6_fill_info,
+ .dellink = ipip6_dellink,
};
static struct xfrm_tunnel sit_handler __read_mostly = {
@@ -1565,9 +1687,10 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
-static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
+static void __net_exit sit_destroy_tunnels(struct net *net,
+ struct list_head *head)
{
- struct net *net = dev_net(sitn->fb_tunnel_dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
struct net_device *dev, *aux;
int prio;
@@ -1612,6 +1735,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1641,12 +1765,10 @@ err_alloc_dev:
static void __net_exit sit_exit_net(struct net *net)
{
- struct sit_net *sitn = net_generic(net, sit_net_id);
LIST_HEAD(list);
rtnl_lock();
- sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+ sit_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1706,4 +1828,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");