diff options
Diffstat (limited to 'net/ipv6/sit.c')
| -rw-r--r-- | net/ipv6/sit.c | 662 |
1 files changed, 500 insertions, 162 deletions
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fee21c6c3eb..4f408176dc6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> @@ -65,9 +65,15 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -81,40 +87,6 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->rx_errors = dev->stats.rx_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* * Must be invoked with rcu_read_lock */ @@ -494,26 +466,57 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) static void ipip6_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { - ipip6_tunnel_unlink(sitn, netdev_priv(dev)); - ipip6_tunnel_del_prl(netdev_priv(dev), NULL); + ipip6_tunnel_unlink(sitn, tunnel); + ipip6_tunnel_del_prl(tunnel, NULL); } + ip_tunnel_dst_reset_all(tunnel); dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -528,7 +531,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -558,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; @@ -573,6 +575,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; @@ -585,48 +590,123 @@ out: return err; } -static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) { - if (INET_ECN_is_ce(iph->tos)) - IP6_ECN_set_ce(ipv6_hdr(skb)); + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; } -static int ipip6_rcv(struct sk_buff *skb) +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) { - const struct iphdr *iph; - struct ip_tunnel *tunnel; + int prefix_len; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} - iph = ip_hdr(skb); +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + +static int ipip6_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + int err; tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; + + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; - secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { + if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - kfree_skb(skb); - return 0; + goto out; + } + + __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto out; + } } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; - - __skb_tunnel_rx(skb, tunnel->dev); - - ipip6_ecn_decapsulate(iph, skb); + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -640,15 +720,45 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct ip_tunnel *tunnel; + + iph = ip_hdr(skb); + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -667,14 +777,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -693,13 +813,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int addr_type; + u8 ttl; + int err; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -716,7 +837,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -735,7 +856,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; @@ -745,7 +866,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -767,7 +888,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + rt = ip_route_output_ports(tunnel->net, &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, RT_TOS(tos), @@ -806,7 +927,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -833,7 +954,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -842,42 +963,71 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb = new_skb; iph6 = ipv6_hdr(skb); } + ttl = tiph->ttl; + if (ttl == 0) + ttl = iph6->hop_limit; + tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPV6; - iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = iph6->hop_limit; + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + if (IS_ERR(skb)) { + ip_rt_put(rt); + goto out; + } - iptunnel_xmit(skb, dev); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: + kfree_skb(skb); +out: + dev->stats.tx_errors++; + return NETDEV_TX_OK; +} + +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +out: dev->stats.tx_errors++; - dev_kfree_skb(skb); return NETDEV_TX_OK; } +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -889,7 +1039,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, + NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, @@ -904,7 +1055,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -917,7 +1068,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); @@ -933,17 +1084,51 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); } +#ifdef CONFIG_IPV6_SIT_6RD +static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, + struct ip_tunnel_6rd *ip6rd) +{ + struct in6_addr prefix; + __be32 relay_prefix; + + if (ip6rd->relay_prefixlen > 32 || + ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) + return -EINVAL; + + ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) + return -EINVAL; + if (ip6rd->relay_prefixlen) + relay_prefix = ip6rd->relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd->relay_prefixlen)); + else + relay_prefix = 0; + if (relay_prefix != ip6rd->relay_prefix) + return -EINVAL; + + t->ip6rd.prefix = prefix; + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd->prefixlen; + t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + ip_tunnel_dst_reset_all(t); + netdev_state_change(t->dev); + return 0; +} +#endif + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; struct ip_tunnel_prl prl; - struct ip_tunnel *t; - struct net *net = dev_net(dev); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; @@ -954,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: #endif - t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); err = -EFAULT; if (cmd == SIOCGETTUNNEL) { @@ -996,7 +1180,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1055,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; if (dev == sitn->fb_tunnel_dev) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); break; @@ -1073,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; switch (cmd) { case SIOCDELPRL: @@ -1086,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); break; @@ -1102,34 +1285,10 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(ip6rd))) goto done; - t = netdev_priv(dev); - if (cmd != SIOCDEL6RD) { - struct in6_addr prefix; - __be32 relay_prefix; - - err = -EINVAL; - if (ip6rd.relay_prefixlen > 32 || - ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) goto done; - - ipv6_addr_prefix(&prefix, &ip6rd.prefix, - ip6rd.prefixlen); - if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) - goto done; - if (ip6rd.relay_prefixlen) - relay_prefix = ip6rd.relay_prefix & - htonl(0xffffffffUL << - (32 - ip6rd.relay_prefixlen)); - else - relay_prefix = 0; - if (relay_prefix != ip6rd.relay_prefix) - goto done; - - t->ip6rd.prefix = prefix; - t->ip6rd.relay_prefix = relay_prefix; - t->ip6rd.prefixlen = ip6rd.prefixlen; - t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; } else ipip6_tunnel_clone_6rd(dev, sitn); @@ -1155,18 +1314,27 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, - .ndo_get_stats64= ipip6_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipip6_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } +#define SIT_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; @@ -1179,8 +1347,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; + dev->features |= SIT_FEATURES; + dev->hw_features |= SIT_FEATURES; } static int ipip6_tunnel_init(struct net_device *dev) @@ -1188,15 +1357,22 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + return 0; } @@ -1208,6 +1384,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; @@ -1215,14 +1392,37 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data || !data[IFLA_IPTUN_PROTO]) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1259,13 +1459,59 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + +} + +#ifdef CONFIG_IPV6_SIT_6RD +/* This function returns true when 6RD attributes are present in the nl msg */ +static bool ipip6_netlink_6rd_parms(struct nlattr *data[], + struct ip_tunnel_6rd *ip6rd) +{ + bool ret = false; + memset(ip6rd, 0, sizeof(*ip6rd)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_6RD_PREFIX]) { + ret = true; + nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], + sizeof(struct in6_addr)); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { + ret = true; + ip6rd->relay_prefix = + nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); + } + + if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { + ret = true; + ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { + ret = true; + ip6rd->relay_prefixlen = + nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + } + + return ret; } +#endif static int ipip6_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip_tunnel *nt; +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif + int err; nt = netdev_priv(dev); ipip6_netlink_parms(data, &nt->parms); @@ -1273,16 +1519,28 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; - return ipip6_tunnel_create(dev); + err = ipip6_tunnel_create(dev); + if (err < 0) + return err; + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + err = ipip6_tunnel_update_6rd(nt, &ip6rd); +#endif + + return err; } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif if (dev == sitn->fb_tunnel_dev) return -EINVAL; @@ -1302,6 +1560,12 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], t = netdev_priv(dev); ipip6_tunnel_update(t, &p); + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + return ipip6_tunnel_update_6rd(t, &ip6rd); +#endif + return 0; } @@ -1322,6 +1586,18 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + +#ifdef CONFIG_IPV6_SIT_6RD + /* IFLA_IPTUN_6RD_PREFIX */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ + nla_total_size(4) + + /* IFLA_IPTUN_6RD_PREFIXLEN */ + nla_total_size(2) + + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ + nla_total_size(2) + +#endif 0; } @@ -1337,8 +1613,22 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; + +#ifdef CONFIG_IPV6_SIT_6RD + if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), + &tunnel->ip6rd.prefix) || + nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || + nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, + tunnel->ip6rd.prefixlen) || + nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + tunnel->ip6rd.relay_prefixlen)) + goto nla_put_failure; +#endif + return 0; nla_put_failure: @@ -1353,18 +1643,36 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, +#ifdef CONFIG_IPV6_SIT_6RD + [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, + [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, + [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, +#endif }; +static void ipip6_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + + if (dev != sitn->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, + .dellink = ipip6_dellink, }; static struct xfrm_tunnel sit_handler __read_mostly = { @@ -1373,10 +1681,23 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; -static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + +static void __net_exit sit_destroy_tunnels(struct net *net, + struct list_head *head) { + struct sit_net *sitn = net_generic(net, sit_net_id); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &sit_link_ops) + unregister_netdevice_queue(dev, head); + for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1384,7 +1705,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1409,6 +1735,11 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) @@ -1434,12 +1765,10 @@ err_alloc_dev: static void __net_exit sit_exit_net(struct net *net) { - struct sit_net *sitn = net_generic(net, sit_net_id); LIST_HEAD(list); rtnl_lock(); - sit_destroy_tunnels(sitn, &list); - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + sit_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -1455,6 +1784,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1471,9 +1801,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1482,6 +1817,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); @@ -1491,4 +1828,5 @@ xfrm_tunnel_failed: module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); |
