diff options
Diffstat (limited to 'net/ipv4/ip_sockglue.c')
| -rw-r--r-- | net/ipv4/ip_sockglue.c | 229 |
1 files changed, 164 insertions, 65 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3948c86e59c..64741b93863 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -33,12 +33,14 @@ #include <linux/netfilter.h> #include <linux/route.h> #include <linux/mroute.h> +#include <net/inet_ecn.h> #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif +#include <net/ip_fib.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ -57,17 +59,9 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { - struct in_pktinfo info; - struct rtable *rt = skb_rtable(skb); + struct in_pktinfo info = *PKTINFO_SKB_CB(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; - if (rt) { - info.ipi_ifindex = rt->rt_iif; - info.ipi_spec_dst.s_addr = rt->rt_spec_dst; - } else { - info.ipi_ifindex = 0; - info.ipi_spec_dst.s_addr = 0; - } put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } @@ -96,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; - struct ip_options * opt = (struct ip_options *)optbuf; + struct ip_options *opt = (struct ip_options *)optbuf; if (IPCB(skb)->opt.optlen == 0) return; @@ -131,7 +125,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); if (skb_transport_offset(skb) + 4 > skb->len) @@ -153,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(skb->sk); - unsigned flags = inet->cmsg_flags; + unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ if (flags & 1) @@ -192,14 +186,31 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } EXPORT_SYMBOL(ip_cmsg_recv); -int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) +int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, + bool allow_ipv6) { - int err; + int err, val; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; +#if defined(CONFIG_IPV6) + if (allow_ipv6 && + cmsg->cmsg_level == SOL_IPV6 && + cmsg->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *src_info; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info))) + return -EINVAL; + src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); + if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) + return -EINVAL; + ipc->oif = src_info->ipi6_ifindex; + ipc->addr = src_info->ipi6_addr.s6_addr32[3]; + continue; + } +#endif if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { @@ -220,6 +231,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) ipc->addr = info->ipi_spec_dst.s_addr; break; } + case IP_TTL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->ttl = val; + break; + case IP_TOS: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 0 || val > 255) + return -EINVAL; + ipc->tos = val; + ipc->priority = rt_tos2priority(ipc->tos); + break; + default: return -EINVAL; } @@ -373,11 +402,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; - struct sockaddr_in *sin; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct { struct sock_extended_err ee; struct sockaddr_in offender; @@ -403,13 +432,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr = SKB_EXT_ERR(skb); - sin = (struct sockaddr_in *)msg->msg_name; if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -462,18 +491,28 @@ static int do_ip_setsockopt(struct sock *sk, int level, struct inet_sock *inet = inet_sk(sk); int val = 0, err; - if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | - (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | - (1<<IP_RETOPTS) | (1<<IP_TOS) | - (1<<IP_TTL) | (1<<IP_HDRINCL) | - (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | - (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | - (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | - (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || - optname == IP_MULTICAST_TTL || - optname == IP_MULTICAST_ALL || - optname == IP_MULTICAST_LOOP || - optname == IP_RECVORIGDSTADDR) { + switch (optname) { + case IP_PKTINFO: + case IP_RECVTTL: + case IP_RECVOPTS: + case IP_RECVTOS: + case IP_RETOPTS: + case IP_TOS: + case IP_TTL: + case IP_HDRINCL: + case IP_MTU_DISCOVER: + case IP_RECVERR: + case IP_ROUTER_ALERT: + case IP_FREEBIND: + case IP_PASSSEC: + case IP_TRANSPARENT: + case IP_MINTTL: + case IP_NODEFRAG: + case IP_UNICAST_IF: + case IP_MULTICAST_TTL: + case IP_MULTICAST_ALL: + case IP_MULTICAST_LOOP: + case IP_RECVORIGDSTADDR: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -497,32 +536,36 @@ static int do_ip_setsockopt(struct sock *sk, int level, switch (optname) { case IP_OPTIONS: { - struct ip_options *opt = NULL; + struct ip_options_rcu *old, *opt = NULL; + if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); if (err) break; + old = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet->inet_daddr != LOOPBACK4_IPV6)) { #endif - if (inet->opt) - icsk->icsk_ext_hdr_len -= inet->opt->optlen; + if (old) + icsk->icsk_ext_hdr_len -= old->opt.optlen; if (opt) - icsk->icsk_ext_hdr_len += opt->optlen; + icsk->icsk_ext_hdr_len += opt->opt.optlen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) } #endif } - opt = xchg(&inet->opt, opt); - kfree(opt); + rcu_assign_pointer(inet->inet_opt, opt); + if (old) + kfree_rcu(old, rcu); break; } case IP_PKTINFO: @@ -569,8 +612,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { - val &= ~3; - val |= inet->tos & 3; + val &= ~INET_ECN_MASK; + val |= inet->tos & INET_ECN_MASK; } if (inet->tos != val) { inet->tos = val; @@ -581,7 +624,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_TTL: if (optlen < 1) goto e_inval; - if (val != -1 && (val < 0 || val > 255)) + if (val != -1 && (val < 1 || val > 255)) goto e_inval; inet->uc_ttl = val; break; @@ -600,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->nodefrag = val ? 1 : 0; break; case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) goto e_inval; inet->pmtudisc = val; break; @@ -625,6 +668,35 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; inet->mc_loop = !!val; break; + case IP_UNICAST_IF: + { + struct net_device *dev = NULL; + int ifindex; + + if (optlen != sizeof(int)) + goto e_inval; + + ifindex = (__force int)ntohl((__force __be32)val); + if (ifindex == 0) { + inet->uc_index = 0; + err = 0; + break; + } + + dev = dev_get_by_index(sock_net(sk), ifindex); + err = -EADDRNOTAVAIL; + if (!dev) + break; + dev_put(dev); + + err = -EINVAL; + if (sk->sk_bound_dev_if) + break; + + inet->uc_index = ifindex; + err = 0; + break; + } case IP_MULTICAST_IF: { struct ip_mreqn mreq; @@ -645,10 +717,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } else { memset(&mreq, 0, sizeof(mreq)); - if (optlen >= sizeof(struct in_addr) && - copy_from_user(&mreq.imr_address, optval, - sizeof(struct in_addr))) - break; + if (optlen >= sizeof(struct ip_mreq)) { + if (copy_from_user(&mreq, optval, + sizeof(struct ip_mreq))) + break; + } else if (optlen >= sizeof(struct in_addr)) { + if (copy_from_user(&mreq.imr_address, optval, + sizeof(struct in_addr))) + break; + } } if (!mreq.imr_ifindex) { @@ -946,13 +1023,14 @@ mc_msf_out: case IP_IPSEC_POLICY: case IP_XFRM_POLICY: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) break; err = xfrm_user_policy(sk, optname, optval, optlen); break; case IP_TRANSPARENT: - if (!capable(CAP_NET_ADMIN)) { + if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { err = -EPERM; break; } @@ -982,20 +1060,29 @@ e_inval: } /** - * ip_queue_rcv_skb - Queue an skb into sock receive queue + * ipv4_pktinfo_prepare - transfert some info from rtable to skb * @sk: socket * @skb: buffer * - * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option - * is not set, we drop skb dst entry now, while dst cache line is hot. + * To support IP_CMSG_PKTINFO option, we store rt_iif and specific + * destination in skb->cb[] before dst drop. + * This way, receiver doesn't make cache line misses to read rtable. */ -int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { - if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) - skb_dst_drop(skb); - return sock_queue_rcv_skb(sk, skb); + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || + ipv6_sk_rxinfo(sk); + + if (prepare && skb_rtable(skb)) { + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); + } else { + pktinfo->ipi_ifindex = 0; + pktinfo->ipi_spec_dst.s_addr = 0; + } + skb_dst_drop(skb); } -EXPORT_SYMBOL(ip_queue_rcv_skb); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) @@ -1058,7 +1145,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); int val; @@ -1081,12 +1168,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; - struct ip_options * opt = (struct ip_options *)optbuf; + struct ip_options *opt = (struct ip_options *)optbuf; + struct ip_options_rcu *inet_opt; + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); opt->optlen = 0; - if (inet->opt) - memcpy(optbuf, inet->opt, - sizeof(struct ip_options)+ - inet->opt->optlen); + if (inet_opt) + memcpy(optbuf, &inet_opt->opt, + sizeof(struct ip_options) + + inet_opt->opt.optlen); release_sock(sk); if (opt->optlen == 0) @@ -1163,6 +1254,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MULTICAST_LOOP: val = inet->mc_loop; break; + case IP_UNICAST_IF: + val = (__force int)htonl((__u32) inet->uc_index); + break; case IP_MULTICAST_IF: { struct in_addr addr; @@ -1227,7 +1321,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1241,6 +1335,10 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, int hlim = inet->mc_ttl; put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } + if (inet->cmsg_flags & IP_CMSG_TOS) { + int tos = inet->rcv_tos; + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } len -= msg.msg_controllen; return put_user(len, optlen); } @@ -1281,7 +1379,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1314,7 +1412,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ip_getsockopt); - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ |
