aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/ip_sockglue.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_sockglue.c')
-rw-r--r--net/ipv4/ip_sockglue.c229
1 files changed, 164 insertions, 65 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3948c86e59c..64741b93863 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -33,12 +33,14 @@
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
+#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
+#include <net/ip_fib.h>
#include <linux/errqueue.h>
#include <asm/uaccess.h>
@@ -57,17 +59,9 @@
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_pktinfo info;
- struct rtable *rt = skb_rtable(skb);
+ struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
- if (rt) {
- info.ipi_ifindex = rt->rt_iif;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
- } else {
- info.ipi_ifindex = 0;
- info.ipi_spec_dst.s_addr = 0;
- }
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -96,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
- struct ip_options * opt = (struct ip_options *)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
if (IPCB(skb)->opt.optlen == 0)
return;
@@ -131,7 +125,7 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
__be16 *ports = (__be16 *)skb_transport_header(skb);
if (skb_transport_offset(skb) + 4 > skb->len)
@@ -153,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(skb->sk);
- unsigned flags = inet->cmsg_flags;
+ unsigned int flags = inet->cmsg_flags;
/* Ordered by supposed usage frequency */
if (flags & 1)
@@ -192,14 +186,31 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip_cmsg_recv);
-int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
+int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
+ bool allow_ipv6)
{
- int err;
+ int err, val;
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
+#if defined(CONFIG_IPV6)
+ if (allow_ipv6 &&
+ cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *src_info;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
+ return -EINVAL;
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
+ return -EINVAL;
+ ipc->oif = src_info->ipi6_ifindex;
+ ipc->addr = src_info->ipi6_addr.s6_addr32[3];
+ continue;
+ }
+#endif
if (cmsg->cmsg_level != SOL_IP)
continue;
switch (cmsg->cmsg_type) {
@@ -220,6 +231,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
+ case IP_TTL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->ttl = val;
+ break;
+ case IP_TOS:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 0 || val > 255)
+ return -EINVAL;
+ ipc->tos = val;
+ ipc->priority = rt_tos2priority(ipc->tos);
+ break;
+
default:
return -EINVAL;
}
@@ -373,11 +402,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
/*
* Handle MSG_ERRQUEUE
*/
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
- struct sockaddr_in *sin;
+ DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
struct sockaddr_in offender;
@@ -403,13 +432,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
serr = SKB_EXT_ERR(skb);
- sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -462,18 +491,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
struct inet_sock *inet = inet_sk(sk);
int val = 0, err;
- if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
- (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_ALL ||
- optname == IP_MULTICAST_LOOP ||
- optname == IP_RECVORIGDSTADDR) {
+ switch (optname) {
+ case IP_PKTINFO:
+ case IP_RECVTTL:
+ case IP_RECVOPTS:
+ case IP_RECVTOS:
+ case IP_RETOPTS:
+ case IP_TOS:
+ case IP_TTL:
+ case IP_HDRINCL:
+ case IP_MTU_DISCOVER:
+ case IP_RECVERR:
+ case IP_ROUTER_ALERT:
+ case IP_FREEBIND:
+ case IP_PASSSEC:
+ case IP_TRANSPARENT:
+ case IP_MINTTL:
+ case IP_NODEFRAG:
+ case IP_UNICAST_IF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_ALL:
+ case IP_MULTICAST_LOOP:
+ case IP_RECVORIGDSTADDR:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -497,32 +536,36 @@ static int do_ip_setsockopt(struct sock *sk, int level,
switch (optname) {
case IP_OPTIONS:
{
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *old, *opt = NULL;
+
if (optlen > 40)
goto e_inval;
err = ip_options_get_from_user(sock_net(sk), &opt,
optval, optlen);
if (err)
break;
+ old = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
(!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE)) &&
inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
- if (inet->opt)
- icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+ if (old)
+ icsk->icsk_ext_hdr_len -= old->opt.optlen;
if (opt)
- icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
}
#endif
}
- opt = xchg(&inet->opt, opt);
- kfree(opt);
+ rcu_assign_pointer(inet->inet_opt, opt);
+ if (old)
+ kfree_rcu(old, rcu);
break;
}
case IP_PKTINFO:
@@ -569,8 +612,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
- val &= ~3;
- val |= inet->tos & 3;
+ val &= ~INET_ECN_MASK;
+ val |= inet->tos & INET_ECN_MASK;
}
if (inet->tos != val) {
inet->tos = val;
@@ -581,7 +624,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_TTL:
if (optlen < 1)
goto e_inval;
- if (val != -1 && (val < 0 || val > 255))
+ if (val != -1 && (val < 1 || val > 255))
goto e_inval;
inet->uc_ttl = val;
break;
@@ -600,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->nodefrag = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
@@ -625,6 +668,35 @@ static int do_ip_setsockopt(struct sock *sk, int level,
goto e_inval;
inet->mc_loop = !!val;
break;
+ case IP_UNICAST_IF:
+ {
+ struct net_device *dev = NULL;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ goto e_inval;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (ifindex == 0) {
+ inet->uc_index = 0;
+ err = 0;
+ break;
+ }
+
+ dev = dev_get_by_index(sock_net(sk), ifindex);
+ err = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
+ dev_put(dev);
+
+ err = -EINVAL;
+ if (sk->sk_bound_dev_if)
+ break;
+
+ inet->uc_index = ifindex;
+ err = 0;
+ break;
+ }
case IP_MULTICAST_IF:
{
struct ip_mreqn mreq;
@@ -645,10 +717,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
} else {
memset(&mreq, 0, sizeof(mreq));
- if (optlen >= sizeof(struct in_addr) &&
- copy_from_user(&mreq.imr_address, optval,
- sizeof(struct in_addr)))
- break;
+ if (optlen >= sizeof(struct ip_mreq)) {
+ if (copy_from_user(&mreq, optval,
+ sizeof(struct ip_mreq)))
+ break;
+ } else if (optlen >= sizeof(struct in_addr)) {
+ if (copy_from_user(&mreq.imr_address, optval,
+ sizeof(struct in_addr)))
+ break;
+ }
}
if (!mreq.imr_ifindex) {
@@ -946,13 +1023,14 @@ mc_msf_out:
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
break;
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
case IP_TRANSPARENT:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
break;
}
@@ -982,20 +1060,29 @@ e_inval:
}
/**
- * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * ipv4_pktinfo_prepare - transfert some info from rtable to skb
* @sk: socket
* @skb: buffer
*
- * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
- * is not set, we drop skb dst entry now, while dst cache line is hot.
+ * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
+ * destination in skb->cb[] before dst drop.
+ * This way, receiver doesn't make cache line misses to read rtable.
*/
-int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
- if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
- skb_dst_drop(skb);
- return sock_queue_rcv_skb(sk, skb);
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+ ipv6_sk_rxinfo(sk);
+
+ if (prepare && skb_rtable(skb)) {
+ pktinfo->ipi_ifindex = inet_iif(skb);
+ pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
+ } else {
+ pktinfo->ipi_ifindex = 0;
+ pktinfo->ipi_spec_dst.s_addr = 0;
+ }
+ skb_dst_drop(skb);
}
-EXPORT_SYMBOL(ip_queue_rcv_skb);
int ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
@@ -1058,7 +1145,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
int val;
@@ -1081,12 +1168,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
- struct ip_options * opt = (struct ip_options *)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
+ struct ip_options_rcu *inet_opt;
+
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
opt->optlen = 0;
- if (inet->opt)
- memcpy(optbuf, inet->opt,
- sizeof(struct ip_options)+
- inet->opt->optlen);
+ if (inet_opt)
+ memcpy(optbuf, &inet_opt->opt,
+ sizeof(struct ip_options) +
+ inet_opt->opt.optlen);
release_sock(sk);
if (opt->optlen == 0)
@@ -1163,6 +1254,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MULTICAST_LOOP:
val = inet->mc_loop;
break;
+ case IP_UNICAST_IF:
+ val = (__force int)htonl((__u32) inet->uc_index);
+ break;
case IP_MULTICAST_IF:
{
struct in_addr addr;
@@ -1227,7 +1321,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control = optval;
msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_flags = flags;
if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
struct in_pktinfo info;
@@ -1241,6 +1335,10 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
int hlim = inet->mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
+ if (inet->cmsg_flags & IP_CMSG_TOS) {
+ int tos = inet->rcv_tos;
+ put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
+ }
len -= msg.msg_controllen;
return put_user(len, optlen);
}
@@ -1281,7 +1379,7 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1314,7 +1412,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
return compat_mc_getsockopt(sk, level, optname, optval, optlen,
ip_getsockopt);
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */