about summary refs log tree commit diff
path: root/net/ipv4/ip_sockglue.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_sockglue.c')
-rw-r--r-- net/ipv4/ip_sockglue.c 486
1 files changed, 357 insertions, 129 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f72457b4b0a..64741b93863 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,8 +5,6 @@
*
* The IP to API glue.
*
- * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
- *
* Authors: see ip.c
*
* Fixes:
@@ -25,6 +23,7 @@
#include <linux/icmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -34,11 +33,14 @@
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
+#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <net/compat.h>
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
+#include <net/ip_fib.h>
#include <linux/errqueue.h>
#include <asm/uaccess.h>
@@ -49,6 +51,7 @@
#define IP_CMSG_RECVOPTS 8
#define IP_CMSG_RETOPTS 16
#define IP_CMSG_PASSSEC 32
+#define IP_CMSG_ORIGDSTADDR 64
/*
* SOL_IP control messages.
@@ -56,17 +59,9 @@
+/*
+ * Emit an IP_PKTINFO control message for @skb.
+ *
+ * The in_pktinfo is copied from PKTINFO_SKB_CB(skb), which supplies
+ * ipi_ifindex and ipi_spec_dst; only ipi_addr is filled in here, from
+ * the destination address of the IP header.
+ */
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_pktinfo info;
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
- if (rt) {
- info.ipi_ifindex = rt->rt_iif;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
- } else {
- info.ipi_ifindex = 0;
- info.ipi_spec_dst.s_addr = 0;
- }
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -95,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
- struct ip_options * opt = (struct ip_options*)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
if (IPCB(skb)->opt.optlen == 0)
return;
@@ -127,56 +122,102 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
security_release_secctx(secdata, seclen);
}
+/*
+ * Emit an IP_ORIGDSTADDR control message for @skb: a sockaddr_in whose
+ * address is the destination address of the IP header and whose port is
+ * the second 16-bit word of the transport header (ports[1]).
+ *
+ * Nothing is emitted when fewer than four bytes of transport header lie
+ * within skb->len.
+ */
+static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
+{
+ struct sockaddr_in sin;
+ const struct iphdr *iph = ip_hdr(skb);
+ __be16 *ports = (__be16 *)skb_transport_header(skb);
+
+ if (skb_transport_offset(skb) + 4 > skb->len)
+ return;
+
+ /* All current transport protocols have the port numbers in the
+ * first four bytes of the transport header and this function is
+ * written with this assumption in mind.
+ */
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = iph->daddr;
+ sin.sin_port = ports[1];
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+ put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
+}
+/*
+ * Deliver the control messages selected by the cmsg_flags of the socket
+ * that owns @skb.
+ *
+ * The flags word is consumed one bit at a time from the least
+ * significant end: each set bit calls the matching ip_cmsg_recv_*()
+ * handler, and the walk returns as soon as no higher bits remain set.
+ */
void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(skb->sk);
- unsigned flags = inet->cmsg_flags;
+ unsigned int flags = inet->cmsg_flags;
/* Ordered by supposed usage frequency */
if (flags & 1)
ip_cmsg_recv_pktinfo(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_ttl(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_tos(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_opts(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_retopts(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_security(msg, skb);
+
+ if ((flags >>= 1) == 0)
+ return;
+ if (flags & 1)
+ ip_cmsg_recv_dstaddr(msg, skb);
+
}
+EXPORT_SYMBOL(ip_cmsg_recv);
-int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
+int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
+ bool allow_ipv6)
{
- int err;
+ int err, val;
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
+#if defined(CONFIG_IPV6)
+ if (allow_ipv6 &&
+ cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *src_info;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
+ return -EINVAL;
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
+ return -EINVAL;
+ ipc->oif = src_info->ipi6_ifindex;
+ ipc->addr = src_info->ipi6_addr.s6_addr32[3];
+ continue;
+ }
+#endif
if (cmsg->cmsg_level != SOL_IP)
continue;
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
- err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
+ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+ err < 40 ? err : 40);
if (err)
return err;
break;
@@ -190,6 +231,24 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
+ case IP_TTL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->ttl = val;
+ break;
+ case IP_TOS:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 0 || val > 255)
+ return -EINVAL;
+ ipc->tos = val;
+ ipc->priority = rt_tos2priority(ipc->tos);
+ break;
+
default:
return -EINVAL;
}
@@ -208,47 +267,68 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
but receiver should be enough clever f.e. to forward mtrace requests,
sent to multicast group to reach destination designated router.
*/
-struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+struct ip_ra_chain __rcu *ip_ra_chain;
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+
+/*
+ * RCU callback queued by ip_ra_control() through call_rcu(): drops the
+ * reference held on ra->saved_sk and frees the chain entry.
+ */
+static void ip_ra_destroy_rcu(struct rcu_head *head)
+{
+ struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+ sock_put(ra->saved_sk);
+ kfree(ra);
+}
+/*
+ * Add @sk to, or remove it from, ip_ra_chain (called for the
+ * IP_ROUTER_ALERT socket option).
+ *
+ * @on:         non-zero to add the socket, zero to remove it
+ * @destructor: stored in the new entry; an entry's destructor, if any,
+ *              is called with the socket when the entry is removed
+ *
+ * Returns 0 on success, -EINVAL unless @sk is a SOCK_RAW socket whose
+ * inet_num is not IPPROTO_RAW, -EADDRINUSE when adding a socket that is
+ * already on the chain, and -ENOBUFS when no new entry is available
+ * (allocation failed, or a removal found no matching entry).
+ *
+ * Updates are serialised by ip_ra_lock; a new entry is linked at the
+ * tail of the chain.
+ */
-int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *))
+int ip_ra_control(struct sock *sk, unsigned char on,
+ void (*destructor)(struct sock *))
{
- struct ip_ra_chain *ra, *new_ra, **rap;
+ struct ip_ra_chain *ra, *new_ra;
+ struct ip_ra_chain __rcu **rap;
- if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW)
+ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- write_lock_bh(&ip_ra_lock);
- for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ spin_lock_bh(&ip_ra_lock);
+ for (rap = &ip_ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&ip_ra_lock))) != NULL;
+ rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
kfree(new_ra);
return -EADDRINUSE;
}
- *rap = ra->next;
- write_unlock_bh(&ip_ra_lock);
+ /* don't let ip_call_ra_chain() use sk again */
+ ra->sk = NULL;
+ rcu_assign_pointer(*rap, ra->next);
+ spin_unlock_bh(&ip_ra_lock);
if (ra->destructor)
ra->destructor(sk);
- sock_put(sk);
- kfree(ra);
+ /*
+ * Delay sock_put(sk) and kfree(ra) until after one RCU grace
+ * period. This guarantees that ip_call_ra_chain() doesn't need
+ * to mess with socket refcounts.
+ */
+ ra->saved_sk = sk;
+ call_rcu(&ra->rcu, ip_ra_destroy_rcu);
return 0;
}
}
if (new_ra == NULL) {
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
return -ENOBUFS;
}
new_ra->sk = sk;
new_ra->destructor = destructor;
new_ra->next = ra;
- *rap = new_ra;
+ rcu_assign_pointer(*rap, new_ra);
+ sock_hold(sk);
- write_unlock_bh(&ip_ra_lock);
+ spin_unlock_bh(&ip_ra_lock);
return 0;
}
@@ -256,12 +336,8 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
- if (!inet->recverr)
- return;
-
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb)
return;
@@ -326,11 +402,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
/*
* Handle MSG_ERRQUEUE
*/
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
- struct sockaddr_in *sin;
+ DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
struct sockaddr_in offender;
@@ -356,13 +432,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
serr = SKB_EXT_ERR(skb);
- sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -389,7 +465,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
/* Reset and regenerate socket error */
spin_lock_bh(&sk->sk_error_queue.lock);
sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ skb2 = skb_peek(&sk->sk_error_queue);
+ if (skb2 != NULL) {
sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
spin_unlock_bh(&sk->sk_error_queue.lock);
sk->sk_error_report(sk);
@@ -404,25 +481,38 @@ out:
/*
- * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
- * an IP socket.
+ * Socket option code for IP. This is the end of the line after any
+ * TCP,UDP etc options on an IP socket.
*/
static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
- int val=0,err;
-
- if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC))) ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_LOOP) {
+ int val = 0, err;
+
+ switch (optname) {
+ case IP_PKTINFO:
+ case IP_RECVTTL:
+ case IP_RECVOPTS:
+ case IP_RECVTOS:
+ case IP_RETOPTS:
+ case IP_TOS:
+ case IP_TTL:
+ case IP_HDRINCL:
+ case IP_MTU_DISCOVER:
+ case IP_RECVERR:
+ case IP_ROUTER_ALERT:
+ case IP_FREEBIND:
+ case IP_PASSSEC:
+ case IP_TRANSPARENT:
+ case IP_MINTTL:
+ case IP_NODEFRAG:
+ case IP_UNICAST_IF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_ALL:
+ case IP_MULTICAST_LOOP:
+ case IP_RECVORIGDSTADDR:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -438,7 +528,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
if (ip_mroute_opt(optname))
- return ip_mroute_setsockopt(sk,optname,optval,optlen);
+ return ip_mroute_setsockopt(sk, optname, optval, optlen);
err = 0;
lock_sock(sk);
@@ -446,31 +536,36 @@ static int do_ip_setsockopt(struct sock *sk, int level,
switch (optname) {
case IP_OPTIONS:
{
- struct ip_options * opt = NULL;
- if (optlen > 40 || optlen < 0)
+ struct ip_options_rcu *old, *opt = NULL;
+
+ if (optlen > 40)
goto e_inval;
- err = ip_options_get_from_user(&opt, optval, optlen);
+ err = ip_options_get_from_user(sock_net(sk), &opt,
+ optval, optlen);
if (err)
break;
+ old = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
(!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE)) &&
- inet->daddr != LOOPBACK4_IPV6)) {
+ inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
- if (inet->opt)
- icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+ if (old)
+ icsk->icsk_ext_hdr_len -= old->opt.optlen;
if (opt)
- icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
}
#endif
}
- opt = xchg(&inet->opt, opt);
- kfree(opt);
+ rcu_assign_pointer(inet->inet_opt, opt);
+ if (old)
+ kfree_rcu(old, rcu);
break;
}
case IP_PKTINFO:
@@ -509,10 +604,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
else
inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
break;
+ case IP_RECVORIGDSTADDR:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
+ break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
- val &= ~3;
- val |= inet->tos & 3;
+ val &= ~INET_ECN_MASK;
+ val |= inet->tos & INET_ECN_MASK;
}
if (inet->tos != val) {
inet->tos = val;
@@ -521,9 +622,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
break;
case IP_TTL:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
- if (val != -1 && (val < 1 || val>255))
+ if (val != -1 && (val < 1 || val > 255))
goto e_inval;
inet->uc_ttl = val;
break;
@@ -534,8 +635,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
inet->hdrincl = val ? 1 : 0;
break;
+ case IP_NODEFRAG:
+ if (sk->sk_type != SOCK_RAW) {
+ err = -ENOPROTOOPT;
+ break;
+ }
+ inet->nodefrag = val ? 1 : 0;
+ break;
case IP_MTU_DISCOVER:
- if (val<0 || val>3)
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
@@ -547,19 +655,48 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
- if (val==-1)
+ if (val == -1)
val = 1;
if (val < 0 || val > 255)
goto e_inval;
inet->mc_ttl = val;
break;
case IP_MULTICAST_LOOP:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
inet->mc_loop = !!val;
break;
+ case IP_UNICAST_IF:
+ {
+ struct net_device *dev = NULL;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ goto e_inval;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (ifindex == 0) {
+ inet->uc_index = 0;
+ err = 0;
+ break;
+ }
+
+ dev = dev_get_by_index(sock_net(sk), ifindex);
+ err = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
+ dev_put(dev);
+
+ err = -EINVAL;
+ if (sk->sk_bound_dev_if)
+ break;
+
+ inet->uc_index = ifindex;
+ err = 0;
+ break;
+ }
case IP_MULTICAST_IF:
{
struct ip_mreqn mreq;
@@ -571,15 +708,24 @@ static int do_ip_setsockopt(struct sock *sk, int level,
* Check the arguments are allowable
*/
+ if (optlen < sizeof(struct in_addr))
+ goto e_inval;
+
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq,optval,sizeof(mreq)))
+ if (copy_from_user(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
- if (optlen >= sizeof(struct in_addr) &&
- copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
- break;
+ if (optlen >= sizeof(struct ip_mreq)) {
+ if (copy_from_user(&mreq, optval,
+ sizeof(struct ip_mreq)))
+ break;
+ } else if (optlen >= sizeof(struct in_addr)) {
+ if (copy_from_user(&mreq.imr_address, optval,
+ sizeof(struct in_addr)))
+ break;
+ }
}
if (!mreq.imr_ifindex) {
@@ -589,18 +735,17 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = 0;
break;
}
- dev = ip_dev_find(&init_net, mreq.imr_address.s_addr);
- if (dev) {
+ dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
+ if (dev)
mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
} else
- dev = __dev_get_by_index(&init_net, mreq.imr_ifindex);
+ dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
err = -EADDRNOTAVAIL;
if (!dev)
break;
+ dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
@@ -626,11 +771,11 @@ static int do_ip_setsockopt(struct sock *sk, int level,
goto e_inval;
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq,optval,sizeof(mreq)))
+ if (copy_from_user(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
- if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+ if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
break;
}
@@ -642,7 +787,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case IP_MSFILTER:
{
- extern int sysctl_igmp_max_msf;
struct ip_msfilter *msf;
if (optlen < IP_MSFILTER_SIZE(0))
@@ -796,7 +940,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case MCAST_MSFILTER:
{
- extern int sysctl_igmp_max_msf;
struct sockaddr_in *psin;
struct ip_msfilter *msf = NULL;
struct group_filter *gsf = NULL;
@@ -808,15 +951,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -ENOBUFS;
break;
}
- gsf = kmalloc(optlen,GFP_KERNEL);
+ gsf = kmalloc(optlen, GFP_KERNEL);
if (!gsf) {
err = -ENOBUFS;
break;
}
err = -EFAULT;
- if (copy_from_user(gsf, optval, optlen)) {
+ if (copy_from_user(gsf, optval, optlen))
goto mc_msf_out;
- }
+
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ffffff ||
gsf->gf_numsrc > sysctl_igmp_max_msf) {
@@ -828,7 +971,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
goto mc_msf_out;
}
msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
- msf = kmalloc(msize,GFP_KERNEL);
+ msf = kmalloc(msize, GFP_KERNEL);
if (!msf) {
err = -ENOBUFS;
goto mc_msf_out;
@@ -844,7 +987,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
msf->imsf_fmode = gsf->gf_fmode;
msf->imsf_numsrc = gsf->gf_numsrc;
err = -EADDRNOTAVAIL;
- for (i=0; i<gsf->gf_numsrc; ++i) {
+ for (i = 0; i < gsf->gf_numsrc; ++i) {
psin = (struct sockaddr_in *)&gsf->gf_slist[i];
if (psin->sin_family != AF_INET)
@@ -855,17 +998,24 @@ static int do_ip_setsockopt(struct sock *sk, int level,
gsf = NULL;
err = ip_mc_msfilter(sk, msf, ifindex);
- mc_msf_out:
+mc_msf_out:
kfree(msf);
kfree(gsf);
break;
}
+ case IP_MULTICAST_ALL:
+ if (optlen < 1)
+ goto e_inval;
+ if (val != 0 && val != 1)
+ goto e_inval;
+ inet->mc_all = val;
+ break;
case IP_ROUTER_ALERT:
err = ip_ra_control(sk, val ? 1 : 0, NULL);
break;
case IP_FREEBIND:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
inet->freebind = !!val;
break;
@@ -873,11 +1023,30 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
break;
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
+ case IP_TRANSPARENT:
+ if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ break;
+ }
+ if (optlen < 1)
+ goto e_inval;
+ inet->transparent = !!val;
+ break;
+
+ case IP_MINTTL:
+ if (optlen < 1)
+ goto e_inval;
+ if (val < 0 || val > 255)
+ goto e_inval;
+ inet->min_ttl = val;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -890,8 +1059,33 @@ e_inval:
return -EINVAL;
}
+/**
+ * ipv4_pktinfo_prepare - transfer some info from rtable to skb
+ * @sk: socket
+ * @skb: buffer
+ *
+ * To support the IP_CMSG_PKTINFO option, we store the interface index
+ * (from inet_iif()) and the specific destination in skb->cb[] before
+ * the dst is dropped.
+ * This way, the receiver doesn't take cache line misses to read the
+ * rtable.
+ *
+ * The values are only computed when the socket asked for them
+ * (IP_CMSG_PKTINFO set in cmsg_flags, or ipv6_sk_rxinfo(sk) true) and
+ * the skb has an rtable; otherwise both fields are zeroed. The dst is
+ * dropped in every case.
+ */
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+{
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+ ipv6_sk_rxinfo(sk);
+
+ if (prepare && skb_rtable(skb)) {
+ pktinfo->ipi_ifindex = inet_iif(skb);
+ pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
+ } else {
+ pktinfo->ipi_ifindex = 0;
+ pktinfo->ipi_spec_dst.s_addr = 0;
+ }
+ skb_dst_drop(skb);
+}
+
int ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, unsigned int optlen)
{
int err;
@@ -912,16 +1106,21 @@ int ip_setsockopt(struct sock *sk, int level,
#endif
return err;
}
+EXPORT_SYMBOL(ip_setsockopt);
#ifdef CONFIG_COMPAT
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
int err;
if (level != SOL_IP)
return -ENOPROTOOPT;
+ if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+ return compat_mc_setsockopt(sk, level, optname, optval, optlen,
+ ip_setsockopt);
+
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
@@ -937,17 +1136,16 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif
/*
- * Get the options. Note for future reference. The GET of IP options gets the
- * _received_ ones. The set sets the _sent_ ones.
+ * Get the options. Note for future reference. The GET of IP options gets
+ * the _received_ ones. The set sets the _sent_ ones.
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
int val;
@@ -957,9 +1155,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
if (ip_mroute_opt(optname))
- return ip_mroute_getsockopt(sk,optname,optval,optlen);
+ return ip_mroute_getsockopt(sk, optname, optval, optlen);
- if (get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
if (len < 0)
return -EINVAL;
@@ -970,12 +1168,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
- struct ip_options * opt = (struct ip_options*)optbuf;
+ struct ip_options *opt = (struct ip_options *)optbuf;
+ struct ip_options_rcu *inet_opt;
+
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
opt->optlen = 0;
- if (inet->opt)
- memcpy(optbuf, inet->opt,
- sizeof(struct ip_options)+
- inet->opt->optlen);
+ if (inet_opt)
+ memcpy(optbuf, &inet_opt->opt,
+ sizeof(struct ip_options) +
+ inet_opt->opt.optlen);
release_sock(sk);
if (opt->optlen == 0)
@@ -1008,6 +1210,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_PASSSEC:
val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
break;
+ case IP_RECVORIGDSTADDR:
+ val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
+ break;
case IP_TOS:
val = inet->tos;
break;
@@ -1019,6 +1224,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_HDRINCL:
val = inet->hdrincl;
break;
+ case IP_NODEFRAG:
+ val = inet->nodefrag;
+ break;
case IP_MTU_DISCOVER:
val = inet->pmtudisc;
break;
@@ -1046,6 +1254,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MULTICAST_LOOP:
val = inet->mc_loop;
break;
+ case IP_UNICAST_IF:
+ val = (__force int)htonl((__u32) inet->uc_index);
+ break;
case IP_MULTICAST_IF:
{
struct in_addr addr;
@@ -1091,10 +1302,14 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
}
err = ip_mc_gsfget(sk, &gsf,
- (struct group_filter __user *)optval, optlen);
+ (struct group_filter __user *)optval,
+ optlen);
release_sock(sk);
return err;
}
+ case IP_MULTICAST_ALL:
+ val = inet->mc_all;
+ break;
case IP_PKTOPTIONS:
{
struct msghdr msg;
@@ -1106,13 +1321,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control = optval;
msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_flags = flags;
if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
struct in_pktinfo info;
- info.ipi_addr.s_addr = inet->rcv_saddr;
- info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+ info.ipi_addr.s_addr = inet->inet_rcv_saddr;
+ info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
info.ipi_ifindex = inet->mc_index;
put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -1120,30 +1335,40 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
int hlim = inet->mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
+ if (inet->cmsg_flags & IP_CMSG_TOS) {
+ int tos = inet->rcv_tos;
+ put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
+ }
len -= msg.msg_controllen;
return put_user(len, optlen);
}
case IP_FREEBIND:
val = inet->freebind;
break;
+ case IP_TRANSPARENT:
+ val = inet->transparent;
+ break;
+ case IP_MINTTL:
+ val = inet->min_ttl;
+ break;
default:
release_sock(sk);
return -ENOPROTOOPT;
}
release_sock(sk);
- if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
+ if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval,&ucval,1))
+ if (copy_to_user(optval, &ucval, 1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval,&val,len))
+ if (copy_to_user(optval, &val, len))
return -EFAULT;
}
return 0;
@@ -1154,14 +1379,14 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
!ip_mroute_opt(optname)) {
int len;
- if (get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
lock_sock(sk);
@@ -1175,12 +1400,21 @@ int ip_getsockopt(struct sock *sk, int level,
#endif
return err;
}
+EXPORT_SYMBOL(ip_getsockopt);
#ifdef CONFIG_COMPAT
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- int err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ int err;
+
+ if (optname == MCAST_MSFILTER)
+ return compat_mc_getsockopt(sk, level, optname, optval, optlen,
+ ip_getsockopt);
+
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
+
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1200,11 +1434,5 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif
-
-EXPORT_SYMBOL(ip_cmsg_recv);
-
-EXPORT_SYMBOL(ip_getsockopt);
-EXPORT_SYMBOL(ip_setsockopt);