diff options
Diffstat (limited to 'net/ipv4/raw.c')
| -rw-r--r-- | net/ipv4/raw.c | 238 | 
1 files changed, 141 insertions, 97 deletions
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab786e8..2c65160565e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -38,7 +38,7 @@   */  #include <linux/types.h> -#include <asm/atomic.h> +#include <linux/atomic.h>  #include <asm/byteorder.h>  #include <asm/current.h>  #include <asm/uaccess.h> @@ -48,6 +48,7 @@  #include <linux/errno.h>  #include <linux/aio.h>  #include <linux/kernel.h> +#include <linux/export.h>  #include <linux/spinlock.h>  #include <linux/sockios.h>  #include <linux/socket.h> @@ -76,6 +77,7 @@  #include <linux/seq_file.h>  #include <linux/netfilter.h>  #include <linux/netfilter_ipv4.h> +#include <linux/compat.h>  static struct raw_hashinfo raw_v4_hashinfo = {  	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), @@ -109,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk);  static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,  		unsigned short num, __be32 raddr, __be32 laddr, int dif)  { -	struct hlist_node *node; - -	sk_for_each_from(sk, node) { +	sk_for_each_from(sk) {  		struct inet_sock *inet = inet_sk(sk);  		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&& @@ -129,18 +129,20 @@ found:   *	0 - deliver   *	1 - block   */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)  { -	int type; +	struct icmphdr _hdr; +	const struct icmphdr *hdr; -	if (!pskb_may_pull(skb, sizeof(struct icmphdr))) +	hdr = skb_header_pointer(skb, skb_transport_offset(skb), +				 sizeof(_hdr), &_hdr); +	if (!hdr)  		return 1; -	type = icmp_hdr(skb)->type; -	if (type < 32) { +	if (hdr->type < 32) {  		__u32 data = raw_sk(sk)->filter.data; -		return ((1 << type) & data) != 0; +		return ((1U << hdr->type) & data) != 0;  	}  	/* Do not block unknown ICMP types */ @@ -153,7 +155,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)   * RFC 1122: SHOULD pass TOS value up to the transport layer.   * -> It does. And not only TOS, but all IP header.   */ -static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)  {  	struct sock *sk;  	struct hlist_head *head; @@ -214,6 +216,13 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)  	int err = 0;  	int harderr = 0; +	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) +		ipv4_sk_update_pmtu(skb, sk, info); +	else if (type == ICMP_REDIRECT) { +		ipv4_sk_redirect(skb, sk); +		return; +	} +  	/* Report error on raw socket, if:  	   1. User requested ip_recverr.  	   2. Socket is connected (otherwise the error indication @@ -246,7 +255,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)  	}  	if (inet->recverr) { -		struct iphdr *iph = (struct iphdr *)skb->data; +		const struct iphdr *iph = (const struct iphdr *)skb->data;  		u8 *payload = skb->data + (iph->ihl << 2);  		if (inet->hdrincl) @@ -264,7 +273,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)  {  	int hash;  	struct sock *raw_sk; -	struct iphdr *iph; +	const struct iphdr *iph;  	struct net *net;  	hash = protocol & (RAW_HTABLE_SIZE - 1); @@ -272,7 +281,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)  	read_lock(&raw_v4_hashinfo.lock);  	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);  	if (raw_sk != NULL) { -		iph = (struct iphdr *)skb->data; +		iph = (const struct iphdr *)skb->data;  		net = dev_net(skb->dev);  		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, @@ -280,17 +289,18 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)  						skb->dev->ifindex)) != NULL) {  			raw_err(raw_sk, skb, info);  			raw_sk = sk_next(raw_sk); -			iph = (struct iphdr *)skb->data; +			iph = (const struct iphdr *)skb->data;  		}  	}  	read_unlock(&raw_v4_hashinfo.lock);  } -static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)  {  	/* Charge it to the socket. */ -	if (ip_queue_rcv_skb(sk, skb) < 0) { +	ipv4_pktinfo_prepare(sk, skb); +	if (sock_queue_rcv_skb(sk, skb) < 0) {  		kfree_skb(skb);  		return NET_RX_DROP;  	} @@ -313,9 +323,10 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)  	return 0;  } -static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, -			struct rtable **rtp, -			unsigned int flags) +static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, +			   void *from, size_t length, +			   struct rtable **rtp, +			   unsigned int flags)  {  	struct inet_sock *inet = inet_sk(sk);  	struct net *net = sock_net(sk); @@ -324,21 +335,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,  	unsigned int iphlen;  	int err;  	struct rtable *rt = *rtp; +	int hlen, tlen;  	if (length > rt->dst.dev->mtu) { -		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, +		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,  			       rt->dst.dev->mtu);  		return -EMSGSIZE;  	}  	if (flags&MSG_PROBE)  		goto out; +	hlen = LL_RESERVED_SPACE(rt->dst.dev); +	tlen = rt->dst.dev->needed_tailroom;  	skb = sock_alloc_send_skb(sk, -				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, +				  length + hlen + tlen + 15,  				  flags & MSG_DONTWAIT, &err);  	if (skb == NULL)  		goto error; -	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); +	skb_reserve(skb, hlen);  	skb->priority = sk->sk_priority;  	skb->mark = sk->sk_mark; @@ -371,11 +385,11 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,  	if (iphlen >= sizeof(*iph)) {  		if (!iph->saddr) -			iph->saddr = rt->rt_src; +			iph->saddr = fl4->saddr;  		iph->check   = 0;  		iph->tot_len = htons(length);  		if (!iph->id) -			ip_select_ident(iph, &rt->dst, NULL); +			ip_select_ident(skb, NULL);  		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);  	} @@ -401,7 +415,7 @@ error:  	return err;  } -static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg)  {  	struct iovec *iov;  	u8 __user *type = NULL; @@ -417,7 +431,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)  		if (!iov)  			continue; -		switch (fl->proto) { +		switch (fl4->flowi4_proto) {  		case IPPROTO_ICMP:  			/* check if one-byte field is readable or not. */  			if (iov->iov_base && iov->iov_len < 1) @@ -432,8 +446,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)  				code = iov->iov_base;  			if (type && code) { -				if (get_user(fl->fl_icmp_type, type) || -				    get_user(fl->fl_icmp_code, code)) +				if (get_user(fl4->fl4_icmp_type, type) || +				    get_user(fl4->fl4_icmp_code, code))  					return -EFAULT;  				probed = 1;  			} @@ -454,11 +468,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	struct inet_sock *inet = inet_sk(sk);  	struct ipcm_cookie ipc;  	struct rtable *rt = NULL; +	struct flowi4 fl4;  	int free = 0;  	__be32 daddr;  	__be32 saddr;  	u8  tos;  	int err; +	struct ip_options_data opt_copy;  	err = -EMSGSIZE;  	if (len > 0xFFFF) @@ -477,16 +493,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	 */  	if (msg->msg_namelen) { -		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; +		DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);  		err = -EINVAL;  		if (msg->msg_namelen < sizeof(*usin))  			goto out;  		if (usin->sin_family != AF_INET) { -			static int complained; -			if (!complained++) -				printk(KERN_INFO "%s forgot to set AF_INET in " -						 "raw sendmsg. Fix it!\n", -						 current->comm); +			pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", +				     __func__, current->comm);  			err = -EAFNOSUPPORT;  			if (usin->sin_family)  				goto out; @@ -506,10 +519,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	ipc.addr = inet->inet_saddr;  	ipc.opt = NULL;  	ipc.tx_flags = 0; +	ipc.ttl = 0; +	ipc.tos = -1;  	ipc.oif = sk->sk_bound_dev_if;  	if (msg->msg_controllen) { -		err = ip_cmsg_send(sock_net(sk), msg, &ipc); +		err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);  		if (err)  			goto out;  		if (ipc.opt) @@ -519,8 +534,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	saddr = ipc.addr;  	ipc.addr = daddr; -	if (!ipc.opt) -		ipc.opt = inet->opt; +	if (!ipc.opt) { +		struct ip_options_rcu *inet_opt; + +		rcu_read_lock(); +		inet_opt = rcu_dereference(inet->inet_opt); +		if (inet_opt) { +			memcpy(&opt_copy, inet_opt, +			       sizeof(*inet_opt) + inet_opt->opt.optlen); +			ipc.opt = &opt_copy.opt; +		} +		rcu_read_unlock(); +	}  	if (ipc.opt) {  		err = -EINVAL; @@ -529,13 +554,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		 */  		if (inet->hdrincl)  			goto done; -		if (ipc.opt->srr) { +		if (ipc.opt->opt.srr) {  			if (!daddr)  				goto done; -			daddr = ipc.opt->faddr; +			daddr = ipc.opt->opt.faddr;  		}  	} -	tos = RT_CONN_FLAGS(sk); +	tos = get_rtconn_flags(&ipc, sk);  	if (msg->msg_flags & MSG_DONTROUTE)  		tos |= RTO_ONLINK; @@ -544,28 +569,29 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  			ipc.oif = inet->mc_index;  		if (!saddr)  			saddr = inet->mc_addr; +	} else if (!ipc.oif) +		ipc.oif = inet->uc_index; + +	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, +			   RT_SCOPE_UNIVERSE, +			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, +			   inet_sk_flowi_flags(sk) | +			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), +			   daddr, saddr, 0, 0); + +	if (!inet->hdrincl) { +		err = raw_probe_proto_opt(&fl4, msg); +		if (err) +			goto done;  	} -	{ -		struct flowi fl = { .oif = ipc.oif, -				    .mark = sk->sk_mark, -				    .fl4_dst = daddr, -				    .fl4_src = saddr, -				    .fl4_tos = tos, -				    .proto = inet->hdrincl ? IPPROTO_RAW : -							     sk->sk_protocol, -				  }; -		if (!inet->hdrincl) { -			err = raw_probe_proto_opt(&fl, msg); -			if (err) -				goto done; -		} - -		security_sk_classify_flow(sk, &fl); -		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); -	} -	if (err) +	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); +	rt = ip_route_output_flow(sock_net(sk), &fl4, sk); +	if (IS_ERR(rt)) { +		err = PTR_ERR(rt); +		rt = NULL;  		goto done; +	}  	err = -EACCES;  	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) @@ -576,19 +602,20 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  back_from_confirm:  	if (inet->hdrincl) -		err = raw_send_hdrinc(sk, msg->msg_iov, len, -					&rt, msg->msg_flags); +		err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len, +				      &rt, msg->msg_flags);  	 else {  		if (!ipc.addr) -			ipc.addr = rt->rt_dst; +			ipc.addr = fl4.daddr;  		lock_sock(sk); -		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, -					&ipc, &rt, msg->msg_flags); +		err = ip_append_data(sk, &fl4, ip_generic_getfrag, +				     msg->msg_iov, len, 0, +				     &ipc, &rt, msg->msg_flags);  		if (err)  			ip_flush_pending_frames(sk);  		else if (!(msg->msg_flags & MSG_MORE)) { -			err = ip_push_pending_frames(sk); +			err = ip_push_pending_frames(sk, &fl4);  			if (err == -ENOBUFS && !inet->recverr)  				err = 0;  		} @@ -615,7 +642,7 @@ do_confirm:  static void raw_close(struct sock *sk, long timeout)  {  	/* -	 * Raw sockets may have direct kernel refereneces. Kill them. +	 * Raw sockets may have direct kernel references. Kill them.  	 */  	ip_ra_control(sk, 0, NULL); @@ -663,17 +690,14 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	struct inet_sock *inet = inet_sk(sk);  	size_t copied = 0;  	int err = -EOPNOTSUPP; -	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);  	struct sk_buff *skb;  	if (flags & MSG_OOB)  		goto out; -	if (addr_len) -		*addr_len = sizeof(*sin); -  	if (flags & MSG_ERRQUEUE) { -		err = ip_recv_error(sk, msg, len); +		err = ip_recv_error(sk, msg, len, addr_len);  		goto out;  	} @@ -699,6 +723,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;  		sin->sin_port = 0;  		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); +		*addr_len = sizeof(*sin);  	}  	if (inet->cmsg_flags)  		ip_cmsg_recv(msg, skb); @@ -812,31 +837,48 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname,  static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)  {  	switch (cmd) { -		case SIOCOUTQ: { -			int amount = sk_wmem_alloc_get(sk); +	case SIOCOUTQ: { +		int amount = sk_wmem_alloc_get(sk); -			return put_user(amount, (int __user *)arg); -		} -		case SIOCINQ: { -			struct sk_buff *skb; -			int amount = 0; - -			spin_lock_bh(&sk->sk_receive_queue.lock); -			skb = skb_peek(&sk->sk_receive_queue); -			if (skb != NULL) -				amount = skb->len; -			spin_unlock_bh(&sk->sk_receive_queue.lock); -			return put_user(amount, (int __user *)arg); -		} +		return put_user(amount, (int __user *)arg); +	} +	case SIOCINQ: { +		struct sk_buff *skb; +		int amount = 0; + +		spin_lock_bh(&sk->sk_receive_queue.lock); +		skb = skb_peek(&sk->sk_receive_queue); +		if (skb != NULL) +			amount = skb->len; +		spin_unlock_bh(&sk->sk_receive_queue.lock); +		return put_user(amount, (int __user *)arg); +	} -		default: +	default: +#ifdef CONFIG_IP_MROUTE +		return ipmr_ioctl(sk, cmd, (void __user *)arg); +#else +		return -ENOIOCTLCMD; +#endif +	} +} + +#ifdef CONFIG_COMPAT +static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ +	switch (cmd) { +	case SIOCOUTQ: +	case SIOCINQ: +		return -ENOIOCTLCMD; +	default:  #ifdef CONFIG_IP_MROUTE -			return ipmr_ioctl(sk, cmd, (void __user *)arg); +		return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));  #else -			return -ENOIOCTLCMD; +		return -ENOIOCTLCMD;  #endif  	}  } +#endif  struct proto raw_prot = {  	.name		   = "RAW", @@ -853,6 +895,7 @@ struct proto raw_prot = {  	.recvmsg	   = raw_recvmsg,  	.bind		   = raw_bind,  	.backlog_rcv	   = raw_rcv_skb, +	.release_cb	   = ip4_datagram_release_cb,  	.hash		   = raw_hash_sk,  	.unhash		   = raw_unhash_sk,  	.obj_size	   = sizeof(struct raw_sock), @@ -860,6 +903,7 @@ struct proto raw_prot = {  #ifdef CONFIG_COMPAT  	.compat_setsockopt = compat_raw_setsockopt,  	.compat_getsockopt = compat_raw_getsockopt, +	.compat_ioctl	   = compat_raw_ioctl,  #endif  }; @@ -871,9 +915,7 @@ static struct sock *raw_get_first(struct seq_file *seq)  	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;  			++state->bucket) { -		struct hlist_node *node; - -		sk_for_each(sk, node, &state->h->ht[state->bucket]) +		sk_for_each(sk, &state->h->ht[state->bucket])  			if (sock_net(sk) == seq_file_net(seq))  				goto found;  	} @@ -948,11 +990,13 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)  	      srcp  = inet->inet_num;  	seq_printf(seq, "%4d: %08X:%04X %08X:%04X" -		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", +		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",  		i, src, srcp, dest, destp, sp->sk_state,  		sk_wmem_alloc_get(sp),  		sk_rmem_alloc_get(sp), -		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), +		0, 0L, 0, +		from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), +		0, sock_i_ino(sp),  		atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));  } @@ -1005,7 +1049,7 @@ static const struct file_operations raw_seq_fops = {  static __net_init int raw_init_net(struct net *net)  { -	if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) +	if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))  		return -ENOMEM;  	return 0; @@ -1013,7 +1057,7 @@ static __net_init int raw_init_net(struct net *net)  static __net_exit void raw_exit_net(struct net *net)  { -	proc_net_remove(net, "raw"); +	remove_proc_entry("raw", net->proc_net);  }  static __net_initdata struct pernet_operations raw_net_ops = {  | 
