diff options
Diffstat (limited to 'net/netlink/af_netlink.c')
| -rw-r--r-- | net/netlink/af_netlink.c | 232 | 
1 files changed, 174 insertions, 58 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8df7f64c6db..e6fac7e3db5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -131,7 +131,7 @@ int netlink_add_tap(struct netlink_tap *nt)  }  EXPORT_SYMBOL_GPL(netlink_add_tap); -int __netlink_remove_tap(struct netlink_tap *nt) +static int __netlink_remove_tap(struct netlink_tap *nt)  {  	bool found = false;  	struct netlink_tap *tmp; @@ -155,7 +155,6 @@ out:  	return found ? 0 : -ENODEV;  } -EXPORT_SYMBOL_GPL(__netlink_remove_tap);  int netlink_remove_tap(struct netlink_tap *nt)  { @@ -204,6 +203,8 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,  	if (nskb) {  		nskb->dev = dev;  		nskb->protocol = htons((u16) sk->sk_protocol); +		nskb->pkt_type = netlink_is_kernel(sk) ? +				 PACKET_KERNEL : PACKET_USER;  		ret = dev_queue_xmit(nskb);  		if (unlikely(ret > 0)) @@ -239,6 +240,13 @@ static void netlink_deliver_tap(struct sk_buff *skb)  	rcu_read_unlock();  } +static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, +				       struct sk_buff *skb) +{ +	if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) +		netlink_deliver_tap(skb); +} +  static void netlink_overrun(struct sock *sk)  {  	struct netlink_sock *nlk = nlk_sk(sk); @@ -628,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,  		while (nlk->cb_running && netlink_dump_space(nlk)) {  			err = netlink_dump(sk);  			if (err < 0) { -				sk->sk_err = err; +				sk->sk_err = -err;  				sk->sk_error_report(sk);  				break;  			} @@ -1198,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,  	struct module *module = NULL;  	struct mutex *cb_mutex;  	struct netlink_sock *nlk; -	void (*bind)(int group); +	int (*bind)(int group); +	void (*unbind)(int group);  	int err = 0;  	sock->state = SS_UNCONNECTED; @@ -1224,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,  		err = -EPROTONOSUPPORT;  	cb_mutex = nl_table[protocol].cb_mutex;  	bind = nl_table[protocol].bind; +	unbind = nl_table[protocol].unbind;  	netlink_unlock_table();  	if (err < 0) @@ -1240,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,  	nlk = nlk_sk(sock->sk);  	nlk->module = module;  	nlk->netlink_bind = bind; +	nlk->netlink_unbind = unbind;  out:  	return err; @@ -1293,6 +1304,7 @@ static int netlink_release(struct socket *sock)  			kfree_rcu(old, rcu);  			nl_table[sk->sk_protocol].module = NULL;  			nl_table[sk->sk_protocol].bind = NULL; +			nl_table[sk->sk_protocol].unbind = NULL;  			nl_table[sk->sk_protocol].flags = 0;  			nl_table[sk->sk_protocol].registered = 0;  		} @@ -1352,7 +1364,74 @@ retry:  	return err;  } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, +			struct user_namespace *user_ns, int cap) +{ +	return ((nsp->flags & NETLINK_SKB_DST) || +		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && +		ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, +			struct user_namespace *user_ns, int cap) +{ +	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ +	return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. + */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ +	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + +static inline int netlink_allowed(const struct socket *sock, unsigned int flag)  {  	return (nl_table[sock->sk->sk_protocol].flags & flag) ||  		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1403,6 +1482,19 @@ static int netlink_realloc_groups(struct sock *sk)  	return err;  } +static void netlink_unbind(int group, long unsigned int groups, +			   struct netlink_sock *nlk) +{ +	int undo; + +	if (!nlk->netlink_unbind) +		return; + +	for (undo = 0; undo < group; undo++) +		if (test_bit(group, &groups)) +			nlk->netlink_unbind(undo); +} +  static int netlink_bind(struct socket *sock, struct sockaddr *addr,  			int addr_len)  { @@ -1411,6 +1503,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,  	struct netlink_sock *nlk = nlk_sk(sk);  	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;  	int err; +	long unsigned int groups = nladdr->nl_groups;  	if (addr_len < sizeof(struct sockaddr_nl))  		return -EINVAL; @@ -1419,45 +1512,53 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,  		return -EINVAL;  	/* Only superuser is allowed to listen multicasts */ -	if (nladdr->nl_groups) { -		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) +	if (groups) { +		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))  			return -EPERM;  		err = netlink_realloc_groups(sk);  		if (err)  			return err;  	} -	if (nlk->portid) { +	if (nlk->portid)  		if (nladdr->nl_pid != nlk->portid)  			return -EINVAL; -	} else { + +	if (nlk->netlink_bind && groups) { +		int group; + +		for (group = 0; group < nlk->ngroups; group++) { +			if (!test_bit(group, &groups)) +				continue; +			err = nlk->netlink_bind(group); +			if (!err) +				continue; +			netlink_unbind(group, groups, nlk); +			return err; +		} +	} + +	if (!nlk->portid) {  		err = nladdr->nl_pid ?  			netlink_insert(sk, net, nladdr->nl_pid) :  			netlink_autobind(sock); -		if (err) +		if (err) { +			netlink_unbind(nlk->ngroups - 1, groups, nlk);  			return err; +		}  	} -	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) +	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))  		return 0;  	netlink_table_grab();  	netlink_update_subscriptions(sk, nlk->subscriptions + -					 hweight32(nladdr->nl_groups) - +					 hweight32(groups) -  					 hweight32(nlk->groups[0])); -	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; +	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;  	netlink_update_listeners(sk);  	netlink_table_ungrab(); -	if (nlk->netlink_bind && nlk->groups[0]) { -		int i; - -		for (i=0; i<nlk->ngroups; i++) { -			if (test_bit(i, nlk->groups)) -				nlk->netlink_bind(i); -		} -	} -  	return 0;  } @@ -1481,8 +1582,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,  	if (addr->sa_family != AF_NETLINK)  		return -EINVAL; -	/* Only superuser is allowed to send multicasts */ -	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) +	if ((nladdr->nl_groups || nladdr->nl_pid) && +	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))  		return -EPERM;  	if (!nlk->portid) @@ -1645,7 +1746,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)  	else  #endif /* CONFIG_NETLINK_MMAP */  		skb_queue_tail(&sk->sk_receive_queue, skb); -	sk->sk_data_ready(sk, len); +	sk->sk_data_ready(sk);  	return len;  } @@ -1697,14 +1798,10 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,  	ret = -ECONNREFUSED;  	if (nlk->netlink_rcv != NULL) { -		/* We could do a netlink_deliver_tap(skb) here as well -		 * but since this is intended for the kernel only, we -		 * should rather let it stay under the hood. -		 */ -  		ret = skb->len;  		netlink_skb_set_owner_r(skb, sk);  		NETLINK_CB(skb).sk = ssk; +		netlink_deliver_tap_kernel(sk, ssk, skb);  		nlk->netlink_rcv(skb);  		consume_skb(skb);  	} else { @@ -1769,6 +1866,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,  	if (ring->pg_vec == NULL)  		goto out_put; +	if (ring->frame_size - NL_MMAP_HDRLEN < size) +		goto out_put; +  	skb = alloc_skb_head(gfp_mask);  	if (skb == NULL)  		goto err1; @@ -1778,6 +1878,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,  	if (ring->pg_vec == NULL)  		goto out_free; +	/* check again under lock */  	maxlen = ring->frame_size - NL_MMAP_HDRLEN;  	if (maxlen < size)  		goto out_free; @@ -2017,7 +2118,7 @@ out:   * netlink_set_err - report error to broadcast listeners   * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()   * @portid: the PORTID of a process that we want to skip (if any) - * @groups: the broadcast group that will notice the error + * @group: the broadcast group that will notice the error   * @code: error code, must be negative (as usual in kernelspace)   *   * This function returns the number of broadcast listeners that have set the @@ -2088,20 +2189,24 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,  		break;  	case NETLINK_ADD_MEMBERSHIP:  	case NETLINK_DROP_MEMBERSHIP: { -		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) +		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))  			return -EPERM;  		err = netlink_realloc_groups(sk);  		if (err)  			return err;  		if (!val || val - 1 >= nlk->ngroups)  			return -EINVAL; +		if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { +			err = nlk->netlink_bind(val); +			if (err) +				return err; +		}  		netlink_table_grab();  		netlink_update_socket_mc(nlk, val,  					 optname == NETLINK_ADD_MEMBERSHIP);  		netlink_table_ungrab(); - -		if (nlk->netlink_bind) -			nlk->netlink_bind(val); +		if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) +			nlk->netlink_unbind(val);  		err = 0;  		break; @@ -2214,12 +2319,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,  	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);  	struct sock *sk = sock->sk;  	struct netlink_sock *nlk = nlk_sk(sk); -	struct sockaddr_nl *addr = msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);  	u32 dst_portid;  	u32 dst_group;  	struct sk_buff *skb;  	int err;  	struct scm_cookie scm; +	u32 netlink_skb_flags = 0;  	if (msg->msg_flags&MSG_OOB)  		return -EOPNOTSUPP; @@ -2239,8 +2345,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,  		dst_group = ffs(addr->nl_groups);  		err =  -EPERM;  		if ((dst_group || dst_portid) && -		    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) +		    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))  			goto out; +		netlink_skb_flags |= NETLINK_SKB_DST;  	} else {  		dst_portid = nlk->dst_portid;  		dst_group = nlk->dst_group; @@ -2270,6 +2377,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,  	NETLINK_CB(skb).portid	= nlk->portid;  	NETLINK_CB(skb).dst_group = dst_group;  	NETLINK_CB(skb).creds	= siocb->scm->creds; +	NETLINK_CB(skb).flags	= netlink_skb_flags;  	err = -EFAULT;  	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -2335,7 +2443,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,  	}  #endif -	msg->msg_namelen = 0; +	/* Record the max length of recvmsg() calls for future allocations */ +	nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); +	nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, +				     16384);  	copied = data_skb->len;  	if (len < copied) { @@ -2347,7 +2458,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,  	err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);  	if (msg->msg_name) { -		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; +		DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);  		addr->nl_family = AF_NETLINK;  		addr->nl_pad    = 0;  		addr->nl_pid	= NETLINK_CB(skb).portid; @@ -2372,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,  	    atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {  		ret = netlink_dump(sk);  		if (ret) { -			sk->sk_err = ret; +			sk->sk_err = -ret;  			sk->sk_error_report(sk);  		}  	} @@ -2383,7 +2494,7 @@ out:  	return err ? : copied;  } -static void netlink_data_ready(struct sock *sk, int len) +static void netlink_data_ready(struct sock *sk)  {  	BUG();  } @@ -2537,28 +2648,13 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)  		netlink_update_socket_mc(nlk_sk(sk), group, 0);  } -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - *	netlink_kernel_create(). - * @group: The multicast group to clear. - */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ -	netlink_table_grab(); -	__netlink_clear_multicast_users(ksk, group); -	netlink_table_ungrab(); -} -  struct nlmsghdr *  __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)  {  	struct nlmsghdr *nlh;  	int size = nlmsg_msg_size(len); -	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); +	nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));  	nlh->nlmsg_type = type;  	nlh->nlmsg_len = size;  	nlh->nlmsg_flags = flags; @@ -2596,7 +2692,27 @@ static int netlink_dump(struct sock *sk)  	if (!netlink_rx_is_mmaped(sk) &&  	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)  		goto errout_skb; -	skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + +	/* NLMSG_GOODSIZE is small to avoid high order allocations being +	 * required, but it makes sense to _attempt_ a 16K bytes allocation +	 * to reduce number of system calls on dump operations, if user +	 * ever provided a big enough buffer. +	 */ +	if (alloc_size < nlk->max_recvmsg_len) { +		skb = netlink_alloc_skb(sk, +					nlk->max_recvmsg_len, +					nlk->portid, +					GFP_KERNEL | +					__GFP_NOWARN | +					__GFP_NORETRY); +		/* available room should be exact amount to avoid MSG_TRUNC */ +		if (skb) +			skb_reserve(skb, skb_tailroom(skb) - +					 nlk->max_recvmsg_len); +	} +	if (!skb) +		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, +					GFP_KERNEL);  	if (!skb)  		goto errout_skb;  	netlink_skb_set_owner_r(skb, sk);  | 
