Diffstat (limited to 'net/netlink')
-rw-r--r--   net/netlink/af_netlink.c | 232
-rw-r--r--   net/netlink/af_netlink.h |   7
-rw-r--r--   net/netlink/genetlink.c  | 552
3 files changed, 449 insertions, 342 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8df7f64c6db..e6fac7e3db5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -131,7 +131,7 @@ int netlink_add_tap(struct netlink_tap *nt) } EXPORT_SYMBOL_GPL(netlink_add_tap); -int __netlink_remove_tap(struct netlink_tap *nt) +static int __netlink_remove_tap(struct netlink_tap *nt) { bool found = false; struct netlink_tap *tmp; @@ -155,7 +155,6 @@ out: return found ? 0 : -ENODEV; } -EXPORT_SYMBOL_GPL(__netlink_remove_tap); int netlink_remove_tap(struct netlink_tap *nt) { @@ -204,6 +203,8 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); + nskb->pkt_type = netlink_is_kernel(sk) ? + PACKET_KERNEL : PACKET_USER; ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) @@ -239,6 +240,13 @@ static void netlink_deliver_tap(struct sk_buff *skb) rcu_read_unlock(); } +static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, + struct sk_buff *skb) +{ + if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) + netlink_deliver_tap(skb); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -628,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock, while (nlk->cb_running && netlink_dump_space(nlk)) { err = netlink_dump(sk); if (err < 0) { - sk->sk_err = err; + sk->sk_err = -err; sk->sk_error_report(sk); break; } @@ -1198,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1224,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1240,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1293,6 +1304,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1352,7 +1364,74 @@ retry: return err; } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. 
+ */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return ((nsp->flags & NETLINK_SKB_DST) || + file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. 
+ */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + +static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1403,6 +1482,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if (!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(group, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1411,6 +1503,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1419,45 +1512,53 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (groups) { + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? 
netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i=0; i<nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -1481,8 +1582,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family != AF_NETLINK) return -EINVAL; - /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + if ((nladdr->nl_groups || nladdr->nl_pid) && + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -1645,7 +1746,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) else #endif /* CONFIG_NETLINK_MMAP */ skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, len); + sk->sk_data_ready(sk); return len; } @@ -1697,14 +1798,10 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { - /* We could do a netlink_deliver_tap(skb) here as well - * but since this is intended for the kernel only, we - * should rather let it stay under the hood. - */ - ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; + netlink_deliver_tap_kernel(sk, ssk, skb); nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -1769,6 +1866,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_put; + if (ring->frame_size - NL_MMAP_HDRLEN < size) + goto out_put; + skb = alloc_skb_head(gfp_mask); if (skb == NULL) goto err1; @@ -1778,6 +1878,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_free; + /* check again under lock */ maxlen = ring->frame_size - NL_MMAP_HDRLEN; if (maxlen < size) goto out_free; @@ -2017,7 +2118,7 @@ out: * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() * @portid: the PORTID of a process that we want to skip (if any) - * @groups: the broadcast group that will notice the error + * @group: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the @@ -2088,20 +2189,24 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); 
netlink_table_ungrab(); - - if (nlk->netlink_bind) - nlk->netlink_bind(val); + if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) + nlk->netlink_unbind(val); err = 0; break; @@ -2214,12 +2319,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *addr = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; struct scm_cookie scm; + u32 netlink_skb_flags = 0; if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; @@ -2239,8 +2345,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; + netlink_skb_flags |= NETLINK_SKB_DST; } else { dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; @@ -2270,6 +2377,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -2335,7 +2443,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; + /* Record the max length of recvmsg() calls for future allocations */ + nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); + nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, + 16384); copied = data_skb->len; if (len < copied) { @@ -2347,7 +2458,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); if (msg->msg_name) { - struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).portid; @@ -2372,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = ret; + sk->sk_err = -ret; sk->sk_error_report(sk); } } @@ -2383,7 +2494,7 @@ out: return err ? : copied; } -static void netlink_data_ready(struct sock *sk, int len) +static void netlink_data_ready(struct sock *sk) { BUG(); } @@ -2537,28 +2648,13 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_update_socket_mc(nlk_sk(sk), group, 0); } -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - * netlink_kernel_create(). - * @group: The multicast group to clear. 
- */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ - netlink_table_grab(); - __netlink_clear_multicast_users(ksk, group); - netlink_table_ungrab(); -} - struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = nlmsg_msg_size(len); - nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); + nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; @@ -2596,7 +2692,27 @@ static int netlink_dump(struct sock *sk) if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + + /* NLMSG_GOODSIZE is small to avoid high order allocations being + * required, but it makes sense to _attempt_ a 16K bytes allocation + * to reduce number of system calls on dump operations, if user + * ever provided a big enough buffer. + */ + if (alloc_size < nlk->max_recvmsg_len) { + skb = netlink_alloc_skb(sk, + nlk->max_recvmsg_len, + nlk->portid, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); + /* available room should be exact amount to avoid MSG_TRUNC */ + if (skb) + skb_reserve(skb, skb_tailroom(skb) - + nlk->max_recvmsg_len); + } + if (!skb) + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, + GFP_KERNEL); if (!skb) goto errout_skb; netlink_skb_set_owner_r(skb, sk); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index acbd774eeb7..0b59d441f5b 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -31,13 +31,15 @@ struct netlink_sock { u32 ngroups; unsigned long *groups; unsigned long state; + size_t max_recvmsg_len; wait_queue_head_t wait; bool cb_running; struct netlink_callback cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -73,7 +75,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 0c741cec4d0..76393f2f4b2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -65,12 +65,27 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * To avoid an allocation at boot of just one unsigned long, * declare it global instead. * Bit 0 is marked as already used since group 0 is invalid. + * Bit 1 is marked as already used since the drop-monitor code + * abuses the API and thinks it can statically use group 1. + * That group will typically conflict with other groups that + * any proper users use. + * Bit 16 is marked as used since it's used for generic netlink + * and the code no longer marks pre-reserved IDs as used. + * Bit 17 is marked as already used since the VFS quota code + * also abused this API and relied on family == group ID, we + * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) 
*/ -static unsigned long mc_group_start = 0x1; +static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -static int genl_ctrl_event(int event, void *data); +static int genl_ctrl_event(int event, struct genl_family *family, + const struct genl_multicast_group *grp, + int grp_id); static inline unsigned int genl_family_hash(unsigned int id) { @@ -106,13 +121,13 @@ static struct genl_family *genl_family_find_byname(char *name) return NULL; } -static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) { - struct genl_ops *ops; + int i; - list_for_each_entry(ops, &family->ops_list, ops_list) - if (ops->cmd == cmd) - return ops; + for (i = 0; i < family->n_ops; i++) + if (family->ops[i].cmd == cmd) + return &family->ops[i]; return NULL; } @@ -126,7 +141,9 @@ static u16 genl_generate_id(void) int i; for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { - if (!genl_family_find_byid(id_gen_idx)) + if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && + !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) id_gen_idx = GENL_MIN_ID; @@ -135,62 +152,113 @@ static u16 genl_generate_id(void) return 0; } -static struct genl_multicast_group notify_grp; - -/** - * genl_register_mc_group - register a multicast group - * - * Registers the specified multicast group and notifies userspace - * about the new group. - * - * Returns 0 on success or a negative error code. - * - * @family: The generic netlink family the group shall be registered for. - * @grp: The group to register, must have a name. 
- */ -int genl_register_mc_group(struct genl_family *family, - struct genl_multicast_group *grp) +static int genl_allocate_reserve_groups(int n_groups, int *first_id) { - int id; unsigned long *new_groups; - int err = 0; + int start = 0; + int i; + int id; + bool fits; + + do { + if (start == 0) + id = find_first_zero_bit(mc_groups, + mc_groups_longs * + BITS_PER_LONG); + else + id = find_next_zero_bit(mc_groups, + mc_groups_longs * BITS_PER_LONG, + start); + + fits = true; + for (i = id; + i < min_t(int, id + n_groups, + mc_groups_longs * BITS_PER_LONG); + i++) { + if (test_bit(i, mc_groups)) { + start = i; + fits = false; + break; + } + } - BUG_ON(grp->name[0] == '\0'); - BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); + if (id >= mc_groups_longs * BITS_PER_LONG) { + unsigned long new_longs = mc_groups_longs + + BITS_TO_LONGS(n_groups); + size_t nlen = new_longs * sizeof(unsigned long); + + if (mc_groups == &mc_group_start) { + new_groups = kzalloc(nlen, GFP_KERNEL); + if (!new_groups) + return -ENOMEM; + mc_groups = new_groups; + *mc_groups = mc_group_start; + } else { + new_groups = krealloc(mc_groups, nlen, + GFP_KERNEL); + if (!new_groups) + return -ENOMEM; + mc_groups = new_groups; + for (i = 0; i < BITS_TO_LONGS(n_groups); i++) + mc_groups[mc_groups_longs + i] = 0; + } + mc_groups_longs = new_longs; + } + } while (!fits); - genl_lock_all(); + for (i = id; i < id + n_groups; i++) + set_bit(i, mc_groups); + *first_id = id; + return 0; +} - /* special-case our own group */ - if (grp == ¬ify_grp) - id = GENL_ID_CTRL; - else - id = find_first_zero_bit(mc_groups, - mc_groups_longs * BITS_PER_LONG); +static struct genl_family genl_ctrl; +static int genl_validate_assign_mc_groups(struct genl_family *family) +{ + int first_id; + int n_groups = family->n_mcgrps; + int err = 0, i; + bool groups_allocated = false; - if (id >= mc_groups_longs * BITS_PER_LONG) { - size_t nlen = (mc_groups_longs + 1) * sizeof(unsigned long); + if (!n_groups) + return 0; - if (mc_groups == &mc_group_start) { - new_groups = kzalloc(nlen, GFP_KERNEL); - if (!new_groups) { - err = -ENOMEM; - goto out; - } - mc_groups = new_groups; - *mc_groups = mc_group_start; - } else { - new_groups = krealloc(mc_groups, nlen, GFP_KERNEL); - if (!new_groups) { - err = -ENOMEM; - goto out; - } - mc_groups = new_groups; - mc_groups[mc_groups_longs] = 0; - } - mc_groups_longs++; + for (i = 0; i < n_groups; i++) { + const struct genl_multicast_group *grp = &family->mcgrps[i]; + + if (WARN_ON(grp->name[0] == '\0')) + return -EINVAL; + if (WARN_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL)) + return -EINVAL; } + /* special-case our own group and hacks */ + if (family == &genl_ctrl) { + first_id = GENL_ID_CTRL; + BUG_ON(n_groups != 1); + } else if (strcmp(family->name, "NET_DM") == 0) { + first_id = 1; + BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_VFS_DQUOT) { + first_id = GENL_ID_VFS_DQUOT; + BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); + } else { + groups_allocated = true; + err = genl_allocate_reserve_groups(n_groups, &first_id); + if (err) + return err; + } + + family->mcgrp_offset = first_id; + + /* if still initializing, can't and don't need to to realloc bitmaps */ + if (!init_net.genl_sock) + return 0; + if (family->netnsok) { struct net *net; @@ -206,9 +274,7 @@ int genl_register_mc_group(struct genl_family *family, * number of _possible_ groups has been * increased on some sockets which is ok. 
*/ - rcu_read_unlock(); - netlink_table_ungrab(); - goto out; + break; } } rcu_read_unlock(); @@ -216,152 +282,63 @@ int genl_register_mc_group(struct genl_family *family, } else { err = netlink_change_ngroups(init_net.genl_sock, mc_groups_longs * BITS_PER_LONG); - if (err) - goto out; } - grp->id = id; - set_bit(id, mc_groups); - list_add_tail(&grp->list, &family->mcast_groups); - grp->family = family; + if (groups_allocated && err) { + for (i = 0; i < family->n_mcgrps; i++) + clear_bit(family->mcgrp_offset + i, mc_groups); + } - genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); - out: - genl_unlock_all(); return err; } -EXPORT_SYMBOL(genl_register_mc_group); -static void __genl_unregister_mc_group(struct genl_family *family, - struct genl_multicast_group *grp) +static void genl_unregister_mc_groups(struct genl_family *family) { struct net *net; - BUG_ON(grp->family != family); + int i; netlink_table_grab(); rcu_read_lock(); - for_each_net_rcu(net) - __netlink_clear_multicast_users(net->genl_sock, grp->id); + for_each_net_rcu(net) { + for (i = 0; i < family->n_mcgrps; i++) + __netlink_clear_multicast_users( + net->genl_sock, family->mcgrp_offset + i); + } rcu_read_unlock(); netlink_table_ungrab(); - clear_bit(grp->id, mc_groups); - list_del(&grp->list); - genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, grp); - grp->id = 0; - grp->family = NULL; -} + for (i = 0; i < family->n_mcgrps; i++) { + int grp_id = family->mcgrp_offset + i; -/** - * genl_unregister_mc_group - unregister a multicast group - * - * Unregisters the specified multicast group and notifies userspace - * about it. All current listeners on the group are removed. - * - * Note: It is not necessary to unregister all multicast groups before - * unregistering the family, unregistering the family will cause - * all assigned multicast groups to be unregistered automatically. - * - * @family: Generic netlink family the group belongs to. - * @grp: The group to unregister, must have been registered successfully - * previously. - */ -void genl_unregister_mc_group(struct genl_family *family, - struct genl_multicast_group *grp) -{ - genl_lock_all(); - __genl_unregister_mc_group(family, grp); - genl_unlock_all(); -} -EXPORT_SYMBOL(genl_unregister_mc_group); - -static void genl_unregister_mc_groups(struct genl_family *family) -{ - struct genl_multicast_group *grp, *tmp; - - list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list) - __genl_unregister_mc_group(family, grp); + if (grp_id != 1) + clear_bit(grp_id, mc_groups); + genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, family, + &family->mcgrps[i], grp_id); + } } -/** - * genl_register_ops - register generic netlink operations - * @family: generic netlink family - * @ops: operations to be registered - * - * Registers the specified operations and assigns them to the specified - * family. Either a doit or dumpit callback must be specified or the - * operation will fail. Only one operation structure per command - * identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Returns 0 on success or a negative error code. 
- */ -int genl_register_ops(struct genl_family *family, struct genl_ops *ops) +static int genl_validate_ops(const struct genl_family *family) { - int err = -EINVAL; + const struct genl_ops *ops = family->ops; + unsigned int n_ops = family->n_ops; + int i, j; - if (ops->dumpit == NULL && ops->doit == NULL) - goto errout; - - if (genl_get_cmd(ops->cmd, family)) { - err = -EEXIST; - goto errout; - } - - if (ops->dumpit) - ops->flags |= GENL_CMD_CAP_DUMP; - if (ops->doit) - ops->flags |= GENL_CMD_CAP_DO; - if (ops->policy) - ops->flags |= GENL_CMD_CAP_HASPOL; - - genl_lock_all(); - list_add_tail(&ops->ops_list, &family->ops_list); - genl_unlock_all(); - - genl_ctrl_event(CTRL_CMD_NEWOPS, ops); - err = 0; -errout: - return err; -} -EXPORT_SYMBOL(genl_register_ops); + if (WARN_ON(n_ops && !ops)) + return -EINVAL; -/** - * genl_unregister_ops - unregister generic netlink operations - * @family: generic netlink family - * @ops: operations to be unregistered - * - * Unregisters the specified operations and unassigns them from the - * specified family. The operation blocks until the current message - * processing has finished and doesn't start again until the - * unregister process has finished. - * - * Note: It is not necessary to unregister all operations before - * unregistering the family, unregistering the family will cause - * all assigned operations to be unregistered automatically. - * - * Returns 0 on success or a negative error code. - */ -int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) -{ - struct genl_ops *rc; + if (!n_ops) + return 0; - genl_lock_all(); - list_for_each_entry(rc, &family->ops_list, ops_list) { - if (rc == ops) { - list_del(&ops->ops_list); - genl_unlock_all(); - genl_ctrl_event(CTRL_CMD_DELOPS, ops); - return 0; - } + for (i = 0; i < n_ops; i++) { + if (ops[i].dumpit == NULL && ops[i].doit == NULL) + return -EINVAL; + for (j = i + 1; j < n_ops; j++) + if (ops[i].cmd == ops[j].cmd) + return -EINVAL; } - genl_unlock_all(); - return -ENOENT; + return 0; } -EXPORT_SYMBOL(genl_unregister_ops); /** * __genl_register_family - register a generic netlink family @@ -372,11 +349,14 @@ EXPORT_SYMBOL(genl_unregister_ops); * The family id may equal GENL_ID_GENERATE causing an unique id to * be automatically generated and assigned. * + * The family's ops array must already be assigned, you can use the + * genl_register_family_with_ops() helper function. + * * Return 0 on success or a negative error code. 
*/ int __genl_register_family(struct genl_family *family) { - int err = -EINVAL; + int err = -EINVAL, i; if (family->id && family->id < GENL_MIN_ID) goto errout; @@ -384,8 +364,9 @@ int __genl_register_family(struct genl_family *family) if (family->id > GENL_MAX_ID) goto errout; - INIT_LIST_HEAD(&family->ops_list); - INIT_LIST_HEAD(&family->mcast_groups); + err = genl_validate_ops(family); + if (err) + return err; genl_lock_all(); @@ -418,10 +399,18 @@ int __genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; + err = genl_validate_assign_mc_groups(family); + if (err) + goto errout_locked; + list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); - genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); + /* send all events */ + genl_ctrl_event(CTRL_CMD_NEWFAMILY, family, NULL, 0); + for (i = 0; i < family->n_mcgrps; i++) + genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, family, + &family->mcgrps[i], family->mcgrp_offset + i); return 0; @@ -433,52 +422,6 @@ errout: EXPORT_SYMBOL(__genl_register_family); /** - * __genl_register_family_with_ops - register a generic netlink family - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * This is equivalent to calling genl_register_family() followed by - * genl_register_ops() for every operation entry in the table taking - * care to unregister the family on error path. - * - * Return 0 on success or a negative error code. - */ -int __genl_register_family_with_ops(struct genl_family *family, - struct genl_ops *ops, size_t n_ops) -{ - int err, i; - - err = __genl_register_family(family); - if (err) - return err; - - for (i = 0; i < n_ops; ++i, ++ops) { - err = genl_register_ops(family, ops); - if (err) - goto err_out; - } - return 0; -err_out: - genl_unregister_family(family); - return err; -} -EXPORT_SYMBOL(__genl_register_family_with_ops); - -/** * genl_unregister_family - unregister generic netlink family * @family: generic netlink family * @@ -499,11 +442,11 @@ int genl_unregister_family(struct genl_family *family) continue; list_del(&rc->family_list); - INIT_LIST_HEAD(&family->ops_list); + family->n_ops = 0; genl_unlock_all(); kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family); + genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; } @@ -514,6 +457,26 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** + * genlmsg_new_unicast - Allocate generic netlink message for unicast + * @payload: size of the message payload + * @info: information on destination + * @flags: the type of memory to allocate + * + * Allocates a new sk_buff large enough to cover the specified payload + * plus required Netlink headers. Will check receiving socket for + * memory mapped i/o capability and use it if enabled. Will fall back + * to non-mapped skb if message size exceeds the frame size of the ring. 
+ */ +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags) +{ + size_t len = nlmsg_total_size(genlmsg_total_size(payload)); + + return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); +} +EXPORT_SYMBOL_GPL(genlmsg_new_unicast); + +/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -546,7 +509,8 @@ EXPORT_SYMBOL(genlmsg_put); static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - struct genl_ops *ops = cb->data; + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; int rc; genl_lock(); @@ -557,7 +521,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) static int genl_lock_done(struct netlink_callback *cb) { - struct genl_ops *ops = cb->data; + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; int rc = 0; if (ops->done) { @@ -572,7 +537,7 @@ static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) { - struct genl_ops *ops; + const struct genl_ops *ops; struct net *net = sock_net(skb->sk); struct genl_info info; struct genlmsghdr *hdr = nlmsg_data(nlh); @@ -592,7 +557,7 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EOPNOTSUPP; if ((ops->flags & GENL_ADMIN_PERM) && - !capable(CAP_NET_ADMIN)) + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { @@ -604,7 +569,8 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { .module = family->module, - .data = ops, + /* we have const, but the netlink API doesn't */ + .data = (void *)ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -650,6 +616,7 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -726,24 +693,32 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_put_u32(skb, CTRL_ATTR_MAXATTR, family->maxattr)) goto nla_put_failure; - if (!list_empty(&family->ops_list)) { + if (family->n_ops) { struct nlattr *nla_ops; - struct genl_ops *ops; - int idx = 1; + int i; nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); if (nla_ops == NULL) goto nla_put_failure; - list_for_each_entry(ops, &family->ops_list, ops_list) { + for (i = 0; i < family->n_ops; i++) { struct nlattr *nest; + const struct genl_ops *ops = &family->ops[i]; + u32 op_flags = ops->flags; - nest = nla_nest_start(skb, idx++); + if (ops->dumpit) + op_flags |= GENL_CMD_CAP_DUMP; + if (ops->doit) + op_flags |= GENL_CMD_CAP_DO; + if (ops->policy) + op_flags |= GENL_CMD_CAP_HASPOL; + + nest = nla_nest_start(skb, i + 1); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, CTRL_ATTR_OP_ID, ops->cmd) || - nla_put_u32(skb, CTRL_ATTR_OP_FLAGS, ops->flags)) + nla_put_u32(skb, CTRL_ATTR_OP_FLAGS, op_flags)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -752,23 +727,26 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_ops); } - if (!list_empty(&family->mcast_groups)) { - struct genl_multicast_group *grp; + if (family->n_mcgrps) { struct nlattr *nla_grps; - int idx = 1; + 
int i; nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; - list_for_each_entry(grp, &family->mcast_groups, list) { + for (i = 0; i < family->n_mcgrps; i++) { struct nlattr *nest; + const struct genl_multicast_group *grp; + + grp = &family->mcgrps[i]; - nest = nla_nest_start(skb, idx++); + nest = nla_nest_start(skb, i + 1); if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id) || + if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, + family->mcgrp_offset + i) || nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME, grp->name)) goto nla_put_failure; @@ -785,9 +763,10 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, - u32 seq, u32 flags, struct sk_buff *skb, - u8 cmd) +static int ctrl_fill_mcgrp_info(struct genl_family *family, + const struct genl_multicast_group *grp, + int grp_id, u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) { void *hdr; struct nlattr *nla_grps; @@ -797,8 +776,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, if (hdr == NULL) return -1; - if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, grp->family->name) || - nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, grp->family->id)) + if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || + nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id)) goto nla_put_failure; nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); @@ -809,7 +788,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id) || + if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp_id) || nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME, grp->name)) goto nla_put_failure; @@ -875,8 +854,10 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, return skb; } -static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 portid, int seq, u8 cmd) +static struct sk_buff * +ctrl_build_mcgrp_msg(struct genl_family *family, + const struct genl_multicast_group *grp, + int grp_id, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -885,7 +866,8 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(family, grp, grp_id, portid, + seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -947,11 +929,11 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -static int genl_ctrl_event(int event, void *data) +static int genl_ctrl_event(int event, struct genl_family *family, + const struct genl_multicast_group *grp, + int grp_id) { struct sk_buff *msg; - struct genl_family *family; - struct genl_multicast_group *grp; /* genl is still initialising */ if (!init_net.genl_sock) @@ -960,14 +942,13 @@ static int genl_ctrl_event(int event, void *data) switch (event) { case CTRL_CMD_NEWFAMILY: case CTRL_CMD_DELFAMILY: - family = data; + WARN_ON(grp); msg = ctrl_build_family_msg(family, 0, 0, event); break; case CTRL_CMD_NEWMCAST_GRP: case CTRL_CMD_DELMCAST_GRP: - grp = data; - family = grp->family; - msg = ctrl_build_mcgrp_msg(data, 0, 0, event); + BUG_ON(!grp); + msg = ctrl_build_mcgrp_msg(family, grp, grp_id, 0, 0, event); break; default: return -EINVAL; @@ -977,26 +958,29 @@ static int 
genl_ctrl_event(int event, void *data) return PTR_ERR(msg); if (!family->netnsok) { - genlmsg_multicast_netns(&init_net, msg, 0, - GENL_ID_CTRL, GFP_KERNEL); + genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0, + 0, GFP_KERNEL); } else { rcu_read_lock(); - genlmsg_multicast_allns(msg, 0, GENL_ID_CTRL, GFP_ATOMIC); + genlmsg_multicast_allns(&genl_ctrl, msg, 0, + 0, GFP_ATOMIC); rcu_read_unlock(); } return 0; } -static struct genl_ops genl_ctrl_ops = { - .cmd = CTRL_CMD_GETFAMILY, - .doit = ctrl_getfamily, - .dumpit = ctrl_dumpfamily, - .policy = ctrl_policy, +static struct genl_ops genl_ctrl_ops[] = { + { + .cmd = CTRL_CMD_GETFAMILY, + .doit = ctrl_getfamily, + .dumpit = ctrl_dumpfamily, + .policy = ctrl_policy, + }, }; -static struct genl_multicast_group notify_grp = { - .name = "notify", +static struct genl_multicast_group genl_ctrl_groups[] = { + { .name = "notify", }, }; static int __net_init genl_pernet_init(struct net *net) @@ -1036,7 +1020,8 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1); + err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, + genl_ctrl_groups); if (err < 0) goto problem; @@ -1044,10 +1029,6 @@ static int __init genl_init(void) if (err) goto problem; - err = genl_register_mc_group(&genl_ctrl, ¬ify_grp); - if (err < 0) - goto problem; - return 0; problem: @@ -1085,14 +1066,18 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, - gfp_t flags) +int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags) { + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, +void genl_notify(struct genl_family *family, + struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1101,6 +1086,9 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, if (nlh) report = nlmsg_report(nlh); + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return; + group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); |
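
The genetlink portion of this diff replaces the removed genl_register_ops()/genl_register_mc_group() interface with per-family const arrays that are registered in a single call (genl_register_family_with_ops_groups(), visible in the genl_init() hunk above). A minimal sketch of how a family would be declared against the reworked API follows; every "foo_*" identifier is a hypothetical placeholder, only the genetlink calls and struct layouts come from the patch itself.

/*
 * Sketch of a generic netlink family using the array-based API this
 * diff introduces.  All foo_* names are made up for illustration.
 */
#include <linux/module.h>
#include <net/genetlink.h>

enum { FOO_ATTR_UNSPEC, FOO_ATTR_VALUE, __FOO_ATTR_MAX };
#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

enum { FOO_CMD_UNSPEC, FOO_CMD_GET };

/* index 0 of the family's group array; the core maps it to a global
 * group ID by adding family->mcgrp_offset */
enum { FOO_MCGRP_EVENTS };

static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
	[FOO_ATTR_VALUE] = { .type = NLA_U32 },
};

static int foo_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	/* command handlers are unchanged by this rework */
	return 0;
}

/* ops and multicast groups now live in const arrays owned by the family */
static const struct genl_ops foo_ops[] = {
	{
		.cmd	= FOO_CMD_GET,
		.doit	= foo_cmd_get,
		.policy	= foo_policy,
	},
};

static const struct genl_multicast_group foo_mcgrps[] = {
	[FOO_MCGRP_EVENTS] = { .name = "events", },
};

static struct genl_family foo_family = {
	.id		= GENL_ID_GENERATE,
	.name		= "foo_family",
	.version	= 1,
	.maxattr	= FOO_ATTR_MAX,
};

static int __init foo_init(void)
{
	/* one call registers the family, its ops and its groups, replacing
	 * the removed genl_register_ops()/genl_register_mc_group() pair */
	return genl_register_family_with_ops_groups(&foo_family, foo_ops,
						    foo_mcgrps);
}

static void __exit foo_exit(void)
{
	/* unregistering the family also tears down its groups */
	genl_unregister_family(&foo_family);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

Note that the group argument a family passes to genl_notify() or genlmsg_multicast_allns() under the new API is the index into its own mcgrps array (FOO_MCGRP_EVENTS here), not a global group number; as the hunks in those helpers show, the core adds family->mcgrp_offset internally and warns if the index is out of range.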