diff options
Diffstat (limited to 'net/netfilter/nfnetlink.c')
| -rw-r--r-- | net/netfilter/nfnetlink.c | 584 |
1 files changed, 338 insertions, 246 deletions
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4bc27a6334c..c138b8fbe28 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -3,7 +3,7 @@ * * (C) 2001 by Jay Schulist <jschlst@samba.org>, * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> - * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> + * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial netfilter messages via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -14,27 +14,19 @@ * of the GNU General Public License, incorporated herein by reference. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> -#include <linux/major.h> -#include <linux/sched.h> -#include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> -#include <linux/fcntl.h> #include <linux/skbuff.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <net/sock.h> #include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/netfilter.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); @@ -43,334 +35,434 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -#if 0 -#define DEBUGP(format, args...) \ - printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ - __LINE__, __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - -static struct sock *nfnl = NULL; -static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; -DECLARE_MUTEX(nfnl_sem); - -void nfnl_lock(void) +static struct { + struct mutex mutex; + const struct nfnetlink_subsystem __rcu *subsys; +} table[NFNL_SUBSYS_COUNT]; + +static const int nfnl_group2type[NFNLGRP_MAX+1] = { + [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, + [NFNLGRP_CONNTRACK_UPDATE] = NFNL_SUBSYS_CTNETLINK, + [NFNLGRP_CONNTRACK_DESTROY] = NFNL_SUBSYS_CTNETLINK, + [NFNLGRP_CONNTRACK_EXP_NEW] = NFNL_SUBSYS_CTNETLINK_EXP, + [NFNLGRP_CONNTRACK_EXP_UPDATE] = NFNL_SUBSYS_CTNETLINK_EXP, + [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, +}; + +void nfnl_lock(__u8 subsys_id) { - nfnl_shlock(); + mutex_lock(&table[subsys_id].mutex); } +EXPORT_SYMBOL_GPL(nfnl_lock); -void nfnl_unlock(void) +void nfnl_unlock(__u8 subsys_id) { - nfnl_shunlock(); + mutex_unlock(&table[subsys_id].mutex); } +EXPORT_SYMBOL_GPL(nfnl_unlock); -int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +#ifdef CONFIG_PROVE_LOCKING +int lockdep_nfnl_is_held(u8 subsys_id) { - DEBUGP("registering subsystem ID %u\n", n->subsys_id); + return lockdep_is_held(&table[subsys_id].mutex); +} +EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); +#endif - nfnl_lock(); - if (subsys_table[n->subsys_id]) { - nfnl_unlock(); +int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) +{ + nfnl_lock(n->subsys_id); + if (table[n->subsys_id].subsys) { + nfnl_unlock(n->subsys_id); return -EBUSY; } - subsys_table[n->subsys_id] = n; - nfnl_unlock(); + rcu_assign_pointer(table[n->subsys_id].subsys, n); + nfnl_unlock(n->subsys_id); return 0; } +EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); -int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { - DEBUGP("unregistering subsystem ID %u\n", n->subsys_id); - - nfnl_lock(); - subsys_table[n->subsys_id] = NULL; - nfnl_unlock(); - + nfnl_lock(n->subsys_id); + table[n->subsys_id].subsys = NULL; + nfnl_unlock(n->subsys_id); + synchronize_rcu(); return 0; } +EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) { u_int8_t subsys_id = NFNL_SUBSYS_ID(type); - if (subsys_id >= NFNL_SUBSYS_COUNT - || subsys_table[subsys_id] == NULL) + if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return subsys_table[subsys_id]; + return rcu_dereference(table[subsys_id].subsys); } -static inline struct nfnl_callback * -nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +static inline const struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) { u_int8_t cb_id = NFNL_MSG_TYPE(type); - - if (cb_id >= ss->cb_count) { - DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count); + + if (cb_id >= ss->cb_count) return NULL; - } return &ss->cb[cb_id]; } -void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - struct nfattr *nfa; - int size = NFA_LENGTH(attrlen); - - nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); - nfa->nfa_type = attrtype; - nfa->nfa_len = size; - memcpy(NFA_DATA(nfa), data, attrlen); - memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); + return netlink_has_listeners(net->nfnl, group); } +EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) { - memset(tb, 0, sizeof(struct nfattr *) * maxattr); - - while (NFA_OK(nfa, len)) { - unsigned flavor = NFA_TYPE(nfa); - if (flavor && flavor <= maxattr) - tb[flavor-1] = nfa; - nfa = NFA_NEXT(nfa, len); - } - - return 0; + return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); } +EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); -/** - * nfnetlink_check_attributes - check and parse nfnetlink attributes - * - * subsys: nfnl subsystem for which this message is to be parsed - * nlmsghdr: netlink message to be checked/parsed - * cda: array of pointers, needs to be at least subsys->attr_count big - * - */ -static int -nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, - struct nlmsghdr *nlh, struct nfattr *cda[]) +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, + unsigned int group, int echo, gfp_t flags) { - int min_len; - u_int16_t attr_count; - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - - if (unlikely(cb_id >= subsys->cb_count)) { - DEBUGP("msgtype %u >= %u, returning\n", - cb_id, subsys->cb_count); - return -EINVAL; - } - - min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); - if (unlikely(nlh->nlmsg_len < min_len)) - return -EINVAL; - - attr_count = subsys->cb[cb_id].attr_count; - memset(cda, 0, sizeof(struct nfattr *) * attr_count); - - /* check attribute lengths. */ - if (likely(nlh->nlmsg_len > min_len)) { - struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - - while (NFA_OK(attr, attrlen)) { - unsigned flavor = NFA_TYPE(attr); - if (flavor) { - if (flavor > attr_count) - return -EINVAL; - cda[flavor - 1] = attr; - } - attr = NFA_NEXT(attr, attrlen); - } - } - - /* implicit: if nlmsg_len == min_len, we return 0, and an empty - * (zeroed) cda[] array. The message is valid, but empty. */ - - return 0; + return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); } +EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - gfp_t allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; - int err = 0; - - NETLINK_CB(skb).dst_group = group; - if (echo) - atomic_inc(&skb->users); - netlink_broadcast(nfnl, skb, pid, group, allocation); - if (echo) - err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); - - return err; + return netlink_set_err(net->nfnl, portid, group, error); } +EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, portid, flags); } +EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ -static inline int nfnetlink_rcv_msg(struct sk_buff *skb, - struct nlmsghdr *nlh, int *errp) +static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct nfnl_callback *nc; - struct nfnetlink_subsystem *ss; - int type, err = 0; - - DEBUGP("entered; subsys=%u, msgtype=%u\n", - NFNL_SUBSYS_ID(nlh->nlmsg_type), - NFNL_MSG_TYPE(nlh->nlmsg_type)); - - /* Only requests are handled by kernel now. */ - if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { - DEBUGP("received non-request message\n"); - return 0; - } + struct net *net = sock_net(skb->sk); + const struct nfnl_callback *nc; + const struct nfnetlink_subsystem *ss; + int type, err; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < - NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) { - DEBUGP("received message was too short\n"); + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; - } type = nlh->nlmsg_type; +replay: + rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) { -#ifdef CONFIG_KMOD - /* don't call nfnl_shunlock, since it would reenter - * with further packet processing */ - up(&nfnl_sem); +#ifdef CONFIG_MODULES + rcu_read_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); - nfnl_shlock(); + rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) #endif - goto err_inval; + { + rcu_read_unlock(); + return -EINVAL; + } } nc = nfnetlink_find_client(type, ss); if (!nc) { - DEBUGP("unable to find client for type %d\n", type); - goto err_inval; - } - - if (nc->cap_required && - !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { - DEBUGP("permission denied for type %d\n", type); - *errp = -EPERM; - return -1; + rcu_read_unlock(); + return -EINVAL; } { - u_int16_t attr_count = - ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; - struct nfattr *cda[attr_count]; - - memset(cda, 0, sizeof(struct nfattr *) * attr_count); - - err = nfnetlink_check_attributes(ss, nlh, cda); - if (err < 0) - goto err_inval; - - DEBUGP("calling handler\n"); - err = nc->call(nfnl, skb, nlh, cda, errp); - *errp = err; + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + __u8 subsys_id = NFNL_SUBSYS_ID(type); + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) { + rcu_read_unlock(); + return err; + } + + if (nc->call_rcu) { + err = nc->call_rcu(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + nfnl_lock(subsys_id); + if (rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)) != ss || + nfnetlink_find_client(type, ss) != nc) + err = -EAGAIN; + else if (nc->call) + err = nc->call(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + else + err = -EINVAL; + nfnl_unlock(subsys_id); + } + if (err == -EAGAIN) + goto replay; return err; } - -err_inval: - DEBUGP("returning -EINVAL\n"); - *errp = -EINVAL; - return -1; } -/* Process one packet of messages. */ -static inline int nfnetlink_rcv_skb(struct sk_buff *skb) +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) { + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; int err; - struct nlmsghdr *nlh; - - while (skb->len >= NLMSG_SPACE(0)) { - u32 rlen; - - nlh = (struct nlmsghdr *)skb->data; - if (nlh->nlmsg_len < sizeof(struct nlmsghdr) - || skb->len < nlh->nlmsg_len) - return 0; - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - if (nfnetlink_rcv_msg(skb, nlh, &err)) { - if (!err) - return -1; - netlink_ack(skb, nlh, err); - } else - if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(skb, nlh, 0); - skb_pull(skb, rlen); + + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(nskb); + } } - return 0; + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(skb); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) { + err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers. + */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); } -static void nfnetlink_rcv(struct sock *sk, int len) +static void nfnetlink_rcv(struct sk_buff *skb) { - do { - struct sk_buff *skb; + struct nlmsghdr *nlh = nlmsg_hdr(skb); + int msglen; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { + netlink_ack(skb, nlh, -EPERM); + return; + } + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; - if (nfnl_shlock_nowait()) + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (nfnetlink_rcv_skb(skb)) { - if (skb->len) - skb_queue_head(&sk->sk_receive_queue, - skb); - else - kfree_skb(skb); - break; - } - kfree_skb(skb); - } + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } +} - /* don't call nfnl_shunlock, since it would reenter - * with further packet processing */ - up(&nfnl_sem); - } while(nfnl && nfnl->sk_receive_queue.qlen); +#ifdef CONFIG_MODULES +static int nfnetlink_bind(int group) +{ + const struct nfnetlink_subsystem *ss; + int type = nfnl_group2type[group]; + + rcu_read_lock(); + ss = nfnetlink_get_subsys(type); + rcu_read_unlock(); + if (!ss) + request_module("nfnetlink-subsys-%d", type); + return 0; } +#endif -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) +{ + struct sock *nfnl; + struct netlink_kernel_cfg cfg = { + .groups = NFNLGRP_MAX, + .input = nfnetlink_rcv, +#ifdef CONFIG_MODULES + .bind = nfnetlink_bind, +#endif + }; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; +} + +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Removing netfilter NETLINK layer.\n"); - sock_release(nfnl->sk_socket); - return; + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) + RCU_INIT_POINTER(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); } +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + static int __init nfnetlink_init(void) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + int i; - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -1; - } + for (i=0; i<NFNL_SUBSYS_COUNT; i++) + mutex_init(&table[i].mutex); - return 0; + pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + pr_info("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); - -EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); -EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -EXPORT_SYMBOL_GPL(nfnetlink_send); -EXPORT_SYMBOL_GPL(nfnetlink_unicast); -EXPORT_SYMBOL_GPL(nfattr_parse); -EXPORT_SYMBOL_GPL(__nfa_fill); |
