diff options
Diffstat (limited to 'net/netfilter/xt_hashlimit.c')
| -rw-r--r-- | net/netfilter/xt_hashlimit.c | 245 |
1 files changed, 183 insertions, 62 deletions
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9228ee0dc11..a3910fc2122 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -3,6 +3,7 @@ * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * Development of this code was funded by Astaro AG, http://www.astaro.com/ @@ -21,7 +22,7 @@ #include <linux/mm.h> #include <linux/in.h> #include <linux/ip.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #include <linux/ipv6.h> #include <net/ipv6.h> #endif @@ -64,7 +65,7 @@ struct dsthash_dst { __be32 src; __be32 dst; } ip; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) struct { __be32 src[4]; __be32 dst[4]; @@ -107,6 +108,7 @@ struct xt_hashlimit_htable { /* seq_file stuff */ struct proc_dir_entry *pde; + const char *name; struct net *net; struct hlist_head hash[0]; /* hashtable itself */ @@ -141,11 +143,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { struct dsthash_ent *ent; - struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + hlist_for_each_entry_rcu(ent, &ht->hash[hash], node) if (dst_cmp(ent, dst)) { spin_lock(&ent->lock); return ent; @@ -157,11 +158,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht, /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * dsthash_alloc_init(struct xt_hashlimit_htable *ht, - const struct dsthash_dst *dst) + const struct dsthash_dst *dst, bool *race) { struct dsthash_ent *ent; spin_lock(&ht->lock); + + /* Two or more packets may race to create the same entry in the + * hashtable, double check if this packet lost race. + */ + ent = dsthash_find(ht, dst); + if (ent != NULL) { + spin_unlock(&ht->lock); + *race = true; + return ent; + } + /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { @@ -171,15 +183,11 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, if (ht->cfg.max && ht->count >= ht->cfg.max) { /* FIXME: do something. question is what.. */ - if (net_ratelimit()) - pr_err("max count of %u reached\n", ht->cfg.max); + net_err_ratelimited("max count of %u reached\n", ht->cfg.max); ent = NULL; } else ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); - if (!ent) { - if (net_ratelimit()) - pr_err("cannot allocate dsthash_ent\n"); - } else { + if (ent) { memcpy(&ent->dst, dst, sizeof(ent->dst)); spin_lock_init(&ent->lock); @@ -247,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; + hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + if (!hinfo->name) { + vfree(hinfo); + return -ENOMEM; + } spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, @@ -254,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (hinfo->pde == NULL) { + kfree(hinfo->name); vfree(hinfo); return -ENOMEM; } @@ -290,8 +304,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { + struct hlist_node *n; + hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } @@ -311,19 +325,26 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) +static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); struct proc_dir_entry *parent; - del_timer_sync(&hinfo->timer); - if (hinfo->family == NFPROTO_IPV4) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - remove_proc_entry(hinfo->pde->name, parent); + + if (parent != NULL) + remove_proc_entry(hinfo->name, parent); +} + +static void htable_destroy(struct xt_hashlimit_htable *hinfo) +{ + del_timer_sync(&hinfo->timer); + htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); + kfree(hinfo->name); vfree(hinfo); } @@ -333,10 +354,9 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { - if (!strcmp(name, hinfo->pde->name) && + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { + if (!strcmp(name, hinfo->name) && hinfo->family == family) { hinfo->use++; return hinfo; @@ -391,9 +411,20 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +/* in byte mode, the lowest possible rate is one packet/second. + * credit_cap is used as a counter that tells us how many times we can + * refill the "credits available" counter when it becomes empty. + */ +#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ) +#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES) + +static u32 xt_hashlimit_len_to_chunks(u32 len) +{ + return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1; +} + /* Precision saver. */ -static inline u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -403,12 +434,53 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } -static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) +static u32 user2credits_byte(u32 user) { - dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; - if (dh->rateinfo.credit > dh->rateinfo.credit_cap) - dh->rateinfo.credit = dh->rateinfo.credit_cap; + u64 us = user; + us *= HZ * CREDITS_PER_JIFFY_BYTES; + return (u32) (us >> 32); +} + +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +{ + unsigned long delta = now - dh->rateinfo.prev; + u32 cap; + + if (delta == 0) + return; + dh->rateinfo.prev = now; + + if (mode & XT_HASHLIMIT_BYTES) { + u32 tmp = dh->rateinfo.credit; + dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; + cap = CREDITS_PER_JIFFY_BYTES * HZ; + if (tmp >= dh->rateinfo.credit) {/* overflow */ + dh->rateinfo.credit = cap; + return; + } + } else { + dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + cap = dh->rateinfo.credit_cap; + } + if (dh->rateinfo.credit > cap) + dh->rateinfo.credit = cap; +} + +static void rateinfo_init(struct dsthash_ent *dh, + struct xt_hashlimit_htable *hinfo) +{ + dh->rateinfo.prev = jiffies; + if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg); + dh->rateinfo.credit_cap = hinfo->cfg.burst; + } else { + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + dh->rateinfo.credit_cap = dh->rateinfo.credit; + } } static inline __be32 maskl(__be32 a, unsigned int l) @@ -416,7 +488,7 @@ static inline __be32 maskl(__be32 a, unsigned int l) return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0; } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) { switch (p) { @@ -466,8 +538,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ip_hdr(skb)->protocol; break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: + { + __be16 frag_off; + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) { memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr, sizeof(dst->ip6.dst)); @@ -483,10 +558,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if ((int)protoff < 0) return -1; break; + } #endif default: BUG(); @@ -510,6 +586,21 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; } +static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) +{ + u64 tmp = xt_hashlimit_len_to_chunks(len); + tmp = tmp * dh->rateinfo.cost; + + if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ)) + tmp = CREDITS_PER_JIFFY_BYTES * HZ; + + if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) { + dh->rateinfo.credit_cap--; + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + } + return (u32) tmp; +} + static bool hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -518,6 +609,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + bool race = false; + u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -525,27 +618,32 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock_bh(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { - dh = dsthash_alloc_init(hinfo, &dst); + dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { rcu_read_unlock_bh(); goto hotdrop; + } else if (race) { + /* Already got an entry, update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now, hinfo->cfg.mode); + } else { + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_init(dh, hinfo); } - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now); + rateinfo_recalc(dh, now, hinfo->cfg.mode); } - if (dh->rateinfo.credit >= dh->rateinfo.cost) { + if (info->cfg.mode & XT_HASHLIMIT_BYTES) + cost = hashlimit_byte_cost(skb->len, dh); + else + cost = dh->rateinfo.cost; + + if (dh->rateinfo.credit >= cost) { /* below the limit */ - dh->rateinfo.credit -= dh->rateinfo.cost; + dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); return !(info->cfg.mode & XT_HASHLIMIT_INVERT); @@ -567,14 +665,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) struct xt_hashlimit_mtinfo1 *info = par->matchinfo; int ret; - /* Check for overflow. */ - if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); - return -ERANGE; - } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) return -EINVAL; if (info->name[sizeof(info->name)-1] != '\0') @@ -587,6 +677,26 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + pr_info("Unknown mode mask %X, kernel too old?\n", + info->cfg.mode); + return -EINVAL; + } + + /* Check for overflow. */ + if (info->cfg.mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(info->cfg.avg) == 0) { + pr_info("overflow, rate too high: %u\n", info->cfg.avg); + return -EINVAL; + } + } else if (info->cfg.burst == 0 || + user2credits(info->cfg.avg * info->cfg.burst) < + user2credits(info->cfg.avg)) { + pr_info("overflow, try lower: %u/%u\n", + info->cfg.avg, info->cfg.burst); + return -ERANGE; + } + mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (info->hinfo == NULL) { @@ -618,7 +728,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, }, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "hashlimit", .revision = 1, @@ -679,10 +789,11 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { int res; + const struct xt_hashlimit_htable *ht = s->private; spin_lock(&ent->lock); /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies); + rateinfo_recalc(ent, jiffies, ht->cfg.mode); switch (family) { case NFPROTO_IPV4: @@ -695,7 +806,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, @@ -720,10 +831,9 @@ static int dl_seq_show(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; - struct hlist_node *pos; if (!hlist_empty(&htable->hash[*bucket])) { - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + hlist_for_each_entry(ent, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) return -1; } @@ -743,7 +853,7 @@ static int dl_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode)->data; + sf->private = PDE_DATA(inode); } return ret; } @@ -763,10 +873,10 @@ static int __net_init hashlimit_proc_net_init(struct net *net) hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net); if (!hashlimit_net->ipt_hashlimit) return -ENOMEM; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); if (!hashlimit_net->ip6t_hashlimit) { - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); return -ENOMEM; } #endif @@ -775,9 +885,23 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { - proc_net_remove(net, "ipt_hashlimit"); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - proc_net_remove(net, "ip6t_hashlimit"); + struct xt_hashlimit_htable *hinfo; + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). + * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc + * entries is empty before trying to remove it. + */ + mutex_lock(&hashlimit_mutex); + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) + htable_remove_proc_entry(hinfo); + hashlimit_net->ipt_hashlimit = NULL; + hashlimit_net->ip6t_hashlimit = NULL; + mutex_unlock(&hashlimit_mutex); + + remove_proc_entry("ipt_hashlimit", net->proc_net); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + remove_proc_entry("ip6t_hashlimit", net->proc_net); #endif } @@ -791,9 +915,6 @@ static int __net_init hashlimit_net_init(struct net *net) static void __net_exit hashlimit_net_exit(struct net *net) { - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - - BUG_ON(!hlist_empty(&hashlimit_net->htables)); hashlimit_proc_net_exit(net); } |
