aboutsummaryrefslogtreecommitdiff
path: root/net/netfilter/xt_hashlimit.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/xt_hashlimit.c')
-rw-r--r--net/netfilter/xt_hashlimit.c245
1 files changed, 183 insertions, 62 deletions
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9228ee0dc11..a3910fc2122 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -3,6 +3,7 @@
* separately for each hashbucket (sourceip/sourceport/dstip/dstport)
*
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
*
* Development of this code was funded by Astaro AG, http://www.astaro.com/
@@ -21,7 +22,7 @@
#include <linux/mm.h>
#include <linux/in.h>
#include <linux/ip.h>
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#include <linux/ipv6.h>
#include <net/ipv6.h>
#endif
@@ -64,7 +65,7 @@ struct dsthash_dst {
__be32 src;
__be32 dst;
} ip;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
struct {
__be32 src[4];
__be32 dst[4];
@@ -107,6 +108,7 @@ struct xt_hashlimit_htable {
/* seq_file stuff */
struct proc_dir_entry *pde;
+ const char *name;
struct net *net;
struct hlist_head hash[0]; /* hashtable itself */
@@ -141,11 +143,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
const struct dsthash_dst *dst)
{
struct dsthash_ent *ent;
- struct hlist_node *pos;
u_int32_t hash = hash_dst(ht, dst);
if (!hlist_empty(&ht->hash[hash])) {
- hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node)
+ hlist_for_each_entry_rcu(ent, &ht->hash[hash], node)
if (dst_cmp(ent, dst)) {
spin_lock(&ent->lock);
return ent;
@@ -157,11 +158,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
/* allocate dsthash_ent, initialize dst, put in htable and lock it */
static struct dsthash_ent *
dsthash_alloc_init(struct xt_hashlimit_htable *ht,
- const struct dsthash_dst *dst)
+ const struct dsthash_dst *dst, bool *race)
{
struct dsthash_ent *ent;
spin_lock(&ht->lock);
+
+ /* Two or more packets may race to create the same entry in the
+ * hashtable, double check if this packet lost race.
+ */
+ ent = dsthash_find(ht, dst);
+ if (ent != NULL) {
+ spin_unlock(&ht->lock);
+ *race = true;
+ return ent;
+ }
+
/* initialize hash with random val at the time we allocate
* the first hashtable entry */
if (unlikely(!ht->rnd_initialized)) {
@@ -171,15 +183,11 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
if (ht->cfg.max && ht->count >= ht->cfg.max) {
/* FIXME: do something. question is what.. */
- if (net_ratelimit())
- pr_err("max count of %u reached\n", ht->cfg.max);
+ net_err_ratelimited("max count of %u reached\n", ht->cfg.max);
ent = NULL;
} else
ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
- if (!ent) {
- if (net_ratelimit())
- pr_err("cannot allocate dsthash_ent\n");
- } else {
+ if (ent) {
memcpy(&ent->dst, dst, sizeof(ent->dst));
spin_lock_init(&ent->lock);
@@ -247,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
+ hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+ if (!hinfo->name) {
+ vfree(hinfo);
+ return -ENOMEM;
+ }
spin_lock_init(&hinfo->lock);
hinfo->pde = proc_create_data(minfo->name, 0,
@@ -254,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
&dl_file_ops, hinfo);
if (hinfo->pde == NULL) {
+ kfree(hinfo->name);
vfree(hinfo);
return -ENOMEM;
}
@@ -290,8 +304,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
spin_lock_bh(&ht->lock);
for (i = 0; i < ht->cfg.size; i++) {
struct dsthash_ent *dh;
- struct hlist_node *pos, *n;
- hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
+ struct hlist_node *n;
+ hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
if ((*select)(ht, dh))
dsthash_free(ht, dh);
}
@@ -311,19 +325,26 @@ static void htable_gc(unsigned long htlong)
add_timer(&ht->timer);
}
-static void htable_destroy(struct xt_hashlimit_htable *hinfo)
+static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net);
struct proc_dir_entry *parent;
- del_timer_sync(&hinfo->timer);
-
if (hinfo->family == NFPROTO_IPV4)
parent = hashlimit_net->ipt_hashlimit;
else
parent = hashlimit_net->ip6t_hashlimit;
- remove_proc_entry(hinfo->pde->name, parent);
+
+ if (parent != NULL)
+ remove_proc_entry(hinfo->name, parent);
+}
+
+static void htable_destroy(struct xt_hashlimit_htable *hinfo)
+{
+ del_timer_sync(&hinfo->timer);
+ htable_remove_proc_entry(hinfo);
htable_selective_cleanup(hinfo, select_all);
+ kfree(hinfo->name);
vfree(hinfo);
}
@@ -333,10 +354,9 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
- struct hlist_node *pos;
- hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) {
- if (!strcmp(name, hinfo->pde->name) &&
+ hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
+ if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
hinfo->use++;
return hinfo;
@@ -391,9 +411,20 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+/* in byte mode, the lowest possible rate is one packet/second.
+ * credit_cap is used as a counter that tells us how many times we can
+ * refill the "credits available" counter when it becomes empty.
+ */
+#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ)
+#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES)
+
+static u32 xt_hashlimit_len_to_chunks(u32 len)
+{
+ return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1;
+}
+
/* Precision saver. */
-static inline u_int32_t
-user2credits(u_int32_t user)
+static u32 user2credits(u32 user)
{
/* If multiplying would overflow... */
if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
@@ -403,12 +434,53 @@ user2credits(u_int32_t user)
return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
}
-static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
+static u32 user2credits_byte(u32 user)
{
- dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
- if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
- dh->rateinfo.credit = dh->rateinfo.credit_cap;
+ u64 us = user;
+ us *= HZ * CREDITS_PER_JIFFY_BYTES;
+ return (u32) (us >> 32);
+}
+
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+{
+ unsigned long delta = now - dh->rateinfo.prev;
+ u32 cap;
+
+ if (delta == 0)
+ return;
+
dh->rateinfo.prev = now;
+
+ if (mode & XT_HASHLIMIT_BYTES) {
+ u32 tmp = dh->rateinfo.credit;
+ dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
+ cap = CREDITS_PER_JIFFY_BYTES * HZ;
+ if (tmp >= dh->rateinfo.credit) {/* overflow */
+ dh->rateinfo.credit = cap;
+ return;
+ }
+ } else {
+ dh->rateinfo.credit += delta * CREDITS_PER_JIFFY;
+ cap = dh->rateinfo.credit_cap;
+ }
+ if (dh->rateinfo.credit > cap)
+ dh->rateinfo.credit = cap;
+}
+
+static void rateinfo_init(struct dsthash_ent *dh,
+ struct xt_hashlimit_htable *hinfo)
+{
+ dh->rateinfo.prev = jiffies;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+ dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
+ dh->rateinfo.credit_cap = hinfo->cfg.burst;
+ } else {
+ dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+ hinfo->cfg.burst);
+ dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+ dh->rateinfo.credit_cap = dh->rateinfo.credit;
+ }
}
static inline __be32 maskl(__be32 a, unsigned int l)
@@ -416,7 +488,7 @@ static inline __be32 maskl(__be32 a, unsigned int l)
return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0;
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
{
switch (p) {
@@ -466,8 +538,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
return 0;
nexthdr = ip_hdr(skb)->protocol;
break;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
+ {
+ __be16 frag_off;
+
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
sizeof(dst->ip6.dst));
@@ -483,10 +558,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
(XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
return 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
if ((int)protoff < 0)
return -1;
break;
+ }
#endif
default:
BUG();
@@ -510,6 +586,21 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
return 0;
}
+static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
+{
+ u64 tmp = xt_hashlimit_len_to_chunks(len);
+ tmp = tmp * dh->rateinfo.cost;
+
+ if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ))
+ tmp = CREDITS_PER_JIFFY_BYTES * HZ;
+
+ if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) {
+ dh->rateinfo.credit_cap--;
+ dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+ }
+ return (u32) tmp;
+}
+
static bool
hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -518,6 +609,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
unsigned long now = jiffies;
struct dsthash_ent *dh;
struct dsthash_dst dst;
+ bool race = false;
+ u32 cost;
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
@@ -525,27 +618,32 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
rcu_read_lock_bh();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
- dh = dsthash_alloc_init(hinfo, &dst);
+ dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
rcu_read_unlock_bh();
goto hotdrop;
+ } else if (race) {
+ /* Already got an entry, update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode);
+ } else {
+ dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_init(dh, hinfo);
}
- dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
- dh->rateinfo.prev = jiffies;
- dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
} else {
/* update expiration timeout */
dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
- rateinfo_recalc(dh, now);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode);
}
- if (dh->rateinfo.credit >= dh->rateinfo.cost) {
+ if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+ cost = hashlimit_byte_cost(skb->len, dh);
+ else
+ cost = dh->rateinfo.cost;
+
+ if (dh->rateinfo.credit >= cost) {
/* below the limit */
- dh->rateinfo.credit -= dh->rateinfo.cost;
+ dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
rcu_read_unlock_bh();
return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
@@ -567,14 +665,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
int ret;
- /* Check for overflow. */
- if (info->cfg.burst == 0 ||
- user2credits(info->cfg.avg * info->cfg.burst) <
- user2credits(info->cfg.avg)) {
- pr_info("overflow, try lower: %u/%u\n",
- info->cfg.avg, info->cfg.burst);
- return -ERANGE;
- }
if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
return -EINVAL;
if (info->name[sizeof(info->name)-1] != '\0')
@@ -587,6 +677,26 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
}
+ if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
+ pr_info("Unknown mode mask %X, kernel too old?\n",
+ info->cfg.mode);
+ return -EINVAL;
+ }
+
+ /* Check for overflow. */
+ if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
+ if (user2credits_byte(info->cfg.avg) == 0) {
+ pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+ return -EINVAL;
+ }
+ } else if (info->cfg.burst == 0 ||
+ user2credits(info->cfg.avg * info->cfg.burst) <
+ user2credits(info->cfg.avg)) {
+ pr_info("overflow, try lower: %u/%u\n",
+ info->cfg.avg, info->cfg.burst);
+ return -ERANGE;
+ }
+
mutex_lock(&hashlimit_mutex);
info->hinfo = htable_find_get(net, info->name, par->family);
if (info->hinfo == NULL) {
@@ -618,7 +728,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
},
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "hashlimit",
.revision = 1,
@@ -679,10 +789,11 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
int res;
+ const struct xt_hashlimit_htable *ht = s->private;
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies);
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode);
switch (family) {
case NFPROTO_IPV4:
@@ -695,7 +806,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
ent->rateinfo.credit, ent->rateinfo.credit_cap,
ent->rateinfo.cost);
break;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
(long)(ent->expires - jiffies)/HZ,
@@ -720,10 +831,9 @@ static int dl_seq_show(struct seq_file *s, void *v)
struct xt_hashlimit_htable *htable = s->private;
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
- struct hlist_node *pos;
if (!hlist_empty(&htable->hash[*bucket])) {
- hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
if (dl_seq_real_show(ent, htable->family, s))
return -1;
}
@@ -743,7 +853,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- sf->private = PDE(inode)->data;
+ sf->private = PDE_DATA(inode);
}
return ret;
}
@@ -763,10 +873,10 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);
if (!hashlimit_net->ipt_hashlimit)
return -ENOMEM;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);
if (!hashlimit_net->ip6t_hashlimit) {
- proc_net_remove(net, "ipt_hashlimit");
+ remove_proc_entry("ipt_hashlimit", net->proc_net);
return -ENOMEM;
}
#endif
@@ -775,9 +885,23 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
static void __net_exit hashlimit_proc_net_exit(struct net *net)
{
- proc_net_remove(net, "ipt_hashlimit");
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
- proc_net_remove(net, "ip6t_hashlimit");
+ struct xt_hashlimit_htable *hinfo;
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ /* hashlimit_net_exit() is called before hashlimit_mt_destroy().
+ * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc
+ * entries is empty before trying to remove it.
+ */
+ mutex_lock(&hashlimit_mutex);
+ hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
+ htable_remove_proc_entry(hinfo);
+ hashlimit_net->ipt_hashlimit = NULL;
+ hashlimit_net->ip6t_hashlimit = NULL;
+ mutex_unlock(&hashlimit_mutex);
+
+ remove_proc_entry("ipt_hashlimit", net->proc_net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ remove_proc_entry("ip6t_hashlimit", net->proc_net);
#endif
}
@@ -791,9 +915,6 @@ static int __net_init hashlimit_net_init(struct net *net)
static void __net_exit hashlimit_net_exit(struct net *net)
{
- struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
-
- BUG_ON(!hlist_empty(&hashlimit_net->htables));
hashlimit_proc_net_exit(net);
}