aboutsummaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig33
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c573
-rw-r--r--net/sched/act_csum.c45
-rw-r--r--net/sched/act_gact.c49
-rw-r--r--net/sched/act_ipt.c89
-rw-r--r--net/sched/act_mirred.c65
-rw-r--r--net/sched/act_nat.c46
-rw-r--r--net/sched/act_pedit.c58
-rw-r--r--net/sched/act_police.c98
-rw-r--r--net/sched/act_simple.c73
-rw-r--r--net/sched/act_skbedit.c51
-rw-r--r--net/sched/cls_api.c144
-rw-r--r--net/sched/cls_basic.c35
-rw-r--r--net/sched/cls_bpf.c382
-rw-r--r--net/sched/cls_cgroup.c129
-rw-r--r--net/sched/cls_flow.c19
-rw-r--r--net/sched/cls_fw.c91
-rw-r--r--net/sched/cls_route.c30
-rw-r--r--net/sched/cls_rsvp.h17
-rw-r--r--net/sched/cls_tcindex.c64
-rw-r--r--net/sched/cls_u32.c65
-rw-r--r--net/sched/em_ipset.c7
-rw-r--r--net/sched/em_meta.c163
-rw-r--r--net/sched/sch_api.c34
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c16
-rw-r--r--net/sched/sch_choke.c16
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c39
-rw-r--r--net/sched/sch_fq.c94
-rw-r--r--net/sched/sch_fq_codel.c12
-rw-r--r--net/sched/sch_generic.c19
-rw-r--r--net/sched/sch_gred.c4
-rw-r--r--net/sched/sch_hfsc.c3
-rw-r--r--net/sched/sch_hhf.c740
-rw-r--r--net/sched/sch_htb.c91
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_mq.c13
-rw-r--r--net/sched/sch_mqprio.c10
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c198
-rw-r--r--net/sched/sch_pie.c566
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c23
-rw-r--r--net/sched/sch_tbf.c175
46 files changed, 2959 insertions, 1440 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03a32a0418..a1a8e29e5fc 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -286,6 +286,28 @@ config NET_SCH_FQ
If unsure, say N.
+config NET_SCH_HHF
+ tristate "Heavy-Hitter Filter (HHF)"
+ help
+ Say Y here if you want to use the Heavy-Hitter Filter (HHF)
+ packet scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_hhf.
+
+config NET_SCH_PIE
+ tristate "Proportional Integral controller Enhanced (PIE) scheduler"
+ help
+ Say Y here if you want to use the Proportional Integral controller
+ Enhanced scheduler packet scheduling algorithm.
+ For more information, please see
+ http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_pie.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
@@ -435,6 +457,7 @@ config NET_CLS_FLOW
config NET_CLS_CGROUP
tristate "Control Group Classifier"
select NET_CLS
+ select CGROUP_NET_CLASSID
depends on CGROUPS
---help---
Say Y here if you want to classify packets based on the control
@@ -443,6 +466,16 @@ config NET_CLS_CGROUP
To compile this code as a module, choose M here: the
module will be called cls_cgroup.
+config NET_CLS_BPF
+ tristate "BPF-based classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ programmable BPF (JIT'ed) filters as an alternative to ematches.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_bpf.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e5f9abe9a5d..0a869a11f3e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -40,6 +40,8 @@ obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
+obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
+obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
@@ -50,6 +52,7 @@ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
+obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fd7072827a4..648778aef1a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,42 +27,40 @@
#include <net/act_api.h>
#include <net/netlink.h>
-void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_destroy(struct tc_action *a)
{
- unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
- struct tcf_common **p1p;
-
- for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
- if (*p1p == p) {
- write_lock_bh(hinfo->lock);
- *p1p = p->tcfc_next;
- write_unlock_bh(hinfo->lock);
- gen_kill_estimator(&p->tcfc_bstats,
- &p->tcfc_rate_est);
- /*
- * gen_estimator est_timer() might access p->tcfc_lock
- * or bstats, wait a RCU grace period before freeing p
- */
- kfree_rcu(p, tcfc_rcu);
- return;
- }
- }
- WARN_ON(1);
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
+ spin_lock_bh(&hinfo->lock);
+ hlist_del(&p->tcfc_head);
+ spin_unlock_bh(&hinfo->lock);
+ gen_kill_estimator(&p->tcfc_bstats,
+ &p->tcfc_rate_est);
+ /*
+ * gen_estimator est_timer() might access p->tcfc_lock
+ * or bstats, wait a RCU grace period before freeing p
+ */
+ kfree_rcu(p, tcfc_rcu);
}
EXPORT_SYMBOL(tcf_hash_destroy);
-int tcf_hash_release(struct tcf_common *p, int bind,
- struct tcf_hashinfo *hinfo)
+int tcf_hash_release(struct tc_action *a, int bind)
{
+ struct tcf_common *p = a->priv;
int ret = 0;
if (p) {
if (bind)
p->tcfc_bindcnt--;
+ else if (p->tcfc_bindcnt > 0)
+ return -EPERM;
p->tcfc_refcnt--;
if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
- tcf_hash_destroy(p, hinfo);
+ if (a->ops->cleanup)
+ a->ops->cleanup(a, bind);
+ tcf_hash_destroy(a);
ret = 1;
}
}
@@ -71,20 +69,22 @@ int tcf_hash_release(struct tcf_common *p, int bind,
EXPORT_SYMBOL(tcf_hash_release);
static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
- struct tc_action *a, struct tcf_hashinfo *hinfo)
+ struct tc_action *a)
{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
- read_lock_bh(hinfo->lock);
+ spin_lock_bh(&hinfo->lock);
s_i = cb->args[0];
for (i = 0; i < (hinfo->hmask + 1); i++) {
- p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+ head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
- for (; p; p = p->tcfc_next) {
+ hlist_for_each_entry_rcu(p, head, tcfc_head) {
index++;
if (index < s_i)
continue;
@@ -107,7 +107,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
}
}
done:
- read_unlock_bh(hinfo->lock);
+ spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -117,12 +117,15 @@ nla_put_failure:
goto done;
}
-static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
- struct tcf_hashinfo *hinfo)
+static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
{
- struct tcf_common *p, *s_p;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct hlist_head *head;
+ struct hlist_node *n;
+ struct tcf_common *p;
struct nlattr *nest;
int i = 0, n_i = 0;
+ int ret = -EINVAL;
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
@@ -130,14 +133,15 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
for (i = 0; i < (hinfo->hmask + 1); i++) {
- p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
- while (p != NULL) {
- s_p = p->tcfc_next;
- if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
+ head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
+ hlist_for_each_entry_safe(p, n, head, tcfc_head) {
+ a->priv = p;
+ ret = tcf_hash_release(a, 0);
+ if (ret == ACT_P_DELETED) {
module_put(a->ops->owner);
- n_i++;
- p = s_p;
+ n_i++;
+ } else if (ret < 0)
+ goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -147,51 +151,48 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
- return -EINVAL;
+ return ret;
}
-int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
- int type, struct tc_action *a)
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = a->ops->hinfo;
-
if (type == RTM_DELACTION) {
- return tcf_del_walker(skb, a, hinfo);
+ return tcf_del_walker(skb, a);
} else if (type == RTM_GETACTION) {
- return tcf_dump_walker(skb, cb, a, hinfo);
+ return tcf_dump_walker(skb, cb, a);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
}
}
-EXPORT_SYMBOL(tcf_generic_walker);
-struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
- struct tcf_common *p;
+ struct tcf_common *p = NULL;
+ struct hlist_head *head;
- read_lock_bh(hinfo->lock);
- for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
- p = p->tcfc_next) {
+ spin_lock_bh(&hinfo->lock);
+ head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
+ hlist_for_each_entry_rcu(p, head, tcfc_head)
if (p->tcfc_index == index)
break;
- }
- read_unlock_bh(hinfo->lock);
+ spin_unlock_bh(&hinfo->lock);
return p;
}
-EXPORT_SYMBOL(tcf_hash_lookup);
-u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
+u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo)
{
- u32 val = *idx_gen;
+ u32 val = hinfo->index;
do {
if (++val == 0)
val = 1;
} while (tcf_hash_lookup(val, hinfo));
- return (*idx_gen = val);
+ hinfo->index = val;
+ return val;
}
EXPORT_SYMBOL(tcf_hash_new_index);
@@ -208,34 +209,46 @@ int tcf_hash_search(struct tc_action *a, u32 index)
}
EXPORT_SYMBOL(tcf_hash_search);
-struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
- struct tcf_hashinfo *hinfo)
+int tcf_hash_check(u32 index, struct tc_action *a, int bind)
{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = NULL;
if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
if (bind)
p->tcfc_bindcnt++;
p->tcfc_refcnt++;
a->priv = p;
+ return 1;
}
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_check);
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
- struct tc_action *a, int size, int bind,
- u32 *idx_gen, struct tcf_hashinfo *hinfo)
+void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
{
+ struct tcf_common *pc = a->priv;
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+}
+EXPORT_SYMBOL(tcf_hash_cleanup);
+
+int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
+ int size, int bind)
+{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
if (unlikely(!p))
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
spin_lock_init(&p->tcfc_lock);
- p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
+ INIT_HLIST_NODE(&p->tcfc_head);
+ p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
@@ -243,42 +256,64 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
&p->tcfc_lock, est);
if (err) {
kfree(p);
- return ERR_PTR(err);
+ return err;
}
}
a->priv = (void *) p;
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
-void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_insert(struct tc_action *a)
{
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
- write_lock_bh(hinfo->lock);
- p->tcfc_next = hinfo->htab[h];
- hinfo->htab[h] = p;
- write_unlock_bh(hinfo->lock);
+ spin_lock_bh(&hinfo->lock);
+ hlist_add_head(&p->tcfc_head, &hinfo->htab[h]);
+ spin_unlock_bh(&hinfo->lock);
}
EXPORT_SYMBOL(tcf_hash_insert);
-static struct tc_action_ops *act_base = NULL;
+static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
-int tcf_register_action(struct tc_action_ops *act)
+int tcf_register_action(struct tc_action_ops *act, unsigned int mask)
{
- struct tc_action_ops *a, **ap;
+ struct tc_action_ops *a;
+ int err;
+
+ /* Must supply act, dump and init */
+ if (!act->act || !act->dump || !act->init)
+ return -EINVAL;
+
+ /* Supply defaults */
+ if (!act->lookup)
+ act->lookup = tcf_hash_search;
+ if (!act->walk)
+ act->walk = tcf_generic_walker;
+
+ act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
+ if (!act->hinfo)
+ return -ENOMEM;
+ err = tcf_hashinfo_init(act->hinfo, mask);
+ if (err) {
+ kfree(act->hinfo);
+ return err;
+ }
write_lock(&act_mod_lock);
- for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
return -EEXIST;
}
}
- act->next = NULL;
- *ap = act;
+ list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
return 0;
}
@@ -286,17 +321,18 @@ EXPORT_SYMBOL(tcf_register_action);
int tcf_unregister_action(struct tc_action_ops *act)
{
- struct tc_action_ops *a, **ap;
+ struct tc_action_ops *a;
int err = -ENOENT;
write_lock(&act_mod_lock);
- for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
- if (a == act)
+ list_for_each_entry(a, &act_base, head) {
+ if (a == act) {
+ list_del(&act->head);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
+ err = 0;
break;
- if (a) {
- *ap = a->next;
- a->next = NULL;
- err = 0;
+ }
}
write_unlock(&act_mod_lock);
return err;
@@ -306,69 +342,42 @@ EXPORT_SYMBOL(tcf_unregister_action);
/* lookup by name */
static struct tc_action_ops *tc_lookup_action_n(char *kind)
{
- struct tc_action_ops *a = NULL;
+ struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (strcmp(kind, a->kind) == 0) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
+ if (try_module_get(a->owner))
+ res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
- return a;
+ return res;
}
/* lookup by nlattr */
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
{
- struct tc_action_ops *a = NULL;
+ struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (nla_strcmp(kind, a->kind) == 0) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
+ if (try_module_get(a->owner))
+ res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
- return a;
+ return res;
}
-#if 0
-/* lookup by id */
-static struct tc_action_ops *tc_lookup_action_id(u32 type)
-{
- struct tc_action_ops *a = NULL;
-
- if (type) {
- read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
- if (a->type == type) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
- break;
- }
- }
- read_unlock(&act_mod_lock);
- }
- return a;
-}
-#endif
-
-int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
+int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
struct tcf_result *res)
{
const struct tc_action *a;
@@ -379,53 +388,44 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
ret = TC_ACT_OK;
goto exec_done;
}
- while ((a = act) != NULL) {
+ list_for_each_entry(a, actions, list) {
repeat:
- if (a->ops && a->ops->act) {
- ret = a->ops->act(skb, a, res);
- if (TC_MUNGED & skb->tc_verd) {
- /* copied already, allow trampling */
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
- }
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
- if (ret != TC_ACT_PIPE)
- goto exec_done;
+ ret = a->ops->act(skb, a, res);
+ if (TC_MUNGED & skb->tc_verd) {
+ /* copied already, allow trampling */
+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+ skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
}
- act = a->next;
+ if (ret == TC_ACT_REPEAT)
+ goto repeat; /* we need a ttl - JHS */
+ if (ret != TC_ACT_PIPE)
+ goto exec_done;
}
exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
-void tcf_action_destroy(struct tc_action *act, int bind)
+int tcf_action_destroy(struct list_head *actions, int bind)
{
- struct tc_action *a;
+ struct tc_action *a, *tmp;
+ int ret = 0;
- for (a = act; a; a = act) {
- if (a->ops && a->ops->cleanup) {
- if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
- module_put(a->ops->owner);
- act = act->next;
- kfree(a);
- } else {
- /*FIXME: Remove later - catch insertion bugs*/
- WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n");
- act = act->next;
- kfree(a);
- }
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ ret = tcf_hash_release(a, bind);
+ if (ret == ACT_P_DELETED)
+ module_put(a->ops->owner);
+ else if (ret < 0)
+ return ret;
+ list_del(&a->list);
+ kfree(a);
}
+ return ret;
}
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- int err = -EINVAL;
-
- if (a->ops == NULL || a->ops->dump == NULL)
- return err;
return a->ops->dump(skb, a, bind, ref);
}
@@ -436,9 +436,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
- if (a->ops == NULL || a->ops->dump == NULL)
- return err;
-
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
@@ -459,14 +456,13 @@ nla_put_failure:
EXPORT_SYMBOL(tcf_action_dump_1);
int
-tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
struct nlattr *nest;
- while ((a = act) != NULL) {
- act = a->next;
+ list_for_each_entry(a, actions, list) {
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
@@ -541,6 +537,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (a == NULL)
goto err_mod;
+ a->ops = a_o;
+ INIT_LIST_HEAD(&a->list);
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
@@ -555,7 +553,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
*/
if (err != ACT_P_CREATED)
module_put(a_o->owner);
- a->ops = a_o;
return a;
@@ -567,37 +564,33 @@ err_out:
return ERR_PTR(err);
}
-struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla,
+int tcf_action_init(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
- int bind)
+ int bind, struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct tc_action *head = NULL, *act, *act_prev = NULL;
+ struct tc_action *act;
int err;
int i;
err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (err < 0)
- return ERR_PTR(err);
+ return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
- if (IS_ERR(act))
+ if (IS_ERR(act)) {
+ err = PTR_ERR(act);
goto err;
+ }
act->order = i;
-
- if (head == NULL)
- head = act;
- else
- act_prev->next = act;
- act_prev = act;
+ list_add_tail(&act->list, actions);
}
- return head;
+ return 0;
err:
- if (head != NULL)
- tcf_action_destroy(head, bind);
- return act;
+ tcf_action_destroy(actions, bind);
+ return err;
}
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
@@ -605,9 +598,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
{
int err = 0;
struct gnet_dump d;
- struct tcf_act_hdr *h = a->priv;
+ struct tcf_common *p = a->priv;
- if (h == NULL)
+ if (p == NULL)
goto errout;
/* compat_mode being true specifies a call that is supposed
@@ -616,24 +609,20 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (compat_mode) {
if (a->type == TCA_OLD_COMPAT)
err = gnet_stats_start_copy_compat(skb, 0,
- TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d);
+ TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d);
else
return 0;
} else
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
- &h->tcf_lock, &d);
+ &p->tcfc_lock, &d);
if (err < 0)
goto errout;
- if (a->ops != NULL && a->ops->get_stats != NULL)
- if (a->ops->get_stats(skb, a) < 0)
- goto errout;
-
- if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &h->tcf_bstats,
- &h->tcf_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
+ if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 ||
+ gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
+ &p->tcfc_rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
@@ -646,7 +635,7 @@ errout:
}
static int
-tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
+tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq,
u16 flags, int event, int bind, int ref)
{
struct tcamsg *t;
@@ -666,7 +655,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
if (nest == NULL)
goto out_nlmsg_trim;
- if (tcf_action_dump(skb, a, bind, ref) < 0)
+ if (tcf_action_dump(skb, actions, bind, ref) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
@@ -681,14 +670,14 @@ out_nlmsg_trim:
static int
act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct tc_action *a, int event)
+ struct list_head *actions, int event)
{
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -696,6 +685,20 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return rtnl_unicast(skb, net, portid);
}
+static struct tc_action *create_a(int i)
+{
+ struct tc_action *act;
+
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
+ if (act == NULL) {
+ pr_debug("create_a: failed to alloc!\n");
+ return NULL;
+ }
+ act->order = i;
+ INIT_LIST_HEAD(&act->list);
+ return act;
+}
+
static struct tc_action *
tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
{
@@ -715,16 +718,14 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -ENOMEM;
- a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
+ a = create_a(0);
if (a == NULL)
goto err_out;
err = -EINVAL;
a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
- if (a->ops == NULL)
+ if (a->ops == NULL) /* could happen in batch of actions */
goto err_free;
- if (a->ops->lookup == NULL)
- goto err_mod;
err = -ENOENT;
if (a->ops->lookup(a, index) == 0)
goto err_mod;
@@ -740,29 +741,16 @@ err_out:
return ERR_PTR(err);
}
-static void cleanup_a(struct tc_action *act)
+static void cleanup_a(struct list_head *actions)
{
- struct tc_action *a;
+ struct tc_action *a, *tmp;
- for (a = act; a; a = act) {
- act = a->next;
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ list_del(&a->list);
kfree(a);
}
}
-static struct tc_action *create_a(int i)
-{
- struct tc_action *act;
-
- act = kzalloc(sizeof(*act), GFP_KERNEL);
- if (act == NULL) {
- pr_debug("create_a: failed to alloc!\n");
- return NULL;
- }
- act->order = i;
- return act;
-}
-
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
@@ -774,18 +762,12 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlattr *nest;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
- struct tc_action *a = create_a(0);
+ struct tc_action a;
int err = -ENOMEM;
- if (a == NULL) {
- pr_debug("tca_action_flush: couldnt create tc_action\n");
- return err;
- }
-
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
pr_debug("tca_action_flush: failed skb alloc\n");
- kfree(a);
return err;
}
@@ -797,8 +779,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- a->ops = tc_lookup_action(kind);
- if (a->ops == NULL)
+ memset(&a, 0, sizeof(struct tc_action));
+ INIT_LIST_HEAD(&a.list);
+ a.ops = tc_lookup_action(kind);
+ if (a.ops == NULL) /*some idjot trying to flush unknown action */
goto err_out;
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
@@ -813,7 +797,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (nest == NULL)
goto out_module_put;
- err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+ err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a);
if (err < 0)
goto out_module_put;
if (err == 0)
@@ -823,8 +807,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
- module_put(a->ops->owner);
- kfree(a);
+ module_put(a.ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
@@ -833,21 +816,52 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
out_module_put:
- module_put(a->ops->owner);
+ module_put(a.ops->owner);
err_out:
noflush_out:
kfree_skb(skb);
- kfree(a);
return err;
}
static int
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+ u32 portid)
+{
+ int ret;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
+ 0, 1) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ /* now do the delete */
+ ret = tcf_action_destroy(actions, 0);
+ if (ret < 0) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (ret > 0)
+ return 0;
+ return ret;
+}
+
+static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int event)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct tc_action *head = NULL, *act, *act_prev = NULL;
+ struct tc_action *act;
+ LIST_HEAD(actions);
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (ret < 0)
@@ -867,117 +881,62 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
-
- if (head == NULL)
- head = act;
- else
- act_prev->next = act;
- act_prev = act;
+ list_add_tail(&act->list, &actions);
}
if (event == RTM_GETACTION)
- ret = act_get_notify(net, portid, n, head, event);
+ ret = act_get_notify(net, portid, n, &actions, event);
else { /* delete */
- struct sk_buff *skb;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb) {
- ret = -ENOBUFS;
- goto err;
- }
-
- if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event,
- 0, 1) <= 0) {
- kfree_skb(skb);
- ret = -EINVAL;
+ ret = tcf_del_notify(net, n, &actions, portid);
+ if (ret)
goto err;
- }
-
- /* now do the delete */
- tcf_action_destroy(head, 0);
- ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (ret > 0)
- return 0;
return ret;
}
err:
- cleanup_a(head);
+ cleanup_a(&actions);
return ret;
}
-static int tcf_add_notify(struct net *net, struct tc_action *a,
- u32 portid, u32 seq, int event, u16 flags)
+static int
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+ u32 portid)
{
- struct tcamsg *t;
- struct nlmsghdr *nlh;
struct sk_buff *skb;
- struct nlattr *nest;
- unsigned char *b;
int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- b = skb_tail_pointer(skb);
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
- if (!nlh)
- goto out_kfree_skb;
- t = nlmsg_data(nlh);
- t->tca_family = AF_UNSPEC;
- t->tca__pad1 = 0;
- t->tca__pad2 = 0;
-
- nest = nla_nest_start(skb, TCA_ACT_TAB);
- if (nest == NULL)
- goto out_kfree_skb;
-
- if (tcf_action_dump(skb, a, 0, 0) < 0)
- goto out_kfree_skb;
-
- nla_nest_end(skb, nest);
-
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- NETLINK_CB(skb).dst_group = RTNLGRP_TC;
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
+ RTM_NEWACTION, 0, 0) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
err = 0;
return err;
-
-out_kfree_skb:
- kfree_skb(skb);
- return -1;
}
-
static int
tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int ovr)
{
int ret = 0;
- struct tc_action *act;
- struct tc_action *a;
- u32 seq = n->nlmsg_seq;
+ LIST_HEAD(actions);
- act = tcf_action_init(net, nla, NULL, NULL, ovr, 0);
- if (act == NULL)
- goto done;
- if (IS_ERR(act)) {
- ret = PTR_ERR(act);
+ ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+ if (ret)
goto done;
- }
/* dump then free all the actions after update; inserted policy
* stays intact
*/
- ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
- for (a = act; a; a = act) {
- act = a->next;
- kfree(a);
- }
+ ret = tcf_add_notify(net, n, &actions, portid);
+ cleanup_a(&actions);
done:
return ret;
}
@@ -989,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
- if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
@@ -1084,12 +1043,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
memset(&a, 0, sizeof(struct tc_action));
a.ops = a_o;
- if (a_o->walk == NULL) {
- WARN(1, "tc_dump_action: %s !capable of dumping table\n",
- a_o->kind);
- goto out_module_put;
- }
-
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3a4c0caa1f7..edbf40dac70 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,15 +37,6 @@
#include <net/tc_act/tc_csum.h>
#define CSUM_TAB_MASK 15
-static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
-static u32 csum_idx_gen;
-static DEFINE_RWLOCK(csum_lock);
-
-static struct tcf_hashinfo csum_hash_info = {
- .htab = tcf_csum_ht,
- .hmask = CSUM_TAB_MASK,
- .lock = &csum_lock,
-};
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -56,7 +47,6 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
- struct tcf_common *pc;
struct tcf_csum *p;
int ret = 0, err;
@@ -71,39 +61,31 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
- &csum_idx_gen, &csum_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
- p = to_tcf_csum(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
- p = to_tcf_csum(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &csum_hash_info);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
}
+ p = to_tcf_csum(a);
spin_lock_bh(&p->tcf_lock);
p->tcf_action = parm->action;
p->update_flags = parm->update_flags;
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &csum_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_csum_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_csum *p = a->priv;
- return tcf_hash_release(&p->common, bind, &csum_hash_info);
-}
-
/**
* tcf_csum_skb_nextlayer - Get next layer pointer
* @skb: sk_buff to use
@@ -578,16 +560,11 @@ nla_put_failure:
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .hinfo = &csum_hash_info,
.type = TCA_ACT_CSUM,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_csum,
.dump = tcf_csum_dump,
- .cleanup = tcf_csum_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_csum_init,
- .walk = tcf_generic_walker
};
MODULE_DESCRIPTION("Checksum updating actions");
@@ -595,7 +572,7 @@ MODULE_LICENSE("GPL");
static int __init csum_init_module(void)
{
- return tcf_register_action(&act_csum_ops);
+ return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK);
}
static void __exit csum_cleanup_module(void)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index fd2b3cff5fa..d6bcbd9f779 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -24,20 +24,11 @@
#include <net/tc_act/tc_gact.h>
#define GACT_TAB_MASK 15
-static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
-static u32 gact_idx_gen;
-static DEFINE_RWLOCK(gact_lock);
-
-static struct tcf_hashinfo gact_hash_info = {
- .htab = tcf_gact_ht,
- .hmask = GACT_TAB_MASK,
- .lock = &gact_lock,
-};
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || net_random() % gact->tcfg_pval)
+ if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
@@ -65,7 +56,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tc_gact *parm;
struct tcf_gact *gact;
- struct tcf_common *pc;
int ret = 0;
int err;
#ifdef CONFIG_GACT_PROB
@@ -94,21 +84,20 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
- bind, &gact_idx_gen, &gact_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
- tcf_hash_release(pc, bind, &gact_hash_info);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
}
- gact = to_gact(pc);
+ gact = to_gact(a);
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
@@ -121,19 +110,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &gact_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_gact_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_gact *gact = a->priv;
-
- if (gact)
- return tcf_hash_release(&gact->common, bind, &gact_hash_info);
- return 0;
-}
-
static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -199,16 +179,11 @@ nla_put_failure:
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .hinfo = &gact_hash_info,
.type = TCA_ACT_GACT,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_gact,
.dump = tcf_gact_dump,
- .cleanup = tcf_gact_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_gact_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -222,7 +197,7 @@ static int __init gact_init_module(void)
#else
pr_info("GACT probability NOT on\n");
#endif
- return tcf_register_action(&act_gact_ops);
+ return tcf_register_action(&act_gact_ops, GACT_TAB_MASK);
}
static void __exit gact_cleanup_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60d88b6b956..8a64a0734ae 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -29,15 +29,6 @@
#define IPT_TAB_MASK 15
-static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
-static u32 ipt_idx_gen;
-static DEFINE_RWLOCK(ipt_lock);
-
-static struct tcf_hashinfo ipt_hash_info = {
- .htab = tcf_ipt_ht,
- .hmask = IPT_TAB_MASK,
- .lock = &ipt_lock,
-};
static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
{
@@ -77,22 +68,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
module_put(par.target->me);
}
-static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
+static void tcf_ipt_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (ipt) {
- if (bind)
- ipt->tcf_bindcnt--;
- ipt->tcf_refcnt--;
- if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
- ipt_destroy_target(ipt->tcfi_t);
- kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
- tcf_hash_destroy(&ipt->common, &ipt_hash_info);
- ret = ACT_P_DELETED;
- }
- }
- return ret;
+ struct tcf_ipt *ipt = to_ipt(a);
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -107,7 +88,6 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
- struct tcf_common *pc;
struct xt_entry_target *td, *t;
char *tname;
int ret = 0, err;
@@ -133,20 +113,20 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
- if (!pc) {
- pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
- &ipt_idx_gen, &ipt_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(index, a, bind) ) {
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
- tcf_ipt_release(to_ipt(pc), bind);
+ if (bind)/* dont override defaults */
+ return 0;
+ tcf_hash_release(a, bind);
+
+ if (!ovr)
return -EEXIST;
- }
}
- ipt = to_ipt(pc);
+ ipt = to_ipt(a);
hook = nla_get_u32(tb[TCA_IPT_HOOK]);
@@ -177,7 +157,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &ipt_hash_info);
+ tcf_hash_insert(a);
return ret;
err3:
@@ -185,21 +165,11 @@ err3:
err2:
kfree(tname);
err1:
- if (ret == ACT_P_CREATED) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
- }
+ if (ret == ACT_P_CREATED)
+ tcf_hash_cleanup(a, est);
return err;
}
-static int tcf_ipt_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_ipt *ipt = a->priv;
- return tcf_ipt_release(ipt, bind);
-}
-
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -291,30 +261,22 @@ nla_put_failure:
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .hinfo = &ipt_hash_info,
.type = TCA_ACT_IPT,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
- .lookup = tcf_hash_search,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
- .walk = tcf_generic_walker
};
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .hinfo = &ipt_hash_info,
- .type = TCA_ACT_IPT,
- .capab = TCA_CAP_NONE,
+ .type = TCA_ACT_XT,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
- .lookup = tcf_hash_search,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
@@ -325,16 +287,17 @@ MODULE_ALIAS("act_xt");
static int __init ipt_init_module(void)
{
int ret1, ret2;
- ret1 = tcf_register_action(&act_xt_ops);
+
+ ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK);
if (ret1 < 0)
printk("Failed to load xt action\n");
- ret2 = tcf_register_action(&act_ipt_ops);
+ ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK);
if (ret2 < 0)
printk("Failed to load ipt action\n");
- if (ret1 < 0 && ret2 < 0)
+ if (ret1 < 0 && ret2 < 0) {
return ret1;
- else
+ } else
return 0;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 977c10e0631..4f912c0e225 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,32 +30,14 @@
#include <linux/if_arp.h>
#define MIRRED_TAB_MASK 7
-static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
-static u32 mirred_idx_gen;
-static DEFINE_RWLOCK(mirred_lock);
static LIST_HEAD(mirred_list);
-static struct tcf_hashinfo mirred_hash_info = {
- .htab = tcf_mirred_ht,
- .hmask = MIRRED_TAB_MASK,
- .lock = &mirred_lock,
-};
-
-static int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static void tcf_mirred_release(struct tc_action *a, int bind)
{
- if (m) {
- if (bind)
- m->tcf_bindcnt--;
- m->tcf_refcnt--;
- if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
- list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
- tcf_hash_destroy(&m->common, &mirred_hash_info);
- return 1;
- }
- }
- return 0;
+ struct tcf_mirred *m = to_mirred(a);
+ list_del(&m->tcfm_list);
+ if (m->tcfm_dev)
+ dev_put(m->tcfm_dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -69,7 +51,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tc_mirred *parm;
struct tcf_mirred *m;
- struct tcf_common *pc;
struct net_device *dev;
int ret, ok_push = 0;
@@ -109,22 +90,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev = NULL;
}
- pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
- &mirred_idx_gen, &mirred_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_mirred_release(to_mirred(pc), bind);
+ tcf_hash_release(a, bind);
return -EEXIST;
}
}
- m = to_mirred(pc);
+ m = to_mirred(a);
spin_lock_bh(&m->tcf_lock);
m->tcf_action = parm->action;
@@ -140,21 +119,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&m->tcf_lock);
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
- tcf_hash_insert(pc, &mirred_hash_info);
+ tcf_hash_insert(a);
}
return ret;
}
-static int tcf_mirred_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_mirred *m = a->priv;
-
- if (m)
- return tcf_mirred_release(m, bind);
- return 0;
-}
-
static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -261,19 +231,14 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .hinfo = &mirred_hash_info,
.type = TCA_ACT_MIRRED,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_mirred,
.dump = tcf_mirred_dump,
- .cleanup = tcf_mirred_cleanup,
- .lookup = tcf_hash_search,
+ .cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
@@ -287,13 +252,13 @@ static int __init mirred_init_module(void)
return err;
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops);
+ return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK);
}
static void __exit mirred_cleanup_module(void)
{
- unregister_netdevice_notifier(&mirred_device_notifier);
tcf_unregister_action(&act_mirred_ops);
+ unregister_netdevice_notifier(&mirred_device_notifier);
}
module_init(mirred_init_module);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 876f0ef2969..270a030d5fd 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -30,15 +30,6 @@
#define NAT_TAB_MASK 15
-static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
-static u32 nat_idx_gen;
-static DEFINE_RWLOCK(nat_lock);
-
-static struct tcf_hashinfo nat_hash_info = {
- .htab = tcf_nat_ht,
- .hmask = NAT_TAB_MASK,
- .lock = &nat_lock,
-};
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
[TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
@@ -51,7 +42,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
- struct tcf_common *pc;
if (nla == NULL)
return -EINVAL;
@@ -64,21 +54,19 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
- &nat_idx_gen, &nat_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
- p = to_tcf_nat(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
- p = to_tcf_nat(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &nat_hash_info);
+ if (bind)
+ return 0;
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
}
+ p = to_tcf_nat(a);
spin_lock_bh(&p->tcf_lock);
p->old_addr = parm->old_addr;
@@ -90,18 +78,11 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &nat_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_nat_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_nat *p = a->priv;
-
- return tcf_hash_release(&p->common, bind, &nat_hash_info);
-}
-
static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -301,16 +282,11 @@ nla_put_failure:
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .hinfo = &nat_hash_info,
.type = TCA_ACT_NAT,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_nat,
.dump = tcf_nat_dump,
- .cleanup = tcf_nat_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_nat_init,
- .walk = tcf_generic_walker
};
MODULE_DESCRIPTION("Stateless NAT actions");
@@ -318,7 +294,7 @@ MODULE_LICENSE("GPL");
static int __init nat_init_module(void)
{
- return tcf_register_action(&act_nat_ops);
+ return tcf_register_action(&act_nat_ops, NAT_TAB_MASK);
}
static void __exit nat_cleanup_module(void)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7ed78c9e505..5f9bcb2e080 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,15 +24,6 @@
#include <net/tc_act/tc_pedit.h>
#define PEDIT_TAB_MASK 15
-static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
-static u32 pedit_idx_gen;
-static DEFINE_RWLOCK(pedit_lock);
-
-static struct tcf_hashinfo pedit_hash_info = {
- .htab = tcf_pedit_ht,
- .hmask = PEDIT_TAB_MASK,
- .lock = &pedit_lock,
-};
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
@@ -46,7 +37,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct tc_pedit *parm;
int ret = 0, err;
struct tcf_pedit *p;
- struct tcf_common *pc;
struct tc_pedit_key *keys = NULL;
int ksize;
@@ -64,30 +54,27 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
return -EINVAL;
- pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
- &pedit_idx_gen, &pedit_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
- p = to_pedit(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
+ p = to_pedit(a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return -ENOMEM;
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &pedit_hash_info);
+ p = to_pedit(a);
+ tcf_hash_release(a, bind);
+ if (bind)
+ return 0;
+ if (!ovr)
return -EEXIST;
- }
+
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
@@ -106,22 +93,15 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &pedit_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a, int bind)
{
struct tcf_pedit *p = a->priv;
-
- if (p) {
- struct tc_pedit_key *keys = p->tcfp_keys;
- if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
- kfree(keys);
- return 1;
- }
- }
- return 0;
+ struct tc_pedit_key *keys = p->tcfp_keys;
+ kfree(keys);
}
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
@@ -236,16 +216,12 @@ nla_put_failure:
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .hinfo = &pedit_hash_info,
.type = TCA_ACT_PEDIT,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_pedit,
.dump = tcf_pedit_dump,
.cleanup = tcf_pedit_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_pedit_init,
- .walk = tcf_generic_walker
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -254,7 +230,7 @@ MODULE_LICENSE("GPL");
static int __init pedit_init_module(void)
{
- return tcf_register_action(&act_pedit_ops);
+ return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK);
}
static void __exit pedit_cleanup_module(void)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 189e3c5b3d0..0566e4606a4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -41,15 +41,6 @@ struct tcf_police {
container_of(pc, struct tcf_police, common)
#define POL_TAB_MASK 15
-static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
-static u32 police_idx_gen;
-static DEFINE_RWLOCK(police_lock);
-
-static struct tcf_hashinfo police_hash_info = {
- .htab = tcf_police_ht,
- .hmask = POL_TAB_MASK,
- .lock = &police_lock,
-};
/* old policer structure from before tc actions */
struct tc_police_compat {
@@ -67,18 +58,20 @@ struct tc_police_compat {
static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
int type, struct tc_action *a)
{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
- read_lock_bh(&police_lock);
+ spin_lock_bh(&hinfo->lock);
s_i = cb->args[0];
for (i = 0; i < (POL_TAB_MASK + 1); i++) {
- p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
+ head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
- for (; p; p = p->tcfc_next) {
+ hlist_for_each_entry_rcu(p, head, tcfc_head) {
index++;
if (index < s_i)
continue;
@@ -101,7 +94,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
}
}
done:
- read_unlock_bh(&police_lock);
+ spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -111,29 +104,6 @@ nla_put_failure:
goto done;
}
-static void tcf_police_destroy(struct tcf_police *p)
-{
- unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
- struct tcf_common **p1p;
-
- for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
- if (*p1p == &p->common) {
- write_lock_bh(&police_lock);
- *p1p = p->tcf_next;
- write_unlock_bh(&police_lock);
- gen_kill_estimator(&p->tcf_bstats,
- &p->tcf_rate_est);
- /*
- * gen_estimator est_timer() might access p->tcf_lock
- * or bstats, wait a RCU grace period before freeing p
- */
- kfree_rcu(p, tcf_rcu);
- return;
- }
- }
- WARN_ON(1);
-}
-
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
[TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
@@ -151,6 +121,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
int size;
if (nla == NULL)
@@ -168,19 +139,17 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_POLICE_TBF]);
if (parm->index) {
- struct tcf_common *pc;
-
- pc = tcf_hash_lookup(parm->index, &police_hash_info);
- if (pc != NULL) {
- a->priv = pc;
- police = to_police(pc);
+ if (tcf_hash_search(a, parm->index)) {
+ police = to_police(a->priv);
if (bind) {
police->tcf_bindcnt += 1;
police->tcf_refcnt += 1;
+ return 0;
}
if (ovr)
goto override;
- return ret;
+ /* not replacing */
+ return -EEXIST;
}
}
@@ -231,14 +200,14 @@ override:
}
if (R_tab) {
police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, &R_tab->rate);
+ psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0);
qdisc_put_rtab(R_tab);
} else {
police->rate_present = false;
}
if (P_tab) {
police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, &P_tab->rate);
+ psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0);
qdisc_put_rtab(P_tab);
} else {
police->peak_present = false;
@@ -264,12 +233,11 @@ override:
police->tcfp_t_c = ktime_to_ns(ktime_get());
police->tcf_index = parm->index ? parm->index :
- tcf_hash_new_index(&police_idx_gen, &police_hash_info);
+ tcf_hash_new_index(hinfo);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
- write_lock_bh(&police_lock);
- police->tcf_next = tcf_police_ht[h];
- tcf_police_ht[h] = &police->common;
- write_unlock_bh(&police_lock);
+ spin_lock_bh(&hinfo->lock);
+ hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
+ spin_unlock_bh(&hinfo->lock);
a->priv = police;
return ret;
@@ -277,33 +245,13 @@ override:
failure_unlock:
spin_unlock_bh(&police->tcf_lock);
failure:
- if (P_tab)
- qdisc_put_rtab(P_tab);
- if (R_tab)
- qdisc_put_rtab(R_tab);
+ qdisc_put_rtab(P_tab);
+ qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
kfree(police);
return err;
}
-static int tcf_act_police_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_police *p = a->priv;
- int ret = 0;
-
- if (p != NULL) {
- if (bind)
- p->tcf_bindcnt--;
-
- p->tcf_refcnt--;
- if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
- tcf_police_destroy(p);
- ret = 1;
- }
- }
- return ret;
-}
-
static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -400,14 +348,10 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .hinfo = &police_hash_info,
.type = TCA_ID_POLICE,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
- .cleanup = tcf_act_police_cleanup,
- .lookup = tcf_hash_search,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
@@ -415,7 +359,7 @@ static struct tc_action_ops act_police_ops = {
static int __init
police_init_module(void)
{
- return tcf_register_action(&act_police_ops);
+ return tcf_register_action(&act_police_ops, POL_TAB_MASK);
}
static void __exit
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7725eb4ab75..992c2317ce8 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -25,15 +25,6 @@
#include <net/tc_act/tc_defact.h>
#define SIMP_TAB_MASK 7
-static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
-static u32 simp_idx_gen;
-static DEFINE_RWLOCK(simp_lock);
-
-static struct tcf_hashinfo simp_hash_info = {
- .htab = tcf_simp_ht,
- .hmask = SIMP_TAB_MASK,
- .lock = &simp_lock,
-};
#define SIMP_MAX_DATA 32
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
@@ -55,20 +46,10 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
return d->tcf_action;
}
-static int tcf_simp_release(struct tcf_defact *d, int bind)
+static void tcf_simp_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (d) {
- if (bind)
- d->tcf_bindcnt--;
- d->tcf_refcnt--;
- if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
- kfree(d->tcfd_defdata);
- tcf_hash_destroy(&d->common, &simp_hash_info);
- ret = 1;
- }
- }
- return ret;
+ struct tcf_defact *d = to_defact(a);
+ kfree(d->tcfd_defdata);
}
static int alloc_defdata(struct tcf_defact *d, char *defdata)
@@ -102,7 +83,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
- struct tcf_common *pc;
char *defdata;
int ret = 0, err;
@@ -122,47 +102,36 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_DEF_PARMS]);
defdata = nla_data(tb[TCA_DEF_DATA]);
- pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
- &simp_idx_gen, &simp_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_defact(pc);
+ d = to_defact(a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return ret;
}
d->tcf_action = parm->action;
ret = ACT_P_CREATED;
} else {
- d = to_defact(pc);
- if (!ovr) {
- tcf_simp_release(d, bind);
+ d = to_defact(a);
+
+ if (bind)
+ return 0;
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
+
reset_policy(d, defdata, parm);
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &simp_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_simp_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_defact *d = a->priv;
-
- if (d)
- return tcf_simp_release(d, bind);
- return 0;
-}
-
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -193,15 +162,12 @@ nla_put_failure:
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .hinfo = &simp_hash_info,
.type = TCA_ACT_SIMP,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_simp,
.dump = tcf_simp_dump,
- .cleanup = tcf_simp_cleanup,
+ .cleanup = tcf_simp_release,
.init = tcf_simp_init,
- .walk = tcf_generic_walker,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
@@ -210,7 +176,8 @@ MODULE_LICENSE("GPL");
static int __init simp_init_module(void)
{
- int ret = tcf_register_action(&act_simp_ops);
+ int ret;
+ ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
if (!ret)
pr_info("Simple TC action Loaded\n");
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cb4221171f9..fcfeeaf838b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -11,8 +11,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Author: Alexander Duyck <alexander.h.duyck@intel.com>
*/
@@ -29,15 +28,6 @@
#include <net/tc_act/tc_skbedit.h>
#define SKBEDIT_TAB_MASK 15
-static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
-static u32 skbedit_idx_gen;
-static DEFINE_RWLOCK(skbedit_lock);
-
-static struct tcf_hashinfo skbedit_hash_info = {
- .htab = tcf_skbedit_ht,
- .hmask = SKBEDIT_TAB_MASK,
- .lock = &skbedit_lock,
-};
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
@@ -74,7 +64,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- struct tcf_common *pc;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
int ret = 0, err;
@@ -109,21 +98,20 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
- &skbedit_idx_gen, &skbedit_hash_info);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_skbedit(pc);
+ d = to_skbedit(a);
ret = ACT_P_CREATED;
} else {
- d = to_skbedit(pc);
- if (!ovr) {
- tcf_hash_release(pc, bind, &skbedit_hash_info);
+ d = to_skbedit(a);
+ if (bind)
+ return 0;
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
}
spin_lock_bh(&d->tcf_lock);
@@ -141,19 +129,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, &skbedit_hash_info);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_skbedit *d = a->priv;
-
- if (d)
- return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
- return 0;
-}
-
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -195,15 +174,11 @@ nla_put_failure:
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .hinfo = &skbedit_hash_info,
.type = TCA_ACT_SKBEDIT,
- .capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_skbedit,
.dump = tcf_skbedit_dump,
- .cleanup = tcf_skbedit_cleanup,
.init = tcf_skbedit_init,
- .walk = tcf_generic_walker,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
@@ -212,7 +187,7 @@ MODULE_LICENSE("GPL");
static int __init skbedit_init_module(void)
{
- return tcf_register_action(&act_skbedit_ops);
+ return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK);
}
static void __exit skbedit_cleanup_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8e118af9097..45527e6b52d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,8 +31,7 @@
#include <net/pkt_cls.h>
/* The list of all installed classifier types */
-
-static struct tcf_proto_ops *tcf_proto_base __read_mostly;
+static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
@@ -41,36 +40,35 @@ static DEFINE_RWLOCK(cls_mod_lock);
static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
{
- const struct tcf_proto_ops *t = NULL;
+ const struct tcf_proto_ops *t, *res = NULL;
if (kind) {
read_lock(&cls_mod_lock);
- for (t = tcf_proto_base; t; t = t->next) {
+ list_for_each_entry(t, &tcf_proto_base, head) {
if (nla_strcmp(kind, t->kind) == 0) {
- if (!try_module_get(t->owner))
- t = NULL;
+ if (try_module_get(t->owner))
+ res = t;
break;
}
}
read_unlock(&cls_mod_lock);
}
- return t;
+ return res;
}
/* Register(unregister) new classifier type */
int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
- struct tcf_proto_ops *t, **tp;
+ struct tcf_proto_ops *t;
int rc = -EEXIST;
write_lock(&cls_mod_lock);
- for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+ list_for_each_entry(t, &tcf_proto_base, head)
if (!strcmp(ops->kind, t->kind))
goto out;
- ops->next = NULL;
- *tp = ops;
+ list_add_tail(&ops->head, &tcf_proto_base);
rc = 0;
out:
write_unlock(&cls_mod_lock);
@@ -80,19 +78,17 @@ EXPORT_SYMBOL(register_tcf_proto_ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
- struct tcf_proto_ops *t, **tp;
+ struct tcf_proto_ops *t;
int rc = -ENOENT;
write_lock(&cls_mod_lock);
- for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
- if (t == ops)
+ list_for_each_entry(t, &tcf_proto_base, head) {
+ if (t == ops) {
+ list_del(&t->head);
+ rc = 0;
break;
-
- if (!t)
- goto out;
- *tp = t->next;
- rc = 0;
-out:
+ }
+ }
write_unlock(&cls_mod_lock);
return rc;
}
@@ -138,7 +134,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
int err;
int tp_created = 0;
- if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTFILTER) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -321,7 +318,8 @@ replay:
}
}
- err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
+ err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
+ n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
if (tp_created) {
spin_lock_bh(root_lock);
@@ -344,7 +342,7 @@ errout:
return err;
}
-static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
+static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp,
unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
@@ -366,7 +364,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
- if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
+ if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
goto nla_put_failure;
}
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -389,7 +387,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
+ if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -408,8 +406,9 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
struct tcf_walker *arg)
{
struct tcf_dump_args *a = (void *)arg;
+ struct net *net = sock_net(a->skb->sk);
- return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
+ return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
}
@@ -467,7 +466,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid,
+ if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
break;
@@ -500,46 +499,41 @@ out:
void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (exts->action) {
- tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
- exts->action = NULL;
- }
+ tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
+ INIT_LIST_HEAD(&exts->actions);
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+ struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
- memset(exts, 0, sizeof(*exts));
-
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
- if (map->police && tb[map->police]) {
- act = tcf_action_init_1(net, tb[map->police], rate_tlv,
- "police", TCA_ACT_NOREPLACE,
+ INIT_LIST_HEAD(&exts->actions);
+ if (exts->police && tb[exts->police]) {
+ act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
+ "police", ovr,
TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
- act->type = TCA_OLD_COMPAT;
- exts->action = act;
- } else if (map->action && tb[map->action]) {
- act = tcf_action_init(net, tb[map->action], rate_tlv,
- NULL, TCA_ACT_NOREPLACE,
- TCA_ACT_BIND);
- if (IS_ERR(act))
- return PTR_ERR(act);
-
- exts->action = act;
+ act->type = exts->type = TCA_OLD_COMPAT;
+ list_add(&act->list, &exts->actions);
+ } else if (exts->action && tb[exts->action]) {
+ int err;
+ err = tcf_action_init(net, tb[exts->action], rate_tlv,
+ NULL, ovr,
+ TCA_ACT_BIND, &exts->actions);
+ if (err)
+ return err;
}
}
#else
- if ((map->action && tb[map->action]) ||
- (map->police && tb[map->police]))
+ if ((exts->action && tb[exts->action]) ||
+ (exts->police && tb[exts->police]))
return -EOPNOTSUPP;
#endif
@@ -551,43 +545,42 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
- if (src->action) {
- struct tc_action *act;
- tcf_tree_lock(tp);
- act = dst->action;
- dst->action = src->action;
- tcf_tree_unlock(tp);
- if (act)
- tcf_action_destroy(act, TCA_ACT_UNBIND);
- }
+ LIST_HEAD(tmp);
+ tcf_tree_lock(tp);
+ list_splice_init(&dst->actions, &tmp);
+ list_splice(&src->actions, &dst->actions);
+ tcf_tree_unlock(tp);
+ tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
-int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+#define tcf_exts_first_act(ext) \
+ list_first_entry(&(exts)->actions, struct tc_action, list)
+
+int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (map->action && exts->action) {
+ if (exts->action && !list_empty(&exts->actions)) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
struct nlattr *nest;
-
- if (exts->action->type != TCA_OLD_COMPAT) {
- nest = nla_nest_start(skb, map->action);
+ if (exts->type != TCA_OLD_COMPAT) {
+ nest = nla_nest_start(skb, exts->action);
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+ if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- } else if (map->police) {
- nest = nla_nest_start(skb, map->police);
- if (nest == NULL)
+ } else if (exts->police) {
+ struct tc_action *act = tcf_exts_first_act(exts);
+ nest = nla_nest_start(skb, exts->police);
+ if (nest == NULL || !act)
goto nla_put_failure;
- if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+ if (tcf_action_dump_old(skb, act, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
}
@@ -600,17 +593,14 @@ nla_put_failure: __attribute__ ((unused))
EXPORT_SYMBOL(tcf_exts_dump);
-int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (exts->action)
- if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
- goto nla_put_failure;
+ struct tc_action *a = tcf_exts_first_act(exts);
+ if (tcf_action_copy_stats(skb, a, 1) < 0)
+ return -1;
#endif
return 0;
-nla_put_failure: __attribute__ ((unused))
- return -1;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d76a35d0dc8..0ae1813e3e9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -34,16 +34,11 @@ struct basic_filter {
struct list_head link;
};
-static const struct tcf_ext_map basic_ext_map = {
- .action = TCA_BASIC_ACT,
- .police = TCA_BASIC_POLICE
-};
-
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
int r;
- struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_head *head = tp->root;
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
@@ -61,7 +56,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
{
unsigned long l = 0UL;
- struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_head *head = tp->root;
struct basic_filter *f;
if (head == NULL)
@@ -112,7 +107,7 @@ static void basic_destroy(struct tcf_proto *tp)
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_head *head = tp->root;
struct basic_filter *t, *f = (struct basic_filter *) arg;
list_for_each_entry(t, &head->flist, link)
@@ -135,13 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
- int err = -EINVAL;
+ int err;
struct tcf_exts e;
struct tcf_ematch_tree t;
- err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map);
+ tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -165,10 +161,10 @@ errout:
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
int err;
- struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_head *head = tp->root;
struct nlattr *tb[TCA_BASIC_MAX + 1];
struct basic_filter *f = (struct basic_filter *) *arg;
@@ -183,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (f != NULL) {
if (handle && f->handle != handle)
return -EINVAL;
- return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+ return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
}
err = -ENOBUFS;
@@ -191,6 +187,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout;
+ tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
if (handle)
f->handle = handle;
@@ -209,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
f->handle = head->hgenerator;
}
- err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+ err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
if (err < 0)
goto errout;
@@ -228,7 +225,7 @@ errout:
static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_head *head = tp->root;
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
@@ -244,7 +241,7 @@ skip:
}
}
-static int basic_dump(struct tcf_proto *tp, unsigned long fh,
+static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct basic_filter *f = (struct basic_filter *) fh;
@@ -263,13 +260,13 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+ if (tcf_exts_dump(skb, &f->exts) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
new file mode 100644
index 00000000000..13f64df2c71
--- /dev/null
+++ b/net/sched/cls_bpf.c
@@ -0,0 +1,382 @@
+/*
+ * Berkeley Packet Filter based traffic classifier
+ *
+ * Might be used to classify traffic through flexible, user-defined and
+ * possibly JIT-ed BPF filters for traffic control as an alternative to
+ * ematches.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("TC BPF based classifier");
+
+struct cls_bpf_head {
+ struct list_head plist;
+ u32 hgen;
+};
+
+struct cls_bpf_prog {
+ struct sk_filter *filter;
+ struct sock_filter *bpf_ops;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct list_head link;
+ u32 handle;
+ u16 bpf_len;
+};
+
+static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
+ [TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
+ [TCA_BPF_OPS] = { .type = NLA_BINARY,
+ .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
+};
+
+static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+ int ret;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ int filter_res = SK_RUN_FILTER(prog->filter, skb);
+
+ if (filter_res == 0)
+ continue;
+
+ *res = prog->res;
+ if (filter_res != -1)
+ res->classid = filter_res;
+
+ ret = tcf_exts_exec(skb, &prog->exts, res);
+ if (ret < 0)
+ continue;
+
+ return ret;
+ }
+
+ return -1;
+}
+
+static int cls_bpf_init(struct tcf_proto *tp)
+{
+ struct cls_bpf_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD(&head->plist);
+ tp->root = head;
+
+ return 0;
+}
+
+static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+{
+ tcf_unbind_filter(tp, &prog->res);
+ tcf_exts_destroy(tp, &prog->exts);
+
+ sk_unattached_filter_destroy(prog->filter);
+
+ kfree(prog->bpf_ops);
+ kfree(prog);
+}
+
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (prog == todel) {
+ tcf_tree_lock(tp);
+ list_del(&prog->link);
+ tcf_tree_unlock(tp);
+
+ cls_bpf_delete_prog(tp, prog);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static void cls_bpf_destroy(struct tcf_proto *tp)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog, *tmp;
+
+ list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+ list_del(&prog->link);
+ cls_bpf_delete_prog(tp, prog);
+ }
+
+ kfree(head);
+}
+
+static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+ unsigned long ret = 0UL;
+
+ if (head == NULL)
+ return 0UL;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (prog->handle == handle) {
+ ret = (unsigned long) prog;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct sock_filter *bpf_ops, *bpf_old;
+ struct tcf_exts exts;
+ struct sock_fprog_kern tmp;
+ struct sk_filter *fp, *fp_old;
+ u16 bpf_size, bpf_len;
+ u32 classid;
+ int ret;
+
+ if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
+ return -EINVAL;
+
+ tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ if (ret < 0)
+ return ret;
+
+ classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
+ if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+ ret = -EINVAL;
+ goto errout;
+ }
+
+ bpf_size = bpf_len * sizeof(*bpf_ops);
+ bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+ if (bpf_ops == NULL) {
+ ret = -ENOMEM;
+ goto errout;
+ }
+
+ memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
+
+ tmp.len = bpf_len;
+ tmp.filter = bpf_ops;
+
+ ret = sk_unattached_filter_create(&fp, &tmp);
+ if (ret)
+ goto errout_free;
+
+ tcf_tree_lock(tp);
+ fp_old = prog->filter;
+ bpf_old = prog->bpf_ops;
+
+ prog->bpf_len = bpf_len;
+ prog->bpf_ops = bpf_ops;
+ prog->filter = fp;
+ prog->res.classid = classid;
+ tcf_tree_unlock(tp);
+
+ tcf_bind_filter(tp, &prog->res, base);
+ tcf_exts_change(tp, &prog->exts, &exts);
+
+ if (fp_old)
+ sk_unattached_filter_destroy(fp_old);
+ if (bpf_old)
+ kfree(bpf_old);
+
+ return 0;
+
+errout_free:
+ kfree(bpf_ops);
+errout:
+ tcf_exts_destroy(tp, &exts);
+ return ret;
+}
+
+static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
+ struct cls_bpf_head *head)
+{
+ unsigned int i = 0x80000000;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && cls_bpf_get(tp, head->hgen));
+ if (i == 0)
+ pr_err("Insufficient number of handles\n");
+
+ return i;
+}
+
+static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg, bool ovr)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+ struct nlattr *tb[TCA_BPF_MAX + 1];
+ int ret;
+
+ if (tca[TCA_OPTIONS] == NULL)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
+ if (ret < 0)
+ return ret;
+
+ if (prog != NULL) {
+ if (handle && prog->handle != handle)
+ return -EINVAL;
+ return cls_bpf_modify_existing(net, tp, prog, base, tb,
+ tca[TCA_RATE], ovr);
+ }
+
+ prog = kzalloc(sizeof(*prog), GFP_KERNEL);
+ if (prog == NULL)
+ return -ENOBUFS;
+
+ tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ if (handle == 0)
+ prog->handle = cls_bpf_grab_new_handle(tp, head);
+ else
+ prog->handle = handle;
+ if (prog->handle == 0) {
+ ret = -EINVAL;
+ goto errout;
+ }
+
+ ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
+ if (ret < 0)
+ goto errout;
+
+ tcf_tree_lock(tp);
+ list_add(&prog->link, &head->plist);
+ tcf_tree_unlock(tp);
+
+ *arg = (unsigned long) prog;
+
+ return 0;
+errout:
+ if (*arg == 0UL && prog)
+ kfree(prog);
+
+ return ret;
+}
+
+static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *tm)
+{
+ struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+ struct nlattr *nest, *nla;
+
+ if (prog == NULL)
+ return skb->len;
+
+ tm->tcm_handle = prog->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
+ goto nla_put_failure;
+
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ goto nla_put_failure;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ if (tcf_exts_dump(skb, &prog->exts) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
+ .kind = "bpf",
+ .owner = THIS_MODULE,
+ .classify = cls_bpf_classify,
+ .init = cls_bpf_init,
+ .destroy = cls_bpf_destroy,
+ .get = cls_bpf_get,
+ .put = cls_bpf_put,
+ .change = cls_bpf_change,
+ .delete = cls_bpf_delete,
+ .walk = cls_bpf_walk,
+ .dump = cls_bpf_dump,
+};
+
+static int __init cls_bpf_init_mod(void)
+{
+ return register_tcf_proto_ops(&cls_bpf_ops);
+}
+
+static void __exit cls_bpf_exit_mod(void)
+{
+ unregister_tcf_proto_ops(&cls_bpf_ops);
+}
+
+module_init(cls_bpf_init_mod);
+module_exit(cls_bpf_exit_mod);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 867b4a3e398..cacf01bd04f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -11,109 +11,13 @@
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
#include <linux/skbuff.h>
-#include <linux/cgroup.h>
#include <linux/rcupdate.h>
-#include <linux/fdtable.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
-static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
-{
- return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
-}
-
-static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
-{
- return css_cls_state(task_css(p, net_cls_subsys_id));
-}
-
-static struct cgroup_subsys_state *
-cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
-{
- struct cgroup_cls_state *cs;
-
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
- return ERR_PTR(-ENOMEM);
- return &cs->css;
-}
-
-static int cgrp_css_online(struct cgroup_subsys_state *css)
-{
- struct cgroup_cls_state *cs = css_cls_state(css);
- struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
-
- if (parent)
- cs->classid = parent->classid;
- return 0;
-}
-
-static void cgrp_css_free(struct cgroup_subsys_state *css)
-{
- kfree(css_cls_state(css));
-}
-
-static int update_classid(const void *v, struct file *file, unsigned n)
-{
- int err;
- struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_classid = (u32)(unsigned long)v;
- return 0;
-}
-
-static void cgrp_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
-{
- struct task_struct *p;
- void *v;
-
- cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- v = (void *)(unsigned long)task_cls_classid(p);
- iterate_fd(p->files, 0, update_classid, v);
- task_unlock(p);
- }
-}
-
-static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
-{
- return css_cls_state(css)->classid;
-}
-
-static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 value)
-{
- css_cls_state(css)->classid = (u32) value;
- return 0;
-}
-
-static struct cftype ss_files[] = {
- {
- .name = "classid",
- .read_u64 = read_classid,
- .write_u64 = write_classid,
- },
- { } /* terminate */
-};
-
-struct cgroup_subsys net_cls_subsys = {
- .name = "net_cls",
- .css_alloc = cgrp_css_alloc,
- .css_online = cgrp_css_online,
- .css_free = cgrp_css_free,
- .attach = cgrp_attach,
- .subsys_id = net_cls_subsys_id,
- .base_cftypes = ss_files,
- .module = THIS_MODULE,
-};
-
struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
@@ -172,11 +76,6 @@ static int cls_cgroup_init(struct tcf_proto *tp)
return 0;
}
-static const struct tcf_ext_map cgroup_ext_map = {
- .action = TCA_CGROUP_ACT,
- .police = TCA_CGROUP_POLICE,
-};
-
static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
@@ -184,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = tp->root;
@@ -203,6 +102,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (head == NULL)
return -ENOBUFS;
+ tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
head->handle = handle;
tcf_tree_lock(tp);
@@ -218,8 +118,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e,
- &cgroup_ext_map);
+ tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
@@ -264,7 +164,7 @@ skip:
arg->count++;
}
-static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_cgroup_head *head = tp->root;
@@ -277,13 +177,13 @@ static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+ if (tcf_exts_dump(skb, &head->exts) < 0 ||
tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &head->exts) < 0)
goto nla_put_failure;
return skb->len;
@@ -309,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
static int __init init_cgroup_cls(void)
{
- int ret;
-
- ret = cgroup_load_subsys(&net_cls_subsys);
- if (ret)
- goto out;
-
- ret = register_tcf_proto_ops(&cls_cgroup_ops);
- if (ret)
- cgroup_unload_subsys(&net_cls_subsys);
-
-out:
- return ret;
+ return register_tcf_proto_ops(&cls_cgroup_ops);
}
static void __exit exit_cgroup_cls(void)
{
unregister_tcf_proto_ops(&cls_cgroup_ops);
-
- cgroup_unload_subsys(&net_cls_subsys);
}
module_init(init_cgroup_cls);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7881e2fccbc..35be16f7c19 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -56,11 +56,6 @@ struct flow_filter {
u32 hashrnd;
};
-static const struct tcf_ext_map flow_ext_map = {
- .action = TCA_FLOW_ACT,
- .police = TCA_FLOW_POLICE,
-};
-
static inline u32 addr_fold(void *addr)
{
unsigned long a = (unsigned long)addr;
@@ -220,7 +215,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
static u32 flow_get_rxhash(struct sk_buff *skb)
{
- return skb_get_rxhash(skb);
+ return skb_get_hash(skb);
}
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
@@ -354,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct flow_head *head = tp->root;
struct flow_filter *f;
@@ -397,7 +392,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
}
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
+ tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
@@ -455,6 +451,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
f->handle = handle;
f->mask = ~0U;
+ tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
get_random_bytes(&f->hashrnd, 4);
f->perturb_timer.function = flow_perturbation;
@@ -566,7 +563,7 @@ static void flow_put(struct tcf_proto *tp, unsigned long f)
{
}
-static int flow_dump(struct tcf_proto *tp, unsigned long fh,
+static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct flow_filter *f = (struct flow_filter *)fh;
@@ -608,7 +605,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
if (f->ematches.hdr.nmatches &&
@@ -617,7 +614,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
#endif
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9b97172db84..861b03ccfed 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -29,11 +29,11 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
+#define HTSIZE 256
struct fw_head {
- struct fw_filter *ht[HTSIZE];
- u32 mask;
+ u32 mask;
+ struct fw_filter *ht[HTSIZE];
};
struct fw_filter {
@@ -41,46 +41,22 @@ struct fw_filter {
u32 id;
struct tcf_result res;
#ifdef CONFIG_NET_CLS_IND
- char indev[IFNAMSIZ];
+ int ifindex;
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
};
-static const struct tcf_ext_map fw_ext_map = {
- .action = TCA_FW_ACT,
- .police = TCA_FW_POLICE
-};
-
-static inline int fw_hash(u32 handle)
+static u32 fw_hash(u32 handle)
{
- if (HTSIZE == 4096)
- return ((handle >> 24) & 0xFFF) ^
- ((handle >> 12) & 0xFFF) ^
- (handle & 0xFFF);
- else if (HTSIZE == 2048)
- return ((handle >> 22) & 0x7FF) ^
- ((handle >> 11) & 0x7FF) ^
- (handle & 0x7FF);
- else if (HTSIZE == 1024)
- return ((handle >> 20) & 0x3FF) ^
- ((handle >> 10) & 0x3FF) ^
- (handle & 0x3FF);
- else if (HTSIZE == 512)
- return (handle >> 27) ^
- ((handle >> 18) & 0x1FF) ^
- ((handle >> 9) & 0x1FF) ^
- (handle & 0x1FF);
- else if (HTSIZE == 256) {
- u8 *t = (u8 *) &handle;
- return t[0] ^ t[1] ^ t[2] ^ t[3];
- } else
- return handle & (HTSIZE - 1);
+ handle ^= (handle >> 16);
+ handle ^= (handle >> 8);
+ return handle % HTSIZE;
}
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct fw_filter *f;
int r;
u32 id = skb->mark;
@@ -91,7 +67,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_IND
- if (!tcf_match_indev(skb, f->indev))
+ if (!tcf_match_indev(skb, f->ifindex))
continue;
#endif /* CONFIG_NET_CLS_IND */
r = tcf_exts_exec(skb, &f->exts, res);
@@ -116,7 +92,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct fw_filter *f;
if (head == NULL)
@@ -165,7 +141,7 @@ static void fw_destroy(struct tcf_proto *tp)
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct fw_filter *f = (struct fw_filter *)arg;
struct fw_filter **fp;
@@ -193,14 +169,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
- struct nlattr **tb, struct nlattr **tca, unsigned long base)
+ struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct tcf_exts e;
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
+ tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
@@ -211,9 +188,13 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FW_INDEV]) {
- err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]);
- if (err < 0)
+ int ret;
+ ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
+ if (ret < 0) {
+ err = ret;
goto errout;
+ }
+ f->ifindex = ret;
}
#endif /* CONFIG_NET_CLS_IND */
@@ -237,9 +218,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct fw_filter *f = (struct fw_filter *) *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
@@ -255,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f != NULL) {
if (f->id != handle && handle)
return -EINVAL;
- return fw_change_attrs(net, tp, f, tb, tca, base);
+ return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
}
if (!handle)
@@ -280,9 +261,10 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
return -ENOBUFS;
+ tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
- err = fw_change_attrs(net, tp, f, tb, tca, base);
+ err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
@@ -301,7 +283,7 @@ errout:
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
int h;
if (head == NULL)
@@ -327,10 +309,10 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int fw_dump(struct tcf_proto *tp, unsigned long fh,
+static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_head *head = tp->root;
struct fw_filter *f = (struct fw_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -351,20 +333,23 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
- if (strlen(f->indev) &&
- nla_put_string(skb, TCA_FW_INDEV, f->indev))
- goto nla_put_failure;
+ if (f->ifindex) {
+ struct net_device *dev;
+ dev = __dev_get_by_index(net, f->ifindex);
+ if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name))
+ goto nla_put_failure;
+ }
#endif /* CONFIG_NET_CLS_IND */
if (head->mask != 0xFFFFFFFF &&
nla_put_u32(skb, TCA_FW_MASK, head->mask))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 37da567d833..dd9fc2523c7 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -59,11 +59,6 @@ struct route4_filter {
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
-static const struct tcf_ext_map route_ext_map = {
- .police = TCA_ROUTE4_POLICE,
- .action = TCA_ROUTE4_ACT
-};
-
static inline int route4_fastmap_hash(u32 id, int iif)
{
return id & 0xF;
@@ -128,7 +123,7 @@ static inline int route4_hash_wild(void)
static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct route4_head *head = (struct route4_head *)tp->root;
+ struct route4_head *head = tp->root;
struct dst_entry *dst;
struct route4_bucket *b;
struct route4_filter *f;
@@ -218,7 +213,7 @@ static inline u32 from_hash(u32 id)
static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{
- struct route4_head *head = (struct route4_head *)tp->root;
+ struct route4_head *head = tp->root;
struct route4_bucket *b;
struct route4_filter *f;
unsigned int h1, h2;
@@ -289,7 +284,7 @@ static void route4_destroy(struct tcf_proto *tp)
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct route4_head *head = (struct route4_head *)tp->root;
+ struct route4_head *head = tp->root;
struct route4_filter **fp, *f = (struct route4_filter *)arg;
unsigned int h = 0;
struct route4_bucket *b;
@@ -338,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
- struct nlattr **tb, struct nlattr *est, int new)
+ struct nlattr **tb, struct nlattr *est, int new,
+ bool ovr)
{
int err;
u32 id = 0, to = 0, nhandle = 0x8000;
@@ -347,7 +343,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map);
+ tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -432,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct route4_head *head = tp->root;
struct route4_filter *f, *f1, **fp;
@@ -459,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
old_handle = f->handle;
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 0);
+ tca[TCA_RATE], 0, ovr);
if (err < 0)
return err;
@@ -481,8 +478,9 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout;
+ tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 1);
+ tca[TCA_RATE], 1, ovr);
if (err < 0)
goto errout;
@@ -554,7 +552,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int route4_dump(struct tcf_proto *tp, unsigned long fh,
+static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct route4_filter *f = (struct route4_filter *)fh;
@@ -589,12 +587,12 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 252d8b05872..1020e233a5d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -116,11 +116,6 @@ static inline unsigned int hash_src(__be32 *src)
return h & 0xF;
}
-static struct tcf_ext_map rsvp_ext_map = {
- .police = TCA_RSVP_POLICE,
- .action = TCA_RSVP_ACT
-};
-
#define RSVP_APPLY_RESULT() \
{ \
int r = tcf_exts_exec(skb, &f->exts, res); \
@@ -420,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct rsvp_head *data = tp->root;
struct rsvp_filter *f, **fp;
@@ -440,7 +435,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
+ tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
@@ -471,6 +467,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout2;
+ tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
h2 = 16;
if (tb[TCA_RSVP_SRC]) {
memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -597,7 +594,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
+static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct rsvp_filter *f = (struct rsvp_filter *)fh;
@@ -633,12 +630,12 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b86535a4016..c721cd4a469 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -24,9 +24,6 @@
#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
-#define PRIV(tp) ((struct tcindex_data *) (tp)->root)
-
-
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
@@ -50,11 +47,6 @@ struct tcindex_data {
int fall_through; /* 0: only classify if explicit match */
};
-static const struct tcf_ext_map tcindex_ext_map = {
- .police = TCA_TCINDEX_POLICE,
- .action = TCA_TCINDEX_ACT
-};
-
static inline int
tcindex_filter_is_set(struct tcindex_filter_result *r)
{
@@ -82,7 +74,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key)
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
@@ -107,7 +99,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
@@ -145,7 +137,7 @@ static int tcindex_init(struct tcf_proto *tp)
static int
__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
struct tcindex_filter *f = NULL;
@@ -196,11 +188,17 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
+static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+{
+ memset(r, 0, sizeof(*r));
+ tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -209,17 +207,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map);
+ tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
memcpy(&cp, p, sizeof(cp));
- memset(&new_filter_result, 0, sizeof(new_filter_result));
+ tcindex_filter_result_init(&new_filter_result);
+ tcindex_filter_result_init(&cr);
if (old_r)
- memcpy(&cr, r, sizeof(cr));
- else
- memset(&cr, 0, sizeof(cr));
+ cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -271,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp.perfect && !cp.h) {
if (valid_perfect_hash(&cp)) {
+ int i;
+
cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
if (!cp.perfect)
goto errout;
+ for (i = 0; i < cp.hash; i++)
+ tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ TCA_TCINDEX_POLICE);
balloc = 1;
} else {
cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
@@ -299,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcf_bind_filter(tp, &cr.res, base);
}
- tcf_exts_change(tp, &cr.exts, &e);
+ if (old_r)
+ tcf_exts_change(tp, &r->exts, &e);
+ else
+ tcf_exts_change(tp, &cr.exts, &e);
tcf_tree_lock(tp);
if (old_r && old_r != r)
- memset(old_r, 0, sizeof(*old_r));
+ tcindex_filter_result_init(old_r);
memcpy(p, &cp, sizeof(cp));
- memcpy(r, &cr, sizeof(cr));
+ r->res = cr.res;
if (r == &new_filter_result) {
struct tcindex_filter **fp;
@@ -335,11 +341,11 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
int err;
@@ -355,13 +361,13 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE]);
+ tca[TCA_RATE], ovr);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter *f, *next;
int i;
@@ -408,7 +414,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
static void tcindex_destroy(struct tcf_proto *tp)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcf_walker walker;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
@@ -423,10 +429,10 @@ static void tcindex_destroy(struct tcf_proto *tp)
}
-static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
+static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tcindex_data *p = PRIV(tp);
+ struct tcindex_data *p = tp->root;
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -468,11 +474,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+ if (tcf_exts_dump(skb, &r->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &r->exts) < 0)
goto nla_put_failure;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index eb07a1e536e..70c0be8d012 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -38,6 +38,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/bitmap.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -48,7 +49,7 @@ struct tc_u_knode {
struct tc_u_hnode *ht_up;
struct tcf_exts exts;
#ifdef CONFIG_NET_CLS_IND
- char indev[IFNAMSIZ];
+ int ifindex;
#endif
u8 fshift;
struct tcf_result res;
@@ -79,11 +80,6 @@ struct tc_u_common {
u32 hgenerator;
};
-static const struct tcf_ext_map u32_ext_map = {
- .action = TCA_U32_ACT,
- .police = TCA_U32_POLICE
-};
-
static inline unsigned int u32_hash_fold(__be32 key,
const struct tc_u32_sel *sel,
u8 fshift)
@@ -100,7 +96,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
+ struct tc_u_hnode *ht = tp->root;
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -157,7 +153,7 @@ check_terminal:
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
- if (!tcf_match_indev(skb, n->indev)) {
+ if (!tcf_match_indev(skb, n->ifindex)) {
n = n->next;
goto next_knode;
}
@@ -352,7 +348,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
return 0;
}
-static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
+static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
struct tc_u_knode **kp;
struct tc_u_hnode *ht = key->ht_up;
@@ -465,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
}
+#define NR_U32_NODE (1<<12)
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
struct tc_u_knode *n;
- unsigned int i = 0x7FF;
+ unsigned long i;
+ unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bitmap)
+ return handle | 0xFFF;
for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
- if (i < TC_U32_NODE(n->handle))
- i = TC_U32_NODE(n->handle);
- i++;
+ set_bit(TC_U32_NODE(n->handle), bitmap);
+
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
+ if (i >= NR_U32_NODE)
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
- return handle | (i > 0xFFF ? 0xFFF : i);
+ kfree(bitmap);
+ return handle | (i >= NR_U32_NODE ? 0xFFF : i);
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -491,12 +495,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct tc_u_hnode *ht,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
int err;
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map);
+ tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -531,9 +536,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_U32_INDEV]) {
- err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]);
- if (err < 0)
+ int ret;
+ ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
+ if (ret < 0)
goto errout;
+ n->ifindex = ret;
}
#endif
tcf_exts_change(tp, &n->exts, &e);
@@ -547,7 +554,7 @@ errout:
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -571,7 +578,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
return u32_set_parms(net, tp, base, n->ht_up, n, tb,
- tca[TCA_RATE]);
+ tca[TCA_RATE], ovr);
}
if (tb[TCA_U32_DIVISOR]) {
@@ -646,6 +653,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->ht_up = ht;
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
+ tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
#ifdef CONFIG_CLS_U32_MARK
if (tb[TCA_U32_MARK]) {
@@ -657,7 +665,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]);
+ err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
struct tc_u_knode **ins;
for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
@@ -715,7 +723,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int u32_dump(struct tcf_proto *tp, unsigned long fh,
+static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
@@ -759,13 +767,16 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
#endif
- if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+ if (tcf_exts_dump(skb, &n->exts) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
- if (strlen(n->indev) &&
- nla_put_string(skb, TCA_U32_INDEV, n->indev))
- goto nla_put_failure;
+ if (n->ifindex) {
+ struct net_device *dev;
+ dev = __dev_get_by_index(net, n->ifindex);
+ if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
+ goto nla_put_failure;
+ }
#endif
#ifdef CONFIG_CLS_U32_PERF
if (nla_put(skb, TCA_U32_PCNT,
@@ -778,7 +789,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
nla_nest_end(skb, nest);
if (TC_U32_KEY(n->handle))
- if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &n->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 938b7cbf562..527aeb7a3ff 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
{
struct xt_set_info *set = data;
ip_set_id_t index;
+ struct net *net = dev_net(qdisc_dev(tp->q));
if (data_len != sizeof(*set))
return -EINVAL;
- index = ip_set_nfnl_get_byindex(set->index);
+ index = ip_set_nfnl_get_byindex(net, set->index);
if (index == IPSET_INVALID_ID)
return -ENOENT;
@@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
if (em->data)
return 0;
- ip_set_nfnl_put(index);
+ ip_set_nfnl_put(net, index);
return -ENOMEM;
}
@@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
- ip_set_nfnl_put(set->index);
+ ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index);
kfree((void *) em->data);
}
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 7c3de6ffa51..9b8c0b0e60d 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen)
META_COLLECTOR(int_rxhash)
{
- dst->value = skb_get_rxhash(skb);
+ dst->value = skb_get_hash(skb);
}
/**************************************************************************
@@ -271,40 +271,52 @@ META_COLLECTOR(int_rtiif)
* Socket Attributes
**************************************************************************/
-#define SKIP_NONLOCAL(skb) \
- if (unlikely(skb->sk == NULL)) { \
- *err = -1; \
- return; \
- }
+#define skip_nonlocal(skb) \
+ (unlikely(skb->sk == NULL))
META_COLLECTOR(int_sk_family)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_family;
}
META_COLLECTOR(int_sk_state)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_state;
}
META_COLLECTOR(int_sk_reuse)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_reuse;
}
META_COLLECTOR(int_sk_bound_if)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
/* No error if bound_dev_if is 0, legal userspace check */
dst->value = skb->sk->sk_bound_dev_if;
}
META_COLLECTOR(var_sk_bound_if)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
if (skb->sk->sk_bound_dev_if == 0) {
dst->value = (unsigned long) "any";
@@ -322,151 +334,226 @@ META_COLLECTOR(var_sk_bound_if)
META_COLLECTOR(int_sk_refcnt)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = atomic_read(&skb->sk->sk_refcnt);
}
META_COLLECTOR(int_sk_rcvbuf)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_rcvbuf;
}
META_COLLECTOR(int_sk_shutdown)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_shutdown;
}
META_COLLECTOR(int_sk_proto)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_protocol;
}
META_COLLECTOR(int_sk_type)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_type;
}
META_COLLECTOR(int_sk_rmem_alloc)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = sk_rmem_alloc_get(skb->sk);
}
META_COLLECTOR(int_sk_wmem_alloc)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = sk_wmem_alloc_get(skb->sk);
}
META_COLLECTOR(int_sk_omem_alloc)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = atomic_read(&skb->sk->sk_omem_alloc);
}
META_COLLECTOR(int_sk_rcv_qlen)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_receive_queue.qlen;
}
META_COLLECTOR(int_sk_snd_qlen)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_write_queue.qlen;
}
META_COLLECTOR(int_sk_wmem_queued)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_wmem_queued;
}
META_COLLECTOR(int_sk_fwd_alloc)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_forward_alloc;
}
META_COLLECTOR(int_sk_sndbuf)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_sndbuf;
}
META_COLLECTOR(int_sk_alloc)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = (__force int) skb->sk->sk_allocation;
}
META_COLLECTOR(int_sk_hash)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_hash;
}
META_COLLECTOR(int_sk_lingertime)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_lingertime / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_error_queue.qlen;
}
META_COLLECTOR(int_sk_ack_bl)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_ack_backlog;
}
META_COLLECTOR(int_sk_max_ack_bl)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_max_ack_backlog;
}
META_COLLECTOR(int_sk_prio)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_priority;
}
META_COLLECTOR(int_sk_rcvlowat)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_rcvlowat;
}
META_COLLECTOR(int_sk_rcvtimeo)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_rcvtimeo / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_sndtimeo / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_frag.offset;
}
META_COLLECTOR(int_sk_write_pend)
{
- SKIP_NONLOCAL(skb);
+ if (skip_nonlocal(skb)) {
+ *err = -1;
+ return;
+ }
dst->value = skb->sk->sk_write_pending;
}
@@ -793,8 +880,10 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
goto errout;
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
- if (meta == NULL)
+ if (meta == NULL) {
+ err = -ENOMEM;
goto errout;
+ }
memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2adda7fa2d3..58bed7599db 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock);
static struct Qdisc_ops *qdisc_base;
-/* Register/uregister queueing discipline */
+/* Register/unregister queueing discipline */
int register_qdisc(struct Qdisc_ops *qops)
{
@@ -271,11 +271,16 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-static void qdisc_list_add(struct Qdisc *q)
+void qdisc_list_add(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ struct Qdisc *root = qdisc_dev(q)->qdisc;
+
+ WARN_ON_ONCE(root == &noop_qdisc);
+ list_add_tail(&q->list, &root->list);
+ }
}
+EXPORT_SYMBOL(qdisc_list_add);
void qdisc_list_del(struct Qdisc *q)
{
@@ -558,7 +563,7 @@ out:
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
@@ -737,9 +742,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
+ int drops;
if (n == 0)
return;
+ drops = max_t(int, n, 0);
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
return;
@@ -756,6 +763,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.drops += drops;
}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
@@ -1076,7 +1084,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETQDISC) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1143,7 +1152,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q, *p;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -1296,6 +1305,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
struct gnet_dump d;
struct qdisc_size_table *stab;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
@@ -1427,9 +1437,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
- rcu_read_lock();
idx = 0;
- for_each_netdev_rcu(net, dev) {
+ ASSERT_RTNL();
+ for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
if (idx < s_idx)
@@ -1452,8 +1462,6 @@ cont:
}
done:
- rcu_read_unlock();
-
cb->args[0] = idx;
cb->args[1] = q_idx;
@@ -1483,7 +1491,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTCLASS) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1610,6 +1619,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1f9c31411f1..8449b337f9e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -623,8 +623,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
goto nla_put_failure;
}
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7a42c81a19e..ead526467cc 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1058,9 +1058,10 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
q->quanta[prio];
}
- if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
- pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
- cl->common.classid, cl->quantum);
+ if (cl->quantum <= 0 ||
+ cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
+ pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+ cl->common.classid, cl->quantum);
cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
}
}
@@ -1562,8 +1563,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (cbq_dump_attr(skb, &q->link) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
@@ -1598,8 +1598,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
goto nla_put_failure;
if (cbq_dump_attr(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
@@ -1782,8 +1781,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
- if (rtab)
- qdisc_put_rtab(rtab);
+ qdisc_put_rtab(rtab);
return err;
}
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ddd73cb2d7b..ed30e436128 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -14,7 +14,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
-#include <linux/reciprocal_div.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
@@ -77,12 +76,6 @@ struct choke_sched_data {
struct sk_buff **tab;
};
-/* deliver a random number between 0 and N - 1 */
-static u32 random_N(unsigned int N)
-{
- return reciprocal_divide(prandom_u32(), N);
-}
-
/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
@@ -233,7 +226,7 @@ static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
int retrys = 3;
do {
- *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
+ *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
skb = q->tab[*pidx];
if (skb)
return skb;
@@ -398,12 +391,7 @@ static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
static void choke_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static int choke_change(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8302717ea30..7bbbfe11219 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
while (1) {
cl = list_first_entry(&q->active, struct drr_class, alist);
skb = cl->qdisc->ops->peek(cl->qdisc);
- if (skb == NULL)
+ if (skb == NULL) {
+ qdisc_warn_nonwc(__func__, cl->qdisc);
goto out;
+ }
len = qdisc_pkt_len(skb);
if (len <= cl->deficit) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3886365cc20..49d6ef338b5 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -47,7 +47,7 @@ struct dsmark_qdisc_data {
static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
{
- return (index <= p->indices && index > 0);
+ return index <= p->indices && index > 0;
}
/* ------------------------- Class/flow operations ------------------------- */
@@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
- sch, p, new, old);
+ pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
+ __func__, sch, p, new, old);
if (new == NULL) {
new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
@@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
{
- pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
- sch, qdisc_priv(sch), classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+ __func__, sch, qdisc_priv(sch), classid);
return TC_H_MIN(classid) + 1;
}
@@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
int err = -EINVAL;
u8 mask = 0;
- pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
- "arg 0x%lx\n", sch, p, classid, parent, *arg);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
+ __func__, sch, p, classid, parent, *arg);
if (!dsmark_valid_index(p, *arg)) {
err = -ENOENT;
@@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int i;
- pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+ pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
+ __func__, sch, p, walker);
if (walker->stop)
return;
@@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int err;
- pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+ pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
switch (skb->protocol) {
@@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
u32 index;
- pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
skb = p->q->ops->dequeue(p->q);
if (skb == NULL)
@@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
* and don't need yet another qdisc as a bypass.
*/
if (p->mask[index] != 0xff || p->value[index])
- pr_warning("dsmark_dequeue: unsupported protocol %d\n",
- ntohs(skb->protocol));
+ pr_warn("%s: unsupported protocol %d\n",
+ __func__, ntohs(skb->protocol));
break;
}
@@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
return p->q->ops->peek(p->q);
}
@@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
unsigned int len;
- pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
if (p->q->ops->drop == NULL)
return 0;
@@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
u16 indices;
u8 *mask;
- pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
if (!opt)
goto errout;
@@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (p->q == NULL)
p->q = &noop_qdisc;
- pr_debug("dsmark_init: qdisc %p\n", p->q);
+ pr_debug("%s: qdisc %p\n", __func__, p->q);
err = 0;
errout:
@@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
sch->q.qlen = 0;
}
@@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
tcf_destroy_chain(&p->filter_list);
qdisc_destroy(p->q);
@@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
struct dsmark_qdisc_data *p = qdisc_priv(sch);
struct nlattr *opts = NULL;
- pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
+ pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
if (!dsmark_valid_index(p, cl))
return -EINVAL;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a9dfdda9ed1..ba32c2b005d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -47,6 +47,7 @@
#include <linux/rbtree.h>
#include <linux/hash.h>
#include <linux/prefetch.h>
+#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -88,7 +89,7 @@ struct fq_sched_data {
struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
u32 initial_quantum;
- u32 flow_default_rate;/* rate per flow : bytes per second */
+ u32 flow_refill_delay;
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
struct rb_root *fq_root;
@@ -115,6 +116,7 @@ static struct fq_flow detached, throttled;
static void fq_flow_set_detached(struct fq_flow *f)
{
f->next = &detached;
+ f->age = jiffies;
}
static bool fq_flow_is_detached(const struct fq_flow *f)
@@ -209,28 +211,22 @@ static void fq_gc(struct fq_sched_data *q,
}
}
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
{
struct rb_node **p, *parent;
struct sock *sk = skb->sk;
struct rb_root *root;
struct fq_flow *f;
- int band;
/* warning: no starvation prevention... */
- band = prio2band[skb->priority & TC_PRIO_MAX];
- if (unlikely(band == 0))
+ if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
return &q->internal;
if (unlikely(!sk)) {
/* By forcing low order bit to 1, we make sure to not
* collide with a local flow (socket pointers are word aligned)
*/
- sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+ sk = (struct sock *)(skb_get_hash(skb) | 1L);
}
root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -255,6 +251,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ f->time_next_packet = 0ULL;
}
return f;
}
@@ -372,17 +369,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
f->qlen++;
- flow_queue_add(f, skb);
if (skb_is_retransmit(skb))
q->stat_tcp_retrans++;
sch->qstats.backlog += qdisc_pkt_len(skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
- if (q->quantum > f->credit)
- f->credit = q->quantum;
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
q->inactive_flows--;
qdisc_unthrottled(sch);
}
+
+ /* Note: this overwrites f->age */
+ flow_queue_add(f, skb);
+
if (unlikely(f == &q->internal)) {
q->stat_internal_packets++;
qdisc_unthrottled(sch);
@@ -460,7 +460,6 @@ begin:
fq_flow_add_tail(&q->old_flows, f);
} else {
fq_flow_set_detached(f);
- f->age = jiffies;
q->inactive_flows++;
}
goto begin;
@@ -580,28 +579,53 @@ static void fq_rehash(struct fq_sched_data *q,
q->stat_gc_flows += fcnt;
}
-static int fq_resize(struct fq_sched_data *q, u32 log)
+static void *fq_alloc_node(size_t sz, int node)
+{
+ void *ptr;
+
+ ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
+ if (!ptr)
+ ptr = vmalloc_node(sz, node);
+ return ptr;
+}
+
+static void fq_free(void *addr)
+{
+ kvfree(addr);
+}
+
+static int fq_resize(struct Qdisc *sch, u32 log)
{
+ struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *array;
+ void *old_fq_root;
u32 idx;
if (q->fq_root && log == q->fq_trees_log)
return 0;
- array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+ /* If XPS was setup, we can allocate memory on right NUMA node */
+ array = fq_alloc_node(sizeof(struct rb_root) << log,
+ netdev_queue_numa_node_read(sch->dev_queue));
if (!array)
return -ENOMEM;
for (idx = 0; idx < (1U << log); idx++)
array[idx] = RB_ROOT;
- if (q->fq_root) {
- fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
- kfree(q->fq_root);
- }
+ sch_tree_lock(sch);
+
+ old_fq_root = q->fq_root;
+ if (old_fq_root)
+ fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
+
q->fq_root = array;
q->fq_trees_log = log;
+ sch_tree_unlock(sch);
+
+ fq_free(old_fq_root);
+
return 0;
}
@@ -614,6 +638,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -655,7 +680,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
- q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+ pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
+ nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
if (tb[TCA_FQ_FLOW_MAX_RATE])
q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
@@ -669,9 +695,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
err = -EINVAL;
}
- if (!err)
- err = fq_resize(q, fq_log);
+ if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
+ u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
+
+ q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+ }
+ if (!err) {
+ sch_tree_unlock(sch);
+ err = fq_resize(sch, fq_log);
+ sch_tree_lock(sch);
+ }
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_dequeue(sch);
@@ -691,7 +725,7 @@ static void fq_destroy(struct Qdisc *sch)
struct fq_sched_data *q = qdisc_priv(sch);
fq_reset(sch);
- kfree(q->fq_root);
+ fq_free(q->fq_root);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -704,7 +738,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
q->flow_plimit = 100;
q->quantum = 2 * psched_mtu(qdisc_dev(sch));
q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
- q->flow_default_rate = 0;
+ q->flow_refill_delay = msecs_to_jiffies(40);
q->flow_max_rate = ~0U;
q->rate_enable = 1;
q->new_flows.first = NULL;
@@ -717,7 +751,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt)
err = fq_change(sch, opt);
else
- err = fq_resize(q, q->fq_trees_log);
+ err = fq_resize(sch, q->fq_trees_log);
return err;
}
@@ -731,20 +765,20 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
- /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
- * do not bother giving its value
- */
+ /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
+ jiffies_to_usecs(q->flow_refill_delay)) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 55786283a3d..063b726bf1f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -365,12 +365,7 @@ static void *fq_codel_zalloc(size_t sz)
static void fq_codel_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void fq_codel_destroy(struct Qdisc *sch)
@@ -390,7 +385,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
sch->limit = 10*1024;
q->flows_cnt = 1024;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = net_random();
+ q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
codel_params_init(&q->cparams);
@@ -450,8 +445,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a74e278654a..e1543b03e39 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -310,6 +310,7 @@ void netif_carrier_on(struct net_device *dev)
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
+ atomic_inc(&dev->carrier_changes);
linkwatch_fire_event(dev);
if (netif_running(dev))
__netdev_watchdog_up(dev);
@@ -328,6 +329,7 @@ void netif_carrier_off(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
+ atomic_inc(&dev->carrier_changes);
linkwatch_fire_event(dev);
}
}
@@ -338,13 +340,13 @@ EXPORT_SYMBOL(netif_carrier_off);
cheaper.
*/
-static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
kfree_skb(skb);
return NET_XMIT_CN;
}
-static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
+static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
return NULL;
}
@@ -718,8 +720,8 @@ static void attach_default_qdiscs(struct net_device *dev)
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
if (qdisc) {
- qdisc->ops->attach(qdisc);
dev->qdisc = qdisc;
+ qdisc->ops->attach(qdisc);
}
}
}
@@ -829,7 +831,7 @@ void dev_deactivate_many(struct list_head *head)
struct net_device *dev;
bool sync_needed = false;
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
&noop_qdisc);
if (dev_ingress_queue(dev))
@@ -848,7 +850,7 @@ void dev_deactivate_many(struct list_head *head)
synchronize_net();
/* Wait for outstanding qdisc_run calls. */
- list_for_each_entry(dev, head, unreg_list)
+ list_for_each_entry(dev, head, close_list)
while (some_qdisc_is_busy(dev))
yield();
}
@@ -857,7 +859,7 @@ void dev_deactivate(struct net_device *dev)
{
LIST_HEAD(single);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
dev_deactivate_many(&single);
list_del(&single);
}
@@ -910,11 +912,12 @@ void dev_shutdown(struct net_device *dev)
}
void psched_ratecfg_precompute(struct psched_ratecfg *r,
- const struct tc_ratespec *conf)
+ const struct tc_ratespec *conf,
+ u64 rate64)
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
- r->rate_bytes_ps = conf->rate;
+ r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
/*
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index d42234c0f13..12cbc09157f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
for (i = table->DPs; i < MAX_DPs; i++) {
if (table->tab[i]) {
- pr_warning("GRED: Warning: Destroying "
- "shadowed VQ 0x%x\n", i);
+ pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
+ i);
gred_destroy_vq(table->tab[i]);
table->tab[i] = NULL;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c4075610502..ec8aeaac1dd 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1353,8 +1353,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
goto nla_put_failure;
if (hfsc_dump_curves(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
new file mode 100644
index 00000000000..d85b6812a7d
--- /dev/null
+++ b/net/sched/sch_hhf.c
@@ -0,0 +1,740 @@
+/* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF)
+ *
+ * Copyright (C) 2013 Terry Lam <vtlam@google.com>
+ * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
+ */
+
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <net/flow_keys.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+/* Heavy-Hitter Filter (HHF)
+ *
+ * Principles :
+ * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
+ * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
+ * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
+ * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler,
+ * in which the heavy-hitter bucket is served with less weight.
+ * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
+ * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have
+ * higher share of bandwidth.
+ *
+ * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the
+ * following paper:
+ * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and
+ * Accounting", in ACM SIGCOMM, 2002.
+ *
+ * Conceptually, a multi-stage filter comprises k independent hash functions
+ * and k counter arrays. Packets are indexed into k counter arrays by k hash
+ * functions, respectively. The counters are then increased by the packet sizes.
+ * Therefore,
+ * - For a heavy-hitter flow: *all* of its k array counters must be large.
+ * - For a non-heavy-hitter flow: some of its k array counters can be large
+ * due to hash collision with other small flows; however, with high
+ * probability, not *all* k counters are large.
+ *
+ * By the design of the multi-stage filter algorithm, the false negative rate
+ * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is
+ * susceptible to false positives (non-heavy-hitters mistakenly classified as
+ * heavy-hitters).
+ * Therefore, we also implement the following optimizations to reduce false
+ * positives by avoiding unnecessary increment of the counter values:
+ * - Optimization O1: once a heavy-hitter is identified, its bytes are not
+ * accounted in the array counters. This technique is called "shielding"
+ * in Section 3.3.1 of [EV02].
+ * - Optimization O2: conservative update of counters
+ * (Section 3.3.2 of [EV02]),
+ * New counter value = max {old counter value,
+ * smallest counter value + packet bytes}
+ *
+ * Finally, we refresh the counters periodically since otherwise the counter
+ * values will keep accumulating.
+ *
+ * Once a flow is classified as heavy-hitter, we also save its per-flow state
+ * in an exact-matching flow table so that its subsequent packets can be
+ * dispatched to the heavy-hitter bucket accordingly.
+ *
+ *
+ * At a high level, this qdisc works as follows:
+ * Given a packet p:
+ * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching
+ * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter
+ * bucket.
+ * - Otherwise, forward p to the multi-stage filter, denoted filter F
+ * + If F decides that p belongs to a non-heavy-hitter flow, then send p
+ * to the non-heavy-hitter bucket.
+ * + Otherwise, if F decides that p belongs to a new heavy-hitter flow,
+ * then set up a new flow entry for the flow-id of p in the table T and
+ * send p to the heavy-hitter bucket.
+ *
+ * In this implementation:
+ * - T is a fixed-size hash-table with 1024 entries. Hash collision is
+ * resolved by linked-list chaining.
+ * - F has four counter arrays, each array containing 1024 32-bit counters.
+ * That means 4 * 1024 * 32 bits = 16KB of memory.
+ * - Since each array in F contains 1024 counters, 10 bits are sufficient to
+ * index into each array.
+ * Hence, instead of having four hash functions, we chop the 32-bit
+ * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is
+ * computed as XOR sum of those three chunks.
+ * - We need to clear the counter arrays periodically; however, directly
+ * memsetting 16KB of memory can lead to cache eviction and unwanted delay.
+ * So by representing each counter by a valid bit, we only need to reset
+ * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory.
+ * - The Deficit Round Robin engine is taken from fq_codel implementation
+ * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to
+ * fq_codel_flow in fq_codel implementation.
+ *
+ */
+
+/* Non-configurable parameters */
+#define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */
+#define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */
+#define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */
+#define HHF_BIT_MASK_LEN 10 /* masking 10 bits */
+#define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */
+
+#define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */
+enum wdrr_bucket_idx {
+ WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */
+ WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */
+};
+
+#define hhf_time_before(a, b) \
+ (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))
+
+/* Heavy-hitter per-flow state */
+struct hh_flow_state {
+ u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */
+ u32 hit_timestamp; /* last time heavy-hitter was seen */
+ struct list_head flowchain; /* chaining under hash collision */
+};
+
+/* Weighted Deficit Round Robin (WDRR) scheduler */
+struct wdrr_bucket {
+ struct sk_buff *head;
+ struct sk_buff *tail;
+ struct list_head bucketchain;
+ int deficit;
+};
+
+struct hhf_sched_data {
+ struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
+ u32 perturbation; /* hash perturbation */
+ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
+ u32 drop_overlimit; /* number of times max qdisc packet
+ * limit was hit
+ */
+ struct list_head *hh_flows; /* table T (currently active HHs) */
+ u32 hh_flows_limit; /* max active HH allocs */
+ u32 hh_flows_overlimit; /* num of disallowed HH allocs */
+ u32 hh_flows_total_cnt; /* total admitted HHs */
+ u32 hh_flows_current_cnt; /* total current HHs */
+ u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */
+ u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays
+ * was reset
+ */
+ unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
+ * of hhf_arrays
+ */
+ /* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */
+ struct list_head new_buckets; /* list of new buckets */
+ struct list_head old_buckets; /* list of old buckets */
+
+ /* Configurable HHF parameters */
+ u32 hhf_reset_timeout; /* interval to reset counter
+ * arrays in filter F
+ * (default 40ms)
+ */
+ u32 hhf_admit_bytes; /* counter thresh to classify as
+ * HH (default 128KB).
+ * With these default values,
+ * 128KB / 40ms = 25 Mbps
+ * i.e., we expect to capture HHs
+ * sending > 25 Mbps.
+ */
+ u32 hhf_evict_timeout; /* aging threshold to evict idle
+ * HHs out of table T. This should
+ * be large enough to avoid
+ * reordering during HH eviction.
+ * (default 1s)
+ */
+ u32 hhf_non_hh_weight; /* WDRR weight for non-HHs
+ * (default 2,
+ * i.e., non-HH : HH = 2 : 1)
+ */
+};
+
+static u32 hhf_time_stamp(void)
+{
+ return jiffies;
+}
+
+static unsigned int skb_hash(const struct hhf_sched_data *q,
+ const struct sk_buff *skb)
+{
+ struct flow_keys keys;
+ unsigned int hash;
+
+ if (skb->sk && skb->sk->sk_hash)
+ return skb->sk->sk_hash;
+
+ skb_flow_dissect(skb, &keys);
+ hash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src ^ keys.ip_proto,
+ (__force u32)keys.ports, q->perturbation);
+ return hash;
+}
+
+/* Looks up a heavy-hitter flow in a chaining list of table T. */
+static struct hh_flow_state *seek_list(const u32 hash,
+ struct list_head *head,
+ struct hhf_sched_data *q)
+{
+ struct hh_flow_state *flow, *next;
+ u32 now = hhf_time_stamp();
+
+ if (list_empty(head))
+ return NULL;
+
+ list_for_each_entry_safe(flow, next, head, flowchain) {
+ u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;
+
+ if (hhf_time_before(prev, now)) {
+ /* Delete expired heavy-hitters, but preserve one entry
+ * to avoid kzalloc() when next time this slot is hit.
+ */
+ if (list_is_last(&flow->flowchain, head))
+ return NULL;
+ list_del(&flow->flowchain);
+ kfree(flow);
+ q->hh_flows_current_cnt--;
+ } else if (flow->hash_id == hash) {
+ return flow;
+ }
+ }
+ return NULL;
+}
+
+/* Returns a flow state entry for a new heavy-hitter. Either reuses an expired
+ * entry or dynamically alloc a new entry.
+ */
+static struct hh_flow_state *alloc_new_hh(struct list_head *head,
+ struct hhf_sched_data *q)
+{
+ struct hh_flow_state *flow;
+ u32 now = hhf_time_stamp();
+
+ if (!list_empty(head)) {
+ /* Find an expired heavy-hitter flow entry. */
+ list_for_each_entry(flow, head, flowchain) {
+ u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;
+
+ if (hhf_time_before(prev, now))
+ return flow;
+ }
+ }
+
+ if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
+ q->hh_flows_overlimit++;
+ return NULL;
+ }
+ /* Create new entry. */
+ flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
+ if (!flow)
+ return NULL;
+
+ q->hh_flows_current_cnt++;
+ INIT_LIST_HEAD(&flow->flowchain);
+ list_add_tail(&flow->flowchain, head);
+
+ return flow;
+}
+
+/* Assigns packets to WDRR buckets. Implements a multi-stage filter to
+ * classify heavy-hitters.
+ */
+static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ u32 tmp_hash, hash;
+ u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
+ struct hh_flow_state *flow;
+ u32 pkt_len, min_hhf_val;
+ int i;
+ u32 prev;
+ u32 now = hhf_time_stamp();
+
+ /* Reset the HHF counter arrays if this is the right time. */
+ prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
+ if (hhf_time_before(prev, now)) {
+ for (i = 0; i < HHF_ARRAYS_CNT; i++)
+ bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
+ q->hhf_arrays_reset_timestamp = now;
+ }
+
+ /* Get hashed flow-id of the skb. */
+ hash = skb_hash(q, skb);
+
+ /* Check if this packet belongs to an already established HH flow. */
+ flow_pos = hash & HHF_BIT_MASK;
+ flow = seek_list(hash, &q->hh_flows[flow_pos], q);
+ if (flow) { /* found its HH flow */
+ flow->hit_timestamp = now;
+ return WDRR_BUCKET_FOR_HH;
+ }
+
+ /* Now pass the packet through the multi-stage filter. */
+ tmp_hash = hash;
+ xorsum = 0;
+ for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
+ /* Split the skb_hash into three 10-bit chunks. */
+ filter_pos[i] = tmp_hash & HHF_BIT_MASK;
+ xorsum ^= filter_pos[i];
+ tmp_hash >>= HHF_BIT_MASK_LEN;
+ }
+ /* The last chunk is computed as XOR sum of other chunks. */
+ filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;
+
+ pkt_len = qdisc_pkt_len(skb);
+ min_hhf_val = ~0U;
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ u32 val;
+
+ if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
+ q->hhf_arrays[i][filter_pos[i]] = 0;
+ __set_bit(filter_pos[i], q->hhf_valid_bits[i]);
+ }
+
+ val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
+ if (min_hhf_val > val)
+ min_hhf_val = val;
+ }
+
+ /* Found a new HH iff all counter values > HH admit threshold. */
+ if (min_hhf_val > q->hhf_admit_bytes) {
+ /* Just captured a new heavy-hitter. */
+ flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
+ if (!flow) /* memory alloc problem */
+ return WDRR_BUCKET_FOR_NON_HH;
+ flow->hash_id = hash;
+ flow->hit_timestamp = now;
+ q->hh_flows_total_cnt++;
+
+ /* By returning without updating counters in q->hhf_arrays,
+ * we implicitly implement "shielding" (see Optimization O1).
+ */
+ return WDRR_BUCKET_FOR_HH;
+ }
+
+ /* Conservative update of HHF arrays (see Optimization O2). */
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
+ q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
+ }
+ return WDRR_BUCKET_FOR_NON_HH;
+}
+
+/* Removes one skb from head of bucket. */
+static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
+{
+ struct sk_buff *skb = bucket->head;
+
+ bucket->head = skb->next;
+ skb->next = NULL;
+ return skb;
+}
+
+/* Tail-adds skb to bucket. */
+static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
+{
+ if (bucket->head == NULL)
+ bucket->head = skb;
+ else
+ bucket->tail->next = skb;
+ bucket->tail = skb;
+ skb->next = NULL;
+}
+
+static unsigned int hhf_drop(struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct wdrr_bucket *bucket;
+
+ /* Always try to drop from heavy-hitters first. */
+ bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
+ if (!bucket->head)
+ bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];
+
+ if (bucket->head) {
+ struct sk_buff *skb = dequeue_head(bucket);
+
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ kfree_skb(skb);
+ }
+
+ /* Return id of the bucket from which the packet was dropped. */
+ return bucket - q->buckets;
+}
+
+static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ enum wdrr_bucket_idx idx;
+ struct wdrr_bucket *bucket;
+
+ idx = hhf_classify(skb, sch);
+
+ bucket = &q->buckets[idx];
+ bucket_add(bucket, skb);
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ if (list_empty(&bucket->bucketchain)) {
+ unsigned int weight;
+
+ /* The logic of new_buckets vs. old_buckets is the same as
+ * new_flows vs. old_flows in the implementation of fq_codel,
+ * i.e., short bursts of non-HHs should have strict priority.
+ */
+ if (idx == WDRR_BUCKET_FOR_HH) {
+ /* Always move heavy-hitters to old bucket. */
+ weight = 1;
+ list_add_tail(&bucket->bucketchain, &q->old_buckets);
+ } else {
+ weight = q->hhf_non_hh_weight;
+ list_add_tail(&bucket->bucketchain, &q->new_buckets);
+ }
+ bucket->deficit = weight * q->quantum;
+ }
+ if (++sch->q.qlen <= sch->limit)
+ return NET_XMIT_SUCCESS;
+
+ q->drop_overlimit++;
+ /* Return Congestion Notification only if we dropped a packet from this
+ * bucket.
+ */
+ if (hhf_drop(sch) == idx)
+ return NET_XMIT_CN;
+
+ /* As we dropped a packet, better let upper stack know this. */
+ qdisc_tree_decrease_qlen(sch, 1);
+ return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb = NULL;
+ struct wdrr_bucket *bucket;
+ struct list_head *head;
+
+begin:
+ head = &q->new_buckets;
+ if (list_empty(head)) {
+ head = &q->old_buckets;
+ if (list_empty(head))
+ return NULL;
+ }
+ bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);
+
+ if (bucket->deficit <= 0) {
+ int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
+ 1 : q->hhf_non_hh_weight;
+
+ bucket->deficit += weight * q->quantum;
+ list_move_tail(&bucket->bucketchain, &q->old_buckets);
+ goto begin;
+ }
+
+ if (bucket->head) {
+ skb = dequeue_head(bucket);
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ }
+
+ if (!skb) {
+ /* Force a pass through old_buckets to prevent starvation. */
+ if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
+ list_move_tail(&bucket->bucketchain, &q->old_buckets);
+ else
+ list_del_init(&bucket->bucketchain);
+ goto begin;
+ }
+ qdisc_bstats_update(sch, skb);
+ bucket->deficit -= qdisc_pkt_len(skb);
+
+ return skb;
+}
+
+static void hhf_reset(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ while ((skb = hhf_dequeue(sch)) != NULL)
+ kfree_skb(skb);
+}
+
+static void *hhf_zalloc(size_t sz)
+{
+ void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+ if (!ptr)
+ ptr = vzalloc(sz);
+
+ return ptr;
+}
+
+static void hhf_free(void *addr)
+{
+ kvfree(addr);
+}
+
+static void hhf_destroy(struct Qdisc *sch)
+{
+ int i;
+ struct hhf_sched_data *q = qdisc_priv(sch);
+
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ hhf_free(q->hhf_arrays[i]);
+ hhf_free(q->hhf_valid_bits[i]);
+ }
+
+ for (i = 0; i < HH_FLOWS_CNT; i++) {
+ struct hh_flow_state *flow, *next;
+ struct list_head *head = &q->hh_flows[i];
+
+ if (list_empty(head))
+ continue;
+ list_for_each_entry_safe(flow, next, head, flowchain) {
+ list_del(&flow->flowchain);
+ kfree(flow);
+ }
+ }
+ hhf_free(q->hh_flows);
+}
+
+static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
+ [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 },
+ [TCA_HHF_QUANTUM] = { .type = NLA_U32 },
+ [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
+ [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 },
+ [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 },
+ [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 },
+ [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 },
+};
+
+static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_HHF_MAX + 1];
+ unsigned int qlen;
+ int err;
+ u64 non_hh_quantum;
+ u32 new_quantum = q->quantum;
+ u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_HHF_QUANTUM])
+ new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);
+
+ if (tb[TCA_HHF_NON_HH_WEIGHT])
+ new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);
+
+ non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
+ if (non_hh_quantum > INT_MAX)
+ return -EINVAL;
+
+ sch_tree_lock(sch);
+
+ if (tb[TCA_HHF_BACKLOG_LIMIT])
+ sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+
+ q->quantum = new_quantum;
+ q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+
+ if (tb[TCA_HHF_HH_FLOWS_LIMIT])
+ q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+
+ if (tb[TCA_HHF_RESET_TIMEOUT]) {
+ u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
+
+ q->hhf_reset_timeout = usecs_to_jiffies(us);
+ }
+
+ if (tb[TCA_HHF_ADMIT_BYTES])
+ q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+
+ if (tb[TCA_HHF_EVICT_TIMEOUT]) {
+ u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
+
+ q->hhf_evict_timeout = usecs_to_jiffies(us);
+ }
+
+ qlen = sch->q.qlen;
+ while (sch->q.qlen > sch->limit) {
+ struct sk_buff *skb = hhf_dequeue(sch);
+
+ kfree_skb(skb);
+ }
+ qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ int i;
+
+ sch->limit = 1000;
+ q->quantum = psched_mtu(qdisc_dev(sch));
+ q->perturbation = prandom_u32();
+ INIT_LIST_HEAD(&q->new_buckets);
+ INIT_LIST_HEAD(&q->old_buckets);
+
+ /* Configurable HHF parameters */
+ q->hhf_reset_timeout = HZ / 25; /* 40 ms */
+ q->hhf_admit_bytes = 131072; /* 128 KB */
+ q->hhf_evict_timeout = HZ; /* 1 sec */
+ q->hhf_non_hh_weight = 2;
+
+ if (opt) {
+ int err = hhf_change(sch, opt);
+
+ if (err)
+ return err;
+ }
+
+ if (!q->hh_flows) {
+ /* Initialize heavy-hitter flow table. */
+ q->hh_flows = hhf_zalloc(HH_FLOWS_CNT *
+ sizeof(struct list_head));
+ if (!q->hh_flows)
+ return -ENOMEM;
+ for (i = 0; i < HH_FLOWS_CNT; i++)
+ INIT_LIST_HEAD(&q->hh_flows[i]);
+
+ /* Cap max active HHs at twice len of hh_flows table. */
+ q->hh_flows_limit = 2 * HH_FLOWS_CNT;
+ q->hh_flows_overlimit = 0;
+ q->hh_flows_total_cnt = 0;
+ q->hh_flows_current_cnt = 0;
+
+ /* Initialize heavy-hitter filter arrays. */
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
+ sizeof(u32));
+ if (!q->hhf_arrays[i]) {
+ hhf_destroy(sch);
+ return -ENOMEM;
+ }
+ }
+ q->hhf_arrays_reset_timestamp = hhf_time_stamp();
+
+ /* Initialize valid bits of heavy-hitter filter arrays. */
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
+ BITS_PER_BYTE);
+ if (!q->hhf_valid_bits[i]) {
+ hhf_destroy(sch);
+ return -ENOMEM;
+ }
+ }
+
+ /* Initialize Weighted DRR buckets. */
+ for (i = 0; i < WDRR_BUCKET_CNT; i++) {
+ struct wdrr_bucket *bucket = q->buckets + i;
+
+ INIT_LIST_HEAD(&bucket->bucketchain);
+ }
+ }
+
+ return 0;
+}
+
+static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
+ nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+ nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
+ jiffies_to_usecs(q->hhf_reset_timeout)) ||
+ nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+ nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
+ jiffies_to_usecs(q->hhf_evict_timeout)) ||
+ nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ return -1;
+}
+
+static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct tc_hhf_xstats st = {
+ .drop_overlimit = q->drop_overlimit,
+ .hh_overlimit = q->hh_flows_overlimit,
+ .hh_tot_count = q->hh_flows_total_cnt,
+ .hh_cur_count = q->hh_flows_current_cnt,
+ };
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
+ .id = "hhf",
+ .priv_size = sizeof(struct hhf_sched_data),
+
+ .enqueue = hhf_enqueue,
+ .dequeue = hhf_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .drop = hhf_drop,
+ .init = hhf_init,
+ .reset = hhf_reset,
+ .destroy = hhf_destroy,
+ .change = hhf_change,
+ .dump = hhf_dump,
+ .dump_stats = hhf_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init hhf_module_init(void)
+{
+ return register_qdisc(&hhf_qdisc_ops);
+}
+
+static void __exit hhf_module_exit(void)
+{
+ unregister_qdisc(&hhf_qdisc_ops);
+}
+
+module_init(hhf_module_init)
+module_exit(hhf_module_exit)
+MODULE_AUTHOR("Terry Lam");
+MODULE_AUTHOR("Nandita Dukkipati");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 863846cc551..9f949abcace 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -219,11 +219,16 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
if (skb->priority == sch->handle)
return HTB_DIRECT; /* X:0 (direct flow) selected */
cl = htb_find(skb->priority, sch);
- if (cl && cl->level == 0)
- return cl;
+ if (cl) {
+ if (cl->level == 0)
+ return cl;
+ /* Start with inner filter chain if a non-leaf class is selected */
+ tcf = cl->filter_list;
+ } else {
+ tcf = q->filter_list;
+ }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- tcf = q->filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -712,7 +717,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level,
/* too much load - let's continue after a break for scheduling */
if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
- pr_warning("htb: too many events!\n");
+ pr_warn("htb: too many events!\n");
q->warned |= HTB_WARN_TOOMANYEVENTS;
}
@@ -997,6 +1002,8 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
[TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
+ [TCA_HTB_RATE64] = { .type = NLA_U64 },
+ [TCA_HTB_CEIL64] = { .type = NLA_U64 },
};
static void htb_work_func(struct work_struct *work)
@@ -1055,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the qdisc parameters.
+ */
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1074,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1089,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the class parameters.
+ */
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
@@ -1114,13 +1120,16 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.level = cl->level;
if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps))
+ goto nla_put_failure;
+ if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
+ goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1268,9 +1277,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
struct Qdisc *new_q = NULL;
int last_child = 0;
- // TODO: why don't allow to delete subtree ? references ? does
- // tc subsys quarantee us that in htb_destroy it holds no class
- // refs so that we can remove children safely there ?
+ /* TODO: why don't allow to delete subtree ? references ? does
+ * tc subsys guarantee us that in htb_destroy it holds no class
+ * refs so that we can remove children safely there ?
+ */
if (cl->children || cl->filter_cnt)
return -EBUSY;
@@ -1329,9 +1339,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)*arg, *parent;
struct nlattr *opt = tca[TCA_OPTIONS];
- struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
+ u64 rate64, ceil64;
/* extract all subattrs from opt attr */
if (!opt)
@@ -1352,16 +1362,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
goto failure;
/* Keeping backward compatible with rate_table based iproute2 tc */
- if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
- rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
- if (rtab)
- qdisc_put_rtab(rtab);
- }
- if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
- ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
- if (ctab)
- qdisc_put_rtab(ctab);
- }
+ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]));
+
+ if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]));
if (!cl) { /* new class */
struct Qdisc *new_q;
@@ -1468,21 +1473,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
}
+ rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+
+ ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
+
/* it used to be a nasty bug here, we have to check that node
* is really leaf before changing cl->un.leaf !
*/
if (!cl->level) {
- cl->quantum = hopt->rate.rate / q->rate2quantum;
+ u64 quantum = cl->rate.rate_bytes_ps;
+
+ do_div(quantum, q->rate2quantum);
+ cl->quantum = min_t(u64, quantum, INT_MAX);
+
if (!hopt->quantum && cl->quantum < 1000) {
- pr_warning(
- "HTB: quantum of class %X is small. Consider r2q change.\n",
- cl->common.classid);
+ pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
+ cl->common.classid);
cl->quantum = 1000;
}
if (!hopt->quantum && cl->quantum > 200000) {
- pr_warning(
- "HTB: quantum of class %X is big. Consider r2q change.\n",
- cl->common.classid);
+ pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
+ cl->common.classid);
cl->quantum = 200000;
}
if (hopt->quantum)
@@ -1491,9 +1505,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- psched_ratecfg_precompute(&cl->rate, &hopt->rate);
- psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
-
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index bce1665239b..62871c14e1f 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -100,8 +100,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 2e56185736d..a8b2864a696 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -78,14 +78,19 @@ static void mq_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
- struct Qdisc *qdisc;
+ struct Qdisc *qdisc, *old;
unsigned int ntx;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = priv->qdiscs[ntx];
- qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
- if (qdisc)
- qdisc_destroy(qdisc);
+ old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ if (old)
+ qdisc_destroy(old);
+#ifdef CONFIG_NET_SCHED
+ if (ntx < dev->real_num_tx_queues)
+ qdisc_list_add(qdisc);
+#endif
+
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d44c868cb53..6749e2f540d 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -167,15 +167,17 @@ static void mqprio_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- struct Qdisc *qdisc;
+ struct Qdisc *qdisc, *old;
unsigned int ntx;
/* Attach underlying qdisc */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = priv->qdiscs[ntx];
- qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
- if (qdisc)
- qdisc_destroy(qdisc);
+ old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ if (old)
+ qdisc_destroy(old);
+ if (ntx < dev->real_num_tx_queues)
+ qdisc_list_add(qdisc);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 2a2b096d9a6..afb050a735f 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -11,8 +11,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Author: Alexander Duyck <alexander.h.duyck@intel.com>
*/
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a6d788d4521..111d70fddae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -88,10 +88,10 @@ struct netem_sched_data {
u32 duplicate;
u32 reorder;
u32 corrupt;
- u32 rate;
+ u64 rate;
s32 packet_overhead;
u32 cell_size;
- u32 cell_size_reciprocal;
+ struct reciprocal_value cell_size_reciprocal;
s32 cell_overhead;
struct crndstate {
@@ -110,6 +110,18 @@ struct netem_sched_data {
CLG_GILB_ELL,
} loss_model;
+ enum {
+ TX_IN_GAP_PERIOD = 1,
+ TX_IN_BURST_PERIOD,
+ LOST_IN_GAP_PERIOD,
+ LOST_IN_BURST_PERIOD,
+ } _4_state_model;
+
+ enum {
+ GOOD_STATE = 1,
+ BAD_STATE,
+ } GE_state_model;
+
/* Correlated Loss Generation models */
struct clgstate {
/* state of the Markov chain */
@@ -169,7 +181,7 @@ static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
static void init_crandom(struct crndstate *state, unsigned long rho)
{
state->rho = rho;
- state->last = net_random();
+ state->last = prandom_u32();
}
/* get_crandom - correlated random number generator
@@ -182,9 +194,9 @@ static u32 get_crandom(struct crndstate *state)
unsigned long answer;
if (state->rho == 0) /* no correlation */
- return net_random();
+ return prandom_u32();
- value = net_random();
+ value = prandom_u32();
rho = (u64)state->rho + 1;
answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
state->last = answer;
@@ -198,51 +210,52 @@ static u32 get_crandom(struct crndstate *state)
static bool loss_4state(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
- u32 rnd = net_random();
+ u32 rnd = prandom_u32();
/*
* Makes a comparison between rnd and the transition
* probabilities outgoing from the current state, then decides the
* next state and if the next packet has to be transmitted or lost.
* The four states correspond to:
- * 1 => successfully transmitted packets within a gap period
- * 4 => isolated losses within a gap period
- * 3 => lost packets within a burst period
- * 2 => successfully transmitted packets within a burst period
+ * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
+ * LOST_IN_BURST_PERIOD => isolated losses within a gap period
+ * LOST_IN_GAP_PERIOD => lost packets within a burst period
+ * TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period
*/
switch (clg->state) {
- case 1:
+ case TX_IN_GAP_PERIOD:
if (rnd < clg->a4) {
- clg->state = 4;
+ clg->state = LOST_IN_BURST_PERIOD;
return true;
- } else if (clg->a4 < rnd && rnd < clg->a1) {
- clg->state = 3;
+ } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
+ clg->state = LOST_IN_GAP_PERIOD;
return true;
- } else if (clg->a1 < rnd)
- clg->state = 1;
+ } else if (clg->a1 + clg->a4 < rnd) {
+ clg->state = TX_IN_GAP_PERIOD;
+ }
break;
- case 2:
+ case TX_IN_BURST_PERIOD:
if (rnd < clg->a5) {
- clg->state = 3;
+ clg->state = LOST_IN_GAP_PERIOD;
return true;
- } else
- clg->state = 2;
+ } else {
+ clg->state = TX_IN_BURST_PERIOD;
+ }
break;
- case 3:
+ case LOST_IN_GAP_PERIOD:
if (rnd < clg->a3)
- clg->state = 2;
+ clg->state = TX_IN_BURST_PERIOD;
else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
- clg->state = 1;
- return true;
+ clg->state = TX_IN_GAP_PERIOD;
} else if (clg->a2 + clg->a3 < rnd) {
- clg->state = 3;
+ clg->state = LOST_IN_GAP_PERIOD;
return true;
}
break;
- case 4:
- clg->state = 1;
+ case LOST_IN_BURST_PERIOD:
+ clg->state = TX_IN_GAP_PERIOD;
break;
}
@@ -264,15 +277,16 @@ static bool loss_gilb_ell(struct netem_sched_data *q)
struct clgstate *clg = &q->clg;
switch (clg->state) {
- case 1:
- if (net_random() < clg->a1)
- clg->state = 2;
- if (net_random() < clg->a4)
+ case GOOD_STATE:
+ if (prandom_u32() < clg->a1)
+ clg->state = BAD_STATE;
+ if (prandom_u32() < clg->a4)
return true;
- case 2:
- if (net_random() < clg->a2)
- clg->state = 1;
- if (clg->a3 > net_random())
+ break;
+ case BAD_STATE:
+ if (prandom_u32() < clg->a2)
+ clg->state = GOOD_STATE;
+ if (prandom_u32() > clg->a3)
return true;
}
@@ -358,6 +372,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
return PSCHED_NS2TICKS(ticks);
}
+static void tfifo_reset(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct rb_node *p;
+
+ while ((p = rb_first(&q->t_root))) {
+ struct sk_buff *skb = netem_rb_to_skb(p);
+
+ rb_erase(p, &q->t_root);
+ skb->next = NULL;
+ skb->prev = NULL;
+ kfree_skb(skb);
+ }
+}
+
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -442,7 +471,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb_checksum_help(skb)))
return qdisc_drop(skb, sch);
- skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
+ skb->data[prandom_u32() % skb_headlen(skb)] ^=
+ 1<<(prandom_u32() % 8);
}
if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
@@ -480,7 +510,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
now = netem_skb_cb(last)->time_to_send;
}
- delay += packet_len_2_sched_time(skb->len, q);
+ delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
}
cb->time_to_send = now + delay;
@@ -520,6 +550,7 @@ static unsigned int netem_drop(struct Qdisc *sch)
skb->next = NULL;
skb->prev = NULL;
len = qdisc_pkt_len(skb);
+ sch->qstats.backlog -= len;
kfree_skb(skb);
}
}
@@ -609,6 +640,7 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset_queue(sch);
+ tfifo_reset(sch);
if (q->qdisc)
qdisc_reset(q->qdisc);
qdisc_watchdog_cancel(&q->watchdog);
@@ -616,12 +648,7 @@ static void netem_reset(struct Qdisc *sch)
static void dist_free(struct disttable *d)
{
- if (d) {
- if (is_vmalloc_addr(d))
- vfree(d);
- else
- kfree(d);
- }
+ kvfree(d);
}
/*
@@ -662,9 +689,8 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
-static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corr *c = nla_data(attr);
init_crandom(&q->delay_cor, c->delay_corr);
@@ -672,47 +698,45 @@ static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->dup_cor, c->dup_corr);
}
-static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_reorder *r = nla_data(attr);
q->reorder = r->probability;
init_crandom(&q->reorder_cor, r->correlation);
}
-static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corrupt *r = nla_data(attr);
q->corrupt = r->probability;
init_crandom(&q->corrupt_cor, r->correlation);
}
-static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_rate *r = nla_data(attr);
q->rate = r->rate;
q->packet_overhead = r->packet_overhead;
q->cell_size = r->cell_size;
+ q->cell_overhead = r->cell_overhead;
if (q->cell_size)
q->cell_size_reciprocal = reciprocal_value(q->cell_size);
- q->cell_overhead = r->cell_overhead;
+ else
+ q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}
-static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
+static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct nlattr *la;
int rem;
nla_for_each_nested(la, attr, rem) {
u16 type = nla_type(la);
- switch(type) {
+ switch (type) {
case NETEM_LOSS_GI: {
const struct tc_netem_gimodel *gi = nla_data(la);
@@ -723,7 +747,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
q->loss_model = CLG_4_STATES;
- q->clg.state = 1;
+ q->clg.state = TX_IN_GAP_PERIOD;
q->clg.a1 = gi->p13;
q->clg.a2 = gi->p31;
q->clg.a3 = gi->p32;
@@ -741,7 +765,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
}
q->loss_model = CLG_GILB_ELL;
- q->clg.state = 1;
+ q->clg.state = GOOD_STATE;
q->clg.a1 = ge->p;
q->clg.a2 = ge->r;
q->clg.a3 = ge->h;
@@ -765,6 +789,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
[TCA_NETEM_ECN] = { .type = NLA_U32 },
+ [TCA_NETEM_RATE64] = { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -791,6 +816,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_NETEM_MAX + 1];
struct tc_netem_qopt *qopt;
+ struct clgstate old_clg;
+ int old_loss_model = CLG_RANDOM;
int ret;
if (opt == NULL)
@@ -801,6 +828,33 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (ret < 0)
return ret;
+ /* backup q->clg and q->loss_model */
+ old_clg = q->clg;
+ old_loss_model = q->loss_model;
+
+ if (tb[TCA_NETEM_LOSS]) {
+ ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
+ if (ret) {
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ } else {
+ q->loss_model = CLG_RANDOM;
+ }
+
+ if (tb[TCA_NETEM_DELAY_DIST]) {
+ ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
+ if (ret) {
+ /* recover clg and loss_model, in case of
+ * q->clg and q->loss_model were modified
+ * in get_loss_clg()
+ */
+ q->clg = old_clg;
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ }
+
sch->limit = qopt->limit;
q->latency = qopt->latency;
@@ -818,30 +872,24 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->reorder = ~0;
if (tb[TCA_NETEM_CORR])
- get_correlation(sch, tb[TCA_NETEM_CORR]);
-
- if (tb[TCA_NETEM_DELAY_DIST]) {
- ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
- if (ret)
- return ret;
- }
+ get_correlation(q, tb[TCA_NETEM_CORR]);
if (tb[TCA_NETEM_REORDER])
- get_reorder(sch, tb[TCA_NETEM_REORDER]);
+ get_reorder(q, tb[TCA_NETEM_REORDER]);
if (tb[TCA_NETEM_CORRUPT])
- get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
if (tb[TCA_NETEM_RATE])
- get_rate(sch, tb[TCA_NETEM_RATE]);
+ get_rate(q, tb[TCA_NETEM_RATE]);
+
+ if (tb[TCA_NETEM_RATE64])
+ q->rate = max_t(u64, q->rate,
+ nla_get_u64(tb[TCA_NETEM_RATE64]));
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
- q->loss_model = CLG_RANDOM;
- if (tb[TCA_NETEM_LOSS])
- ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
-
return ret;
}
@@ -957,7 +1005,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
goto nla_put_failure;
- rate.rate = q->rate;
+ if (q->rate >= (1ULL << 32)) {
+ if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate))
+ goto nla_put_failure;
+ rate.rate = ~0U;
+ } else {
+ rate.rate = q->rate;
+ }
rate.packet_overhead = q->packet_overhead;
rate.cell_size = q->cell_size;
rate.cell_overhead = q->cell_overhead;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
new file mode 100644
index 00000000000..fefeeb73f15
--- /dev/null
+++ b/net/sched/sch_pie.c
@@ -0,0 +1,566 @@
+/* Copyright (C) 2013 Cisco Systems, Inc, 2013.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Vijay Subramanian <vijaynsu@cisco.com>
+ * Author: Mythili Prabhu <mysuryan@cisco.com>
+ *
+ * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
+ * University of Oslo, Norway.
+ *
+ * References:
+ * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
+ * IEEE Conference on High Performance Switching and Routing 2013 :
+ * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+
+#define QUEUE_THRESHOLD 10000
+#define DQCOUNT_INVALID -1
+#define MAX_PROB 0xffffffff
+#define PIE_SCALE 8
+
+/* parameters used */
+struct pie_params {
+ psched_time_t target; /* user specified target delay in pschedtime */
+ u32 tupdate; /* timer frequency (in jiffies) */
+ u32 limit; /* number of packets that can be enqueued */
+ u32 alpha; /* alpha and beta are between 0 and 32 */
+ u32 beta; /* and are used for shift relative to 1 */
+ bool ecn; /* true if ecn is enabled */
+ bool bytemode; /* to scale drop early prob based on pkt size */
+};
+
+/* variables used */
+struct pie_vars {
+ u32 prob; /* probability but scaled by u32 limit. */
+ psched_time_t burst_time;
+ psched_time_t qdelay;
+ psched_time_t qdelay_old;
+ u64 dq_count; /* measured in bytes */
+ psched_time_t dq_tstamp; /* drain rate */
+ u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
+ u32 qlen_old; /* in bytes */
+};
+
+/* statistics gathering */
+struct pie_stats {
+ u32 packets_in; /* total number of packets enqueued */
+ u32 dropped; /* packets dropped due to pie_action */
+ u32 overlimit; /* dropped due to lack of space in queue */
+ u32 maxq; /* maximum queue size */
+ u32 ecn_mark; /* packets marked with ECN */
+};
+
+/* private data for the Qdisc */
+struct pie_sched_data {
+ struct pie_params params;
+ struct pie_vars vars;
+ struct pie_stats stats;
+ struct timer_list adapt_timer;
+};
+
+static void pie_params_init(struct pie_params *params)
+{
+ params->alpha = 2;
+ params->beta = 20;
+ params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
+ params->limit = 1000; /* default of 1000 packets */
+ params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */
+ params->ecn = false;
+ params->bytemode = false;
+}
+
+static void pie_vars_init(struct pie_vars *vars)
+{
+ vars->dq_count = DQCOUNT_INVALID;
+ vars->avg_dq_rate = 0;
+ /* default of 100 ms in pschedtime */
+ vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+}
+
+static bool drop_early(struct Qdisc *sch, u32 packet_size)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ u32 rnd;
+ u32 local_prob = q->vars.prob;
+ u32 mtu = psched_mtu(qdisc_dev(sch));
+
+ /* If there is still burst allowance left skip random early drop */
+ if (q->vars.burst_time > 0)
+ return false;
+
+ /* If current delay is less than half of target, and
+ * if drop prob is low already, disable early_drop
+ */
+ if ((q->vars.qdelay < q->params.target / 2)
+ && (q->vars.prob < MAX_PROB / 5))
+ return false;
+
+ /* If we have fewer than 2 mtu-sized packets, disable drop_early,
+ * similar to min_th in RED
+ */
+ if (sch->qstats.backlog < 2 * mtu)
+ return false;
+
+ /* If bytemode is turned on, use packet size to compute new
+ * probablity. Smaller packets will have lower drop prob in this case
+ */
+ if (q->params.bytemode && packet_size <= mtu)
+ local_prob = (local_prob / mtu) * packet_size;
+ else
+ local_prob = q->vars.prob;
+
+ rnd = prandom_u32();
+ if (rnd < local_prob)
+ return true;
+
+ return false;
+}
+
+static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ bool enqueue = false;
+
+ if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
+ q->stats.overlimit++;
+ goto out;
+ }
+
+ if (!drop_early(sch, skb->len)) {
+ enqueue = true;
+ } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
+ INET_ECN_set_ce(skb)) {
+ /* If packet is ecn capable, mark it if drop probability
+ * is lower than 10%, else drop it.
+ */
+ q->stats.ecn_mark++;
+ enqueue = true;
+ }
+
+ /* we can enqueue the packet */
+ if (enqueue) {
+ q->stats.packets_in++;
+ if (qdisc_qlen(sch) > q->stats.maxq)
+ q->stats.maxq = qdisc_qlen(sch);
+
+ return qdisc_enqueue_tail(skb, sch);
+ }
+
+out:
+ q->stats.dropped++;
+ return qdisc_drop(skb, sch);
+}
+
+static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
+ [TCA_PIE_TARGET] = {.type = NLA_U32},
+ [TCA_PIE_LIMIT] = {.type = NLA_U32},
+ [TCA_PIE_TUPDATE] = {.type = NLA_U32},
+ [TCA_PIE_ALPHA] = {.type = NLA_U32},
+ [TCA_PIE_BETA] = {.type = NLA_U32},
+ [TCA_PIE_ECN] = {.type = NLA_U32},
+ [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
+};
+
+static int pie_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_PIE_MAX + 1];
+ unsigned int qlen;
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy);
+ if (err < 0)
+ return err;
+
+ sch_tree_lock(sch);
+
+ /* convert from microseconds to pschedtime */
+ if (tb[TCA_PIE_TARGET]) {
+ /* target is in us */
+ u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
+
+ /* convert to pschedtime */
+ q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ }
+
+ /* tupdate is in jiffies */
+ if (tb[TCA_PIE_TUPDATE])
+ q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+
+ if (tb[TCA_PIE_LIMIT]) {
+ u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
+
+ q->params.limit = limit;
+ sch->limit = limit;
+ }
+
+ if (tb[TCA_PIE_ALPHA])
+ q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
+
+ if (tb[TCA_PIE_BETA])
+ q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
+
+ if (tb[TCA_PIE_ECN])
+ q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
+
+ if (tb[TCA_PIE_BYTEMODE])
+ q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+
+ /* Drop excess packets if new limit is lower */
+ qlen = sch->q.qlen;
+ while (sch->q.qlen > sch->limit) {
+ struct sk_buff *skb = __skb_dequeue(&sch->q);
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_drop(skb, sch);
+ }
+ qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
+{
+
+ struct pie_sched_data *q = qdisc_priv(sch);
+ int qlen = sch->qstats.backlog; /* current queue size in bytes */
+
+ /* If current queue is about 10 packets or more and dq_count is unset
+ * we have enough packets to calculate the drain rate. Save
+ * current time as dq_tstamp and start measurement cycle.
+ */
+ if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
+ q->vars.dq_tstamp = psched_get_time();
+ q->vars.dq_count = 0;
+ }
+
+ /* Calculate the average drain rate from this value. If queue length
+ * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset
+ * the dq_count to -1 as we don't have enough packets to calculate the
+ * drain rate anymore The following if block is entered only when we
+ * have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
+ * and we calculate the drain rate for the threshold here. dq_count is
+ * in bytes, time difference in psched_time, hence rate is in
+ * bytes/psched_time.
+ */
+ if (q->vars.dq_count != DQCOUNT_INVALID) {
+ q->vars.dq_count += skb->len;
+
+ if (q->vars.dq_count >= QUEUE_THRESHOLD) {
+ psched_time_t now = psched_get_time();
+ u32 dtime = now - q->vars.dq_tstamp;
+ u32 count = q->vars.dq_count << PIE_SCALE;
+
+ if (dtime == 0)
+ return;
+
+ count = count / dtime;
+
+ if (q->vars.avg_dq_rate == 0)
+ q->vars.avg_dq_rate = count;
+ else
+ q->vars.avg_dq_rate =
+ (q->vars.avg_dq_rate -
+ (q->vars.avg_dq_rate >> 3)) + (count >> 3);
+
+ /* If the queue has receded below the threshold, we hold
+ * on to the last drain rate calculated, else we reset
+ * dq_count to 0 to re-enter the if block when the next
+ * packet is dequeued
+ */
+ if (qlen < QUEUE_THRESHOLD)
+ q->vars.dq_count = DQCOUNT_INVALID;
+ else {
+ q->vars.dq_count = 0;
+ q->vars.dq_tstamp = psched_get_time();
+ }
+
+ if (q->vars.burst_time > 0) {
+ if (q->vars.burst_time > dtime)
+ q->vars.burst_time -= dtime;
+ else
+ q->vars.burst_time = 0;
+ }
+ }
+ }
+}
+
+static void calculate_probability(struct Qdisc *sch)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ u32 qlen = sch->qstats.backlog; /* queue size in bytes */
+ psched_time_t qdelay = 0; /* in pschedtime */
+ psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
+ s32 delta = 0; /* determines the change in probability */
+ u32 oldprob;
+ u32 alpha, beta;
+ bool update_prob = true;
+
+ q->vars.qdelay_old = q->vars.qdelay;
+
+ if (q->vars.avg_dq_rate > 0)
+ qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
+ else
+ qdelay = 0;
+
+ /* If qdelay is zero and qlen is not, it means qlen is very small, less
+ * than dequeue_rate, so we do not update probabilty in this round
+ */
+ if (qdelay == 0 && qlen != 0)
+ update_prob = false;
+
+ /* In the algorithm, alpha and beta are between 0 and 2 with typical
+ * value for alpha as 0.125. In this implementation, we use values 0-32
+ * passed from user space to represent this. Also, alpha and beta have
+ * unit of HZ and need to be scaled before they can used to update
+ * probability. alpha/beta are updated locally below by 1) scaling them
+ * appropriately 2) scaling down by 16 to come to 0-2 range.
+ * Please see paper for details.
+ *
+ * We scale alpha and beta differently depending on whether we are in
+ * light, medium or high dropping mode.
+ */
+ if (q->vars.prob < MAX_PROB / 100) {
+ alpha =
+ (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
+ beta =
+ (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
+ } else if (q->vars.prob < MAX_PROB / 10) {
+ alpha =
+ (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
+ beta =
+ (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
+ } else {
+ alpha =
+ (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta =
+ (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ }
+
+ /* alpha and beta should be between 0 and 32, in multiples of 1/16 */
+ delta += alpha * ((qdelay - q->params.target));
+ delta += beta * ((qdelay - qdelay_old));
+
+ oldprob = q->vars.prob;
+
+ /* to ensure we increase probability in steps of no more than 2% */
+ if (delta > (s32) (MAX_PROB / (100 / 2)) &&
+ q->vars.prob >= MAX_PROB / 10)
+ delta = (MAX_PROB / 100) * 2;
+
+ /* Non-linear drop:
+ * Tune drop probability to increase quickly for high delays(>= 250ms)
+ * 250ms is derived through experiments and provides error protection
+ */
+
+ if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
+ delta += MAX_PROB / (100 / 2);
+
+ q->vars.prob += delta;
+
+ if (delta > 0) {
+ /* prevent overflow */
+ if (q->vars.prob < oldprob) {
+ q->vars.prob = MAX_PROB;
+ /* Prevent normalization error. If probability is at
+ * maximum value already, we normalize it here, and
+ * skip the check to do a non-linear drop in the next
+ * section.
+ */
+ update_prob = false;
+ }
+ } else {
+ /* prevent underflow */
+ if (q->vars.prob > oldprob)
+ q->vars.prob = 0;
+ }
+
+ /* Non-linear drop in probability: Reduce drop probability quickly if
+ * delay is 0 for 2 consecutive Tupdate periods.
+ */
+
+ if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
+ q->vars.prob = (q->vars.prob * 98) / 100;
+
+ q->vars.qdelay = qdelay;
+ q->vars.qlen_old = qlen;
+
+ /* We restart the measurement cycle if the following conditions are met
+ * 1. If the delay has been low for 2 consecutive Tupdate periods
+ * 2. Calculated drop probability is zero
+ * 3. We have atleast one estimate for the avg_dq_rate ie.,
+ * is a non-zero value
+ */
+ if ((q->vars.qdelay < q->params.target / 2) &&
+ (q->vars.qdelay_old < q->params.target / 2) &&
+ (q->vars.prob == 0) &&
+ (q->vars.avg_dq_rate > 0))
+ pie_vars_init(&q->vars);
+}
+
+static void pie_timer(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc *)arg;
+ struct pie_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+ spin_lock(root_lock);
+ calculate_probability(sch);
+
+ /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
+ if (q->params.tupdate)
+ mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
+ spin_unlock(root_lock);
+
+}
+
+static int pie_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+
+ pie_params_init(&q->params);
+ pie_vars_init(&q->vars);
+ sch->limit = q->params.limit;
+
+ setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
+ mod_timer(&q->adapt_timer, jiffies + HZ / 2);
+
+ if (opt) {
+ int err = pie_change(sch, opt);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+
+ /* convert target from pschedtime to us */
+ if (nla_put_u32(skb, TCA_PIE_TARGET,
+ ((u32) PSCHED_TICKS2NS(q->params.target)) /
+ NSEC_PER_USEC) ||
+ nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+ nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
+ nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
+ nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
+ nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -1;
+
+}
+
+static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ struct tc_pie_xstats st = {
+ .prob = q->vars.prob,
+ .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+ NSEC_PER_USEC,
+ /* unscale and return dq_rate in bytes per sec */
+ .avg_dq_rate = q->vars.avg_dq_rate *
+ (PSCHED_TICKS_PER_SEC) >> PIE_SCALE,
+ .packets_in = q->stats.packets_in,
+ .overlimit = q->stats.overlimit,
+ .maxq = q->stats.maxq,
+ .dropped = q->stats.dropped,
+ .ecn_mark = q->stats.ecn_mark,
+ };
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ skb = __qdisc_dequeue_head(sch, &sch->q);
+
+ if (!skb)
+ return NULL;
+
+ pie_process_dequeue(sch, skb);
+ return skb;
+}
+
+static void pie_reset(struct Qdisc *sch)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ qdisc_reset_queue(sch);
+ pie_vars_init(&q->vars);
+}
+
+static void pie_destroy(struct Qdisc *sch)
+{
+ struct pie_sched_data *q = qdisc_priv(sch);
+ q->params.tupdate = 0;
+ del_timer_sync(&q->adapt_timer);
+}
+
+static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
+ .id = "pie",
+ .priv_size = sizeof(struct pie_sched_data),
+ .enqueue = pie_qdisc_enqueue,
+ .dequeue = pie_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = pie_init,
+ .destroy = pie_destroy,
+ .reset = pie_reset,
+ .change = pie_change,
+ .dump = pie_dump,
+ .dump_stats = pie_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init pie_module_init(void)
+{
+ return register_qdisc(&pie_qdisc_ops);
+}
+
+static void __exit pie_module_exit(void)
+{
+ unregister_qdisc(&pie_qdisc_ops);
+}
+
+module_init(pie_module_init);
+module_exit(pie_module_exit);
+
+MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler");
+MODULE_AUTHOR("Vijay Subramanian");
+MODULE_AUTHOR("Mythili Prabhu");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 30ea4674cab..9b0f7093d97 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -220,7 +220,7 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da
static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
{
- q->bins[slot].perturbation = net_random();
+ q->bins[slot].perturbation = prandom_u32();
}
static void sfb_swap_slot(struct sfb_sched_data *q)
@@ -381,7 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
goto enqueue;
}
- r = net_random() & SFB_MAX_PROB;
+ r = prandom_u32() & SFB_MAX_PROB;
if (unlikely(r < p_min)) {
if (unlikely(p_min > SFB_MAX_PROB / 2)) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d3a1bc26dbf..1af2f73906d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -237,10 +237,12 @@ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
}
#define sfq_unlink(q, x, n, p) \
- n = q->slots[x].dep.next; \
- p = q->slots[x].dep.prev; \
- sfq_dep_head(q, p)->next = n; \
- sfq_dep_head(q, n)->prev = p
+ do { \
+ n = q->slots[x].dep.next; \
+ p = q->slots[x].dep.prev; \
+ sfq_dep_head(q, p)->next = n; \
+ sfq_dep_head(q, n)->prev = p; \
+ } while (0)
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
@@ -627,7 +629,7 @@ static void sfq_perturbation(unsigned long arg)
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- q->perturbation = net_random();
+ q->perturbation = prandom_u32();
if (!q->filter_list && q->tail)
sfq_rehash(sch);
spin_unlock(root_lock);
@@ -696,7 +698,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
del_timer(&q->perturb_timer);
if (q->perturb_period) {
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
- q->perturbation = net_random();
+ q->perturbation = prandom_u32();
}
sch_tree_unlock(sch);
kfree(p);
@@ -714,12 +716,7 @@ static void *sfq_alloc(size_t sz)
static void sfq_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void sfq_destroy(struct Qdisc *sch)
@@ -757,7 +754,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
- q->perturbation = net_random();
+ q->perturbation = prandom_u32();
if (opt) {
int err = sfq_change(sch, opt);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1aaf1b6e51a..18ff6343370 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -101,12 +101,11 @@
struct tbf_sched_data {
/* Parameters */
u32 limit; /* Maximal length of backlog: bytes */
+ u32 max_size;
s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
s64 mtu;
- u32 max_size;
struct psched_ratecfg rate;
struct psched_ratecfg peak;
- bool peak_present;
/* Variables */
s64 tokens; /* Current number of B tokens */
@@ -117,6 +116,42 @@ struct tbf_sched_data {
};
+/* Time to Length, convert time in ns to length in bytes
+ * to determinate how many bytes can be sent in given time.
+ */
+static u64 psched_ns_t2l(const struct psched_ratecfg *r,
+ u64 time_in_ns)
+{
+ /* The formula is :
+ * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
+ */
+ u64 len = time_in_ns * r->rate_bytes_ps;
+
+ do_div(len, NSEC_PER_SEC);
+
+ if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
+ do_div(len, 53);
+ len = len * 48;
+ }
+
+ if (len > r->overhead)
+ len -= r->overhead;
+ else
+ len = 0;
+
+ return len;
+}
+
+/*
+ * Return length of individual segments of a gso packet,
+ * including all headers (MAC, IP, TCP/UDP)
+ */
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@@ -136,12 +171,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
while (segs) {
nskb = segs->next;
segs->next = NULL;
- if (likely(segs->len <= q->max_size)) {
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- ret = qdisc_enqueue(segs, q->qdisc);
- } else {
- ret = qdisc_reshape_fail(skb, sch);
- }
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
sch->qstats.drops++;
@@ -163,7 +194,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int ret;
if (qdisc_pkt_len(skb) > q->max_size) {
- if (skb_is_gso(skb))
+ if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
return tbf_segment(skb, sch);
return qdisc_reshape_fail(skb, sch);
}
@@ -190,6 +221,11 @@ static unsigned int tbf_drop(struct Qdisc *sch)
return len;
}
+static bool tbf_peak_present(const struct tbf_sched_data *q)
+{
+ return q->peak.rate_bytes_ps;
+}
+
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -206,7 +242,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
now = ktime_to_ns(ktime_get());
toks = min_t(s64, now - q->t_c, q->buffer);
- if (q->peak_present) {
+ if (tbf_peak_present(q)) {
ptoks = toks + q->ptokens;
if (ptoks > q->mtu)
ptoks = q->mtu;
@@ -266,20 +302,26 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_TBF_RATE64] = { .type = NLA_U64 },
+ [TCA_TBF_PRATE64] = { .type = NLA_U64 },
+ [TCA_TBF_BURST] = { .type = NLA_U32 },
+ [TCA_TBF_PBURST] = { .type = NLA_U32 },
};
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
int err;
struct tbf_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_TBF_PTAB + 1];
+ struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt;
- struct qdisc_rate_table *rtab = NULL;
- struct qdisc_rate_table *ptab = NULL;
struct Qdisc *child = NULL;
- int max_size, n;
+ struct psched_ratecfg rate;
+ struct psched_ratecfg peak;
+ u64 max_size;
+ s64 buffer, mtu;
+ u64 rate64 = 0, prate64 = 0;
- err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
+ err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
if (err < 0)
return err;
@@ -288,33 +330,59 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done;
qopt = nla_data(tb[TCA_TBF_PARMS]);
- rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
- if (rtab == NULL)
- goto done;
+ if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
+ tb[TCA_TBF_RTAB]));
+
+ if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
+ tb[TCA_TBF_PTAB]));
+
+ buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
+ mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
+
+ if (tb[TCA_TBF_RATE64])
+ rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
+ psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
+
+ if (tb[TCA_TBF_BURST]) {
+ max_size = nla_get_u32(tb[TCA_TBF_BURST]);
+ buffer = psched_l2t_ns(&rate, max_size);
+ } else {
+ max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
+ }
if (qopt->peakrate.rate) {
- if (qopt->peakrate.rate > qopt->rate.rate)
- ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
- if (ptab == NULL)
+ if (tb[TCA_TBF_PRATE64])
+ prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
+ psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
+ if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
+ pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
+ peak.rate_bytes_ps, rate.rate_bytes_ps);
+ err = -EINVAL;
goto done;
- }
+ }
- for (n = 0; n < 256; n++)
- if (rtab->data[n] > qopt->buffer)
- break;
- max_size = (n << qopt->rate.cell_log) - 1;
- if (ptab) {
- int size;
-
- for (n = 0; n < 256; n++)
- if (ptab->data[n] > qopt->mtu)
- break;
- size = (n << qopt->peakrate.cell_log) - 1;
- if (size < max_size)
- max_size = size;
+ if (tb[TCA_TBF_PBURST]) {
+ u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
+ max_size = min_t(u32, max_size, pburst);
+ mtu = psched_l2t_ns(&peak, pburst);
+ } else {
+ max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
+ }
+ } else {
+ memset(&peak, 0, sizeof(peak));
}
- if (max_size < 0)
+
+ if (max_size < psched_mtu(qdisc_dev(sch)))
+ pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
+ max_size, qdisc_dev(sch)->name,
+ psched_mtu(qdisc_dev(sch)));
+
+ if (!max_size) {
+ err = -EINVAL;
goto done;
+ }
if (q->qdisc != &noop_qdisc) {
err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -335,27 +403,24 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc = child;
}
q->limit = qopt->limit;
- q->mtu = PSCHED_TICKS2NS(qopt->mtu);
+ if (tb[TCA_TBF_PBURST])
+ q->mtu = mtu;
+ else
+ q->mtu = PSCHED_TICKS2NS(qopt->mtu);
q->max_size = max_size;
- q->buffer = PSCHED_TICKS2NS(qopt->buffer);
+ if (tb[TCA_TBF_BURST])
+ q->buffer = buffer;
+ else
+ q->buffer = PSCHED_TICKS2NS(qopt->buffer);
q->tokens = q->buffer;
q->ptokens = q->mtu;
- psched_ratecfg_precompute(&q->rate, &rtab->rate);
- if (ptab) {
- psched_ratecfg_precompute(&q->peak, &ptab->rate);
- q->peak_present = true;
- } else {
- q->peak_present = false;
- }
+ memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
sch_tree_unlock(sch);
err = 0;
done:
- if (rtab)
- qdisc_put_rtab(rtab);
- if (ptab)
- qdisc_put_rtab(ptab);
return err;
}
@@ -394,7 +459,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.limit = q->limit;
psched_ratecfg_getrate(&opt.rate, &q->rate);
- if (q->peak_present)
+ if (tbf_peak_present(q))
psched_ratecfg_getrate(&opt.peakrate, &q->peak);
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
@@ -402,9 +467,15 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.buffer = PSCHED_NS2TICKS(q->buffer);
if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
+ nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
+ goto nla_put_failure;
+ if (tbf_peak_present(q) &&
+ q->peak.rate_bytes_ps >= (1ULL << 32) &&
+ nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
+ goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);