aboutsummaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig22
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c18
-rw-r--r--net/sched/act_gact.c4
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_nat.c4
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c33
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_basic.c2
-rw-r--r--net/sched/cls_cgroup.c288
-rw-r--r--net/sched/cls_fw.c2
-rw-r--r--net/sched/cls_route.c2
-rw-r--r--net/sched/cls_tcindex.c6
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/ematch.c18
-rw-r--r--net/sched/sch_api.c50
-rw-r--r--net/sched/sch_atm.c36
-rw-r--r--net/sched/sch_blackhole.c1
-rw-r--r--net/sched/sch_cbq.c76
-rw-r--r--net/sched/sch_drr.c519
-rw-r--r--net/sched/sch_dsmark.c22
-rw-r--r--net/sched/sch_fifo.c4
-rw-r--r--net/sched/sch_generic.c40
-rw-r--r--net/sched/sch_gred.c22
-rw-r--r--net/sched/sch_hfsc.c64
-rw-r--r--net/sched/sch_htb.c171
-rw-r--r--net/sched/sch_multiq.c82
-rw-r--r--net/sched/sch_netem.c160
-rw-r--r--net/sched/sch_prio.c50
-rw-r--r--net/sched/sch_red.c33
-rw-r--r--net/sched/sch_sfq.c71
-rw-r--r--net/sched/sch_tbf.c44
-rw-r--r--net/sched/sch_teql.c19
37 files changed, 1216 insertions, 683 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155d..4f7ef0db302 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
If unsure, say N.
+config NET_SCH_DRR
+ tristate "Deficit Round Robin scheduler (DRR)"
+ help
+ Say Y here if you want to use the Deficit Round Robin (DRR) packet
+ scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_drr.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
@@ -316,6 +327,17 @@ config NET_CLS_FLOW
To compile this code as a module, choose M here: the
module will be called cls_flow.
+config NET_CLS_CGROUP
+ bool "Control Group Classifier"
+ select NET_CLS
+ depends on CGROUPS
+ ---help---
+ Say Y here if you want to classify packets based on the control
+ cgroup of their process.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_cgroup.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b26..54d950cd4b8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
+obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8f457f1e0ac..9d03cc33b6c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
}
EXPORT_SYMBOL(tcf_hash_check);
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
+ struct tc_action *a, int size, int bind,
+ u32 *idx_gen, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
if (unlikely(!p))
- return p;
+ return ERR_PTR(-ENOMEM);
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
@@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti
p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
- if (est)
- gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ if (est) {
+ int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
+ if (err) {
+ kfree(p);
+ return ERR_PTR(err);
+ }
+ }
+
a->priv = (void *) p;
return p;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ac04289da5d..e7f796aec65 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
bind, &gact_idx_gen, &gact_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0453d79ebf5..082c520b0de 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
&ipt_idx_gen, &ipt_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 70341c020b6..b9aaab4e035 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
return -EINVAL;
pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
&mirred_idx_gen, &mirred_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b39ed485bc..d885ba31156 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&nat_idx_gen, &nat_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
p = to_tcf_nat(pc);
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d5f4e340486..96c0ed115e2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
return -EINVAL;
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&pedit_idx_gen, &pedit_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 38015b49394..5c72a116b1a 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,17 +182,32 @@ override:
R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
if (R_tab == NULL)
goto failure;
+
+ if (!est && (ret == ACT_P_CREATED ||
+ !gen_estimator_active(&police->tcf_bstats,
+ &police->tcf_rate_est))) {
+ err = -EINVAL;
+ goto failure;
+ }
+
if (parm->peakrate.rate) {
P_tab = qdisc_get_rtab(&parm->peakrate,
tb[TCA_POLICE_PEAKRATE]);
- if (P_tab == NULL) {
- qdisc_put_rtab(R_tab);
+ if (P_tab == NULL)
goto failure;
- }
}
}
- /* No failure allowed after this point */
+
spin_lock_bh(&police->tcf_lock);
+ if (est) {
+ err = gen_replace_estimator(&police->tcf_bstats,
+ &police->tcf_rate_est,
+ &police->tcf_lock, est);
+ if (err)
+ goto failure_unlock;
+ }
+
+ /* No failure allowed after this point */
if (R_tab != NULL) {
qdisc_put_rtab(police->tcfp_R_tab);
police->tcfp_R_tab = R_tab;
@@ -217,10 +232,6 @@ override:
if (tb[TCA_POLICE_AVRATE])
police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
- if (est)
- gen_replace_estimator(&police->tcf_bstats,
- &police->tcf_rate_est,
- &police->tcf_lock, est);
spin_unlock_bh(&police->tcf_lock);
if (ret != ACT_P_CREATED)
@@ -238,7 +249,13 @@ override:
a->priv = police;
return ret;
+failure_unlock:
+ spin_unlock_bh(&police->tcf_lock);
failure:
+ if (P_tab)
+ qdisc_put_rtab(P_tab);
+ if (R_tab)
+ qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
kfree(police);
return err;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7851ce92cf..8daa1ebc741 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&simp_idx_gen, &simp_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fe9777e77f3..4ab916b8074 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&skbedit_idx_gen, &skbedit_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
d = to_skbedit(pc);
ret = ACT_P_CREATED;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 16e7ac9774e..173fcc4b050 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
if (src->action) {
struct tc_action *act;
tcf_tree_lock(tp);
- act = xchg(&dst->action, src->action);
+ act = dst->action;
+ dst->action = src->action;
tcf_tree_unlock(tp);
if (act)
tcf_action_destroy(act, TCA_ACT_UNBIND);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 956915c217d..4e2bda85411 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp,
static void basic_destroy(struct tcf_proto *tp)
{
- struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+ struct basic_head *head = tp->root;
struct basic_filter *f, *n;
list_for_each_entry_safe(f, n, &head->flist, link) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 00000000000..0d68b197598
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,288 @@
+/*
+ * net/sched/cls_cgroup.c Control Group Classifier
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+struct cgroup_cls_state
+{
+ struct cgroup_subsys_state css;
+ u32 classid;
+};
+
+static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
+{
+ return (struct cgroup_cls_state *)
+ cgroup_subsys_state(cgrp, net_cls_subsys_id);
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct cgroup_cls_state *cs;
+
+ if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (cgrp->parent)
+ cs->classid = net_cls_state(cgrp->parent)->classid;
+
+ return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ kfree(ss);
+}
+
+static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+{
+ return net_cls_state(cgrp)->classid;
+}
+
+static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+{
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+
+ net_cls_state(cgrp)->classid = (u32) value;
+
+ cgroup_unlock();
+
+ return 0;
+}
+
+static struct cftype ss_files[] = {
+ {
+ .name = "classid",
+ .read_u64 = read_classid,
+ .write_u64 = write_classid,
+ },
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+struct cgroup_subsys net_cls_subsys = {
+ .name = "net_cls",
+ .create = cgrp_create,
+ .destroy = cgrp_destroy,
+ .populate = cgrp_populate,
+ .subsys_id = net_cls_subsys_id,
+};
+
+struct cls_cgroup_head
+{
+ u32 handle;
+ struct tcf_exts exts;
+ struct tcf_ematch_tree ematches;
+};
+
+static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_cgroup_head *head = tp->root;
+ struct cgroup_cls_state *cs;
+ int ret = 0;
+
+ /*
+ * Due to the nature of the classifier it is required to ignore all
+ * packets originating from softirq context as accessing `current'
+ * would lead to false results.
+ *
+ * This test assumes that all callers of dev_queue_xmit() explicitely
+ * disable bh. Knowing this, it is possible to detect softirq based
+ * calls by looking at the number of nested bh disable calls because
+ * softirqs always disables bh.
+ */
+ if (softirq_count() != SOFTIRQ_OFFSET)
+ return -1;
+
+ rcu_read_lock();
+ cs = (struct cgroup_cls_state *) task_subsys_state(current,
+ net_cls_subsys_id);
+ if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
+ res->classid = cs->classid;
+ res->class = 0;
+ ret = tcf_exts_exec(skb, &head->exts, res);
+ } else
+ ret = -1;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+ return 0UL;
+}
+
+static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_cgroup_init(struct tcf_proto *tp)
+{
+ return 0;
+}
+
+static const struct tcf_ext_map cgroup_ext_map = {
+ .action = TCA_CGROUP_ACT,
+ .police = TCA_CGROUP_POLICE,
+};
+
+static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
+ [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
+};
+
+static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct nlattr *tb[TCA_CGROUP_MAX+1];
+ struct cls_cgroup_head *head = tp->root;
+ struct tcf_ematch_tree t;
+ struct tcf_exts e;
+ int err;
+
+ if (head == NULL) {
+ if (!handle)
+ return -EINVAL;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ head->handle = handle;
+
+ tcf_tree_lock(tp);
+ tp->root = head;
+ tcf_tree_unlock(tp);
+ }
+
+ if (handle != head->handle)
+ return -ENOENT;
+
+ err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
+ cgroup_policy);
+ if (err < 0)
+ return err;
+
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+ if (err < 0)
+ return err;
+
+ err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+ if (err < 0)
+ return err;
+
+ tcf_exts_change(tp, &head->exts, &e);
+ tcf_em_tree_change(tp, &head->ematches, &t);
+
+ return 0;
+}
+
+static void cls_cgroup_destroy(struct tcf_proto *tp)
+{
+ struct cls_cgroup_head *head = tp->root;
+
+ if (head) {
+ tcf_exts_destroy(tp, &head->exts);
+ tcf_em_tree_destroy(tp, &head->ematches);
+ kfree(head);
+ }
+}
+
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
+
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_cgroup_head *head = tp->root;
+
+ if (arg->count < arg->skip)
+ goto skip;
+
+ if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+skip:
+ arg->count++;
+}
+
+static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_cgroup_head *head = tp->root;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ t->tcm_handle = head->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+ tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
+ .kind = "cgroup",
+ .init = cls_cgroup_init,
+ .change = cls_cgroup_change,
+ .classify = cls_cgroup_classify,
+ .destroy = cls_cgroup_destroy,
+ .get = cls_cgroup_get,
+ .put = cls_cgroup_put,
+ .delete = cls_cgroup_delete,
+ .walk = cls_cgroup_walk,
+ .dump = cls_cgroup_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_cgroup_cls(void)
+{
+ return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup_cls(void)
+{
+ unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup_cls);
+module_exit(exit_cgroup_cls);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b0f90e593af..6d6e87585fb 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
static void fw_destroy(struct tcf_proto *tp)
{
- struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+ struct fw_head *head = tp->root;
struct fw_filter *f;
int h;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e3d8455eebc..bdf1f4172ee 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
static void route4_destroy(struct tcf_proto *tp)
{
- struct route4_head *head = xchg(&tp->root, NULL);
+ struct route4_head *head = tp->root;
int h1, h2;
if (head == NULL)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7a7bff5ded2..e806f2314b5 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,12 +13,6 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
-
-/*
- * Not quite sure if we need all the xchgs Alexey uses when accessing things.
- * Can always add them later ... :)
- */
-
/*
* Passing parameters to the root seems to be done more awkwardly than really
* necessary. At least, u32 doesn't seem to use such dirty hacks. To be
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 246f9065ce3..05d178008cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+ struct tc_u_hnode *root_ht = tp->root;
WARN_ON(root_ht == NULL);
@@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
err = -EINVAL;
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
- struct tc_u_hnode *ht_down = NULL;
+ struct tc_u_hnode *ht_down = NULL, *ht_old;
if (TC_U32_KEY(handle))
goto errout;
@@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
}
tcf_tree_lock(tp);
- ht_down = xchg(&n->ht_down, ht_down);
+ ht_old = n->ht_down;
+ n->ht_down = ht_down;
tcf_tree_unlock(tp);
- if (ht_down)
- ht_down->refcnt--;
+ if (ht_old)
+ ht_old->refcnt--;
}
if (tb[TCA_U32_CLASSID]) {
n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e548d..aab59409728 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
*
* static void __exit exit_my_ematch(void)
* {
- * return tcf_em_unregister(&my_ops);
+ * tcf_em_unregister(&my_ops);
* }
*
* module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
*
* Returns -ENOENT if no matching ematch was found.
*/
-int tcf_em_unregister(struct tcf_ematch_ops *ops)
+void tcf_em_unregister(struct tcf_ematch_ops *ops)
{
- int err = 0;
- struct tcf_ematch_ops *e;
-
write_lock(&ematch_mod_lock);
- list_for_each_entry(e, &ematch_ops, link) {
- if (e == ops) {
- list_del(&e->link);
- goto out;
- }
- }
-
- err = -ENOENT;
-out:
+ list_del(&ops->link);
write_unlock(&ematch_mod_lock);
- return err;
}
EXPORT_SYMBOL(tcf_em_unregister);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6ab4a2f92ca..0fc4a18fd96 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
Auxiliary routines:
- ---requeue
+ ---peek
- requeues once dequeued packet. It is used for non-standard or
- just buggy devices, which can defer output even if netif_queue_stopped()=0.
+ like dequeue but without removing a packet from the queue
---reset
@@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops)
if (qops->enqueue == NULL)
qops->enqueue = noop_qdisc_ops.enqueue;
- if (qops->requeue == NULL)
- qops->requeue = noop_qdisc_ops.requeue;
+ if (qops->peek == NULL) {
+ if (qops->dequeue == NULL) {
+ qops->peek = noop_qdisc_ops.peek;
+ } else {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
if (qops->dequeue == NULL)
qops->dequeue = noop_qdisc_ops.dequeue;
@@ -184,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc);
(root qdisc, all its children, children of children etc.)
*/
-struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
+static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
struct Qdisc *q;
@@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-/*
- * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
- * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
- */
-static DEFINE_SPINLOCK(qdisc_list_lock);
-
static void qdisc_list_add(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- spin_lock_bh(&qdisc_list_lock);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
- spin_unlock_bh(&qdisc_list_lock);
- }
}
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- spin_lock_bh(&qdisc_list_lock);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
list_del(&q->list);
- spin_unlock_bh(&qdisc_list_lock);
- }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
unsigned int i;
struct Qdisc *q;
- spin_lock_bh(&qdisc_list_lock);
-
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct Qdisc *txq_root = txq->qdisc_sleeping;
q = qdisc_match_from_root(txq_root, handle);
if (q)
- goto unlock;
+ goto out;
}
q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
-
-unlock:
- spin_unlock_bh(&qdisc_list_lock);
-
+out:
return q;
}
@@ -462,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
timer);
wd->qdisc->flags &= ~TCQ_F_THROTTLED;
- smp_wmb();
__netif_schedule(qdisc_root(wd->qdisc));
return HRTIMER_NORESTART;
@@ -892,9 +879,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
sch->stab = stab;
if (tca[TCA_RATE])
+ /* NB: ignores errors from replace_estimator
+ because change can't be undone. */
gen_replace_estimator(&sch->bstats, &sch->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 43d37256c15..2a8b83af7c4 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c