diff options
Diffstat (limited to 'net/sched/sch_red.c')
| -rw-r--r-- | net/sched/sch_red.c | 310 |
1 files changed, 223 insertions, 87 deletions
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index dccfa44c2d7..633e32defdc 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -14,11 +14,9 @@ * J Hadi Salim 980816: ECN support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> @@ -38,12 +36,14 @@ if RED works correctly. */ -struct red_sched_data -{ +struct red_sched_data { u32 limit; /* HARD maximal queue length */ unsigned char flags; + struct timer_list adapt_timer; struct red_parms parms; + struct red_vars vars; struct red_stats stats; + struct Qdisc *qdisc; }; static inline int red_use_ecn(struct red_sched_data *q) @@ -56,143 +56,208 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); - - q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog); - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - switch (red_action(&q->parms, q->parms.qavg)) { - case RED_DONT_MARK: - break; - - case RED_PROB_MARK: - sch->qstats.overlimits++; - if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; - goto congestion_drop; - } - - q->stats.prob_mark++; - break; - - case RED_HARD_MARK: - sch->qstats.overlimits++; - if (red_use_harddrop(q) || !red_use_ecn(q) || - !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; - goto congestion_drop; - } - - q->stats.forced_mark++; - break; + struct Qdisc *child = q->qdisc; + int ret; + + q->vars.qavg = red_calc_qavg(&q->parms, + &q->vars, + child->qstats.backlog); + + if (red_is_idling(&q->vars)) + red_end_of_idle_period(&q->vars); + + switch (red_action(&q->parms, &q->vars, q->vars.qavg)) { + case RED_DONT_MARK: + break; + + case RED_PROB_MARK: + sch->qstats.overlimits++; + if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + break; + + case RED_HARD_MARK: + sch->qstats.overlimits++; + if (red_use_harddrop(q) || !red_use_ecn(q) || + !INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + q->stats.forced_mark++; + break; } - if (sch->qstats.backlog + skb->len <= q->limit) - return qdisc_enqueue_tail(skb, sch); - - q->stats.pdrop++; - return qdisc_drop(skb, sch); + ret = qdisc_enqueue(skb, child); + if (likely(ret == NET_XMIT_SUCCESS)) { + sch->q.qlen++; + } else if (net_xmit_drop_count(ret)) { + q->stats.pdrop++; + sch->qstats.drops++; + } + return ret; congestion_drop: qdisc_drop(skb, sch); return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) +static struct sk_buff *red_dequeue(struct Qdisc *sch) { + struct sk_buff *skb; struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - return qdisc_requeue(skb, sch); + skb = child->dequeue(child); + if (skb) { + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + } else { + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); + } + return skb; } -static struct sk_buff * red_dequeue(struct Qdisc* sch) +static struct sk_buff *red_peek(struct Qdisc *sch) { - struct sk_buff *skb; struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; - skb = qdisc_dequeue_head(sch); - - if (skb == NULL && !red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); - - return skb; + return child->ops->peek(child); } -static unsigned int red_drop(struct Qdisc* sch) +static unsigned int red_drop(struct Qdisc *sch) { - struct sk_buff *skb; struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + unsigned int len; - skb = qdisc_dequeue_tail(sch); - if (skb) { - unsigned int len = skb->len; + if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; - qdisc_drop(skb, sch); + sch->qstats.drops++; + sch->q.qlen--; return len; } - if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); return 0; } -static void red_reset(struct Qdisc* sch) +static void red_reset(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); - qdisc_reset_queue(sch); - red_restart(&q->parms); + qdisc_reset(q->qdisc); + sch->q.qlen = 0; + red_restart(&q->vars); } -static int red_change(struct Qdisc *sch, struct rtattr *opt) +static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); - struct rtattr *tb[TCA_RED_MAX]; + + del_timer_sync(&q->adapt_timer); + qdisc_destroy(q->qdisc); +} + +static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { + [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, + [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, + [TCA_RED_MAX_P] = { .type = NLA_U32 }, +}; + +static int red_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_RED_MAX + 1]; struct tc_red_qopt *ctl; + struct Qdisc *child = NULL; + int err; + u32 max_P; - if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) + if (opt == NULL) return -EINVAL; - if (tb[TCA_RED_PARMS-1] == NULL || - RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || - tb[TCA_RED_STAB-1] == NULL || - RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) + err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy); + if (err < 0) + return err; + + if (tb[TCA_RED_PARMS] == NULL || + tb[TCA_RED_STAB] == NULL) return -EINVAL; - ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); + max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; + + ctl = nla_data(tb[TCA_RED_PARMS]); + + if (ctl->limit > 0) { + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + if (IS_ERR(child)) + return PTR_ERR(child); + } sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; + if (child) { + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(q->qdisc); + q->qdisc = child; + } + + red_set_parms(&q->parms, + ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_RED_STAB]), + max_P); + red_set_vars(&q->vars); - red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, - ctl->Plog, ctl->Scell_log, - RTA_DATA(tb[TCA_RED_STAB-1])); + del_timer(&q->adapt_timer); + if (ctl->flags & TC_RED_ADAPTATIVE) + mod_timer(&q->adapt_timer, jiffies + HZ/2); - if (skb_queue_empty(&sch->q)) - red_end_of_idle_period(&q->parms); + if (!q->qdisc->q.qlen) + red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); return 0; } -static int red_init(struct Qdisc* sch, struct rtattr *opt) +static inline void red_adaptative_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct red_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + red_adaptative_algo(&q->parms, &q->vars); + mod_timer(&q->adapt_timer, jiffies + HZ/2); + spin_unlock(root_lock); +} + +static int red_init(struct Qdisc *sch, struct nlattr *opt) { + struct red_sched_data *q = qdisc_priv(sch); + + q->qdisc = &noop_qdisc; + setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); return red_change(sch, opt); } static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); - struct rtattr *opts = NULL; + struct nlattr *opts = NULL; struct tc_red_qopt opt = { .limit = q->limit, .flags = q->flags, @@ -203,12 +268,18 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) .Scell_log = q->parms.Scell_log, }; - opts = RTA_NEST(skb, TCA_OPTIONS); - RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); - return RTA_NEST_END(skb, opts); - -rtattr_failure: - return RTA_NEST_CANCEL(skb, opts); + sch->qstats.backlog = q->qdisc->qstats.backlog; + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || + nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P)) + goto nla_put_failure; + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) @@ -224,15 +295,80 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) return gnet_stats_copy_app(d, &st, sizeof(st)); } -static struct Qdisc_ops red_qdisc_ops = { +static int red_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct red_sched_data *q = qdisc_priv(sch); + + tcm->tcm_handle |= TC_H_MIN(1); + tcm->tcm_info = q->qdisc->handle; + return 0; +} + +static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct red_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct red_sched_data *q = qdisc_priv(sch); + return q->qdisc; +} + +static unsigned long red_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void red_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static const struct Qdisc_class_ops red_class_ops = { + .graft = red_graft, + .leaf = red_leaf, + .get = red_get, + .put = red_put, + .walk = red_walk, + .dump = red_dump_class, +}; + +static struct Qdisc_ops red_qdisc_ops __read_mostly = { .id = "red", .priv_size = sizeof(struct red_sched_data), + .cl_ops = &red_class_ops, .enqueue = red_enqueue, .dequeue = red_dequeue, - .requeue = red_requeue, + .peek = red_peek, .drop = red_drop, .init = red_init, .reset = red_reset, + .destroy = red_destroy, .change = red_change, .dump = red_dump, .dump_stats = red_dump_stats, |
