/*
* net/sched/sch_qfq.c Quick Fair Queueing Scheduler.
*
* Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/* Quick Fair Queueing
===================
Sources:
Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
Packet Scheduling with Tight Bandwidth Distribution Guarantees."
See also:
http://retis.sssup.it/~fabio/linux/qfq/
*/
/*
Virtual time computations.
S, F and V are all computed in fixed point arithmetic with
FRAC_BITS fractional bits.
QFQ_MAX_INDEX is the maximum index allowed for a group. We need
one bit per index.
QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
The layout of the bits is as below:
[ MTU_SHIFT ][      FRAC_BITS      ]
[   MAX_INDEX   ][ MIN_SLOT_SHIFT  ]
                ^.__grp->index = 0
                *.__grp->slot_shift
where MIN_SLOT_SHIFT is derived by difference from the others.
The max group index corresponds to Lmax/w_min, where
Lmax=1<<MTU_SHIFT, w_min = 1 .
From this, and knowing how many groups (MAX_INDEX) we want,
we can derive the shift corresponding to each group.
Because we often need to compute
F = S + len/w_i and V = V + len/wsum
instead of storing w_i store the value
inv_w = (1<<FRAC_BITS)/w_i
so we can do F = S + len * inv_w * wsum.
We use W_TOT in the formulas so we can easily move between
static and adaptive weight sum.
The per-scheduler-instance data contain all the data structures
for the scheduler: bitmaps and bucket lists.
*/
/*
 * Maximum number of consecutive slots occupied by backlogged classes
 * inside a group.
 *
 * struct qfq_group sizes its slots[] array with this value.
 * NOTE(review): the group's full_slots field is a single unsigned long;
 * if it holds one bit per slot (as its "non-empty slots" comment suggests),
 * this value must not exceed the number of bits in a long -- confirm
 * before raising it.
 */
#define QFQ_MAX_SLOTS 32
/*
 * Shifts used for the class <-> group mapping.
 *
 * Class weights are allowed in the range [1, 2^QFQ_MAX_WSHIFT], and each
 * class i is mapped to the group with the smallest index that can support
 * the L_i / r_i configured for the class.
 *
 * grp->index is the index of the group, and grp->slot_shift is the shift
 * for the corresponding (scaled) sigma_i.
 */

/* Highest group index; group indexes run from 0 to QFQ_MAX_INDEX. */
#define QFQ_MAX_INDEX      19

/* Weight limits: log2 of the largest weight, the weight itself, and the
 * largest weight sum (twice the largest weight).
 */
#define QFQ_MAX_WSHIFT     16
#define QFQ_MAX_WEIGHT     (1 << QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM       (2 * QFQ_MAX_WEIGHT)

/* Fixed point arithmetic: number of fractional bits, the value 1.0, and
 * 1.0 divided by the largest weight sum.
 */
#define FRAC_BITS          30
#define ONE_FP             (1UL << FRAC_BITS)
#define IWSUM              (ONE_FP / QFQ_MAX_WSUM)

/* log2 of the largest packet size, and the smallest slot shift, which is
 * what remains of FRAC_BITS + QFQ_MTU_SHIFT once the QFQ_MAX_INDEX bits
 * are taken out.
 */
#define QFQ_MTU_SHIFT      11
#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
/*
 * Possible group states. The enumerators are used as indexes into the
 * bitmaps[] array of struct qfq_sched; QFQ_MAX_STATE is not a state but
 * the number of states, and is what sizes that array.
 *
 * NOTE(review): the two-letter names presumably stand for
 * Eligible/Ineligible and Ready/Blocked as in the QFQ paper -- confirm.
 */
enum qfq_state {
	ER = 0,
	IR,
	EB,
	IB,
	QFQ_MAX_STATE	/* number of states; keep last */
};
/* Declared ahead of its definition so qfq_class can point at its group. */
struct qfq_group;

/*
 * Per-class (per-flow) state.
 */
struct qfq_class {
	/* Embedded class-hash entry; qfq_find_class() converts a pointer to
	 * this member back into the enclosing qfq_class.
	 */
	struct Qdisc_class_common common;

	/* NOTE(review): presumably a reference count and a count of
	 * attached filters; their users are not in this part of the file.
	 */
	unsigned int refcnt;
	unsigned int filter_cnt;

	/* Statistics blocks. */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;

	/* Qdisc attached to this class; qfq_purge_queue() resets it. */
	struct Qdisc *qdisc;

	/* Link for the slot list. */
	struct hlist_node next;

	/* Flow timestamps (exact). */
	u64 S;
	u64 F;

	/* Group we belong to. In principle we would need the index,
	 * which is log_2(lmax/weight), but we never reference it
	 * directly, only the group.
	 */
	struct qfq_group *grp;

	/* These are copied from the flowset. */
	u32 inv_w;	/* ONE_FP/weight */
	u32 lmax;	/* Max packet size for this flow. */
};
/*
 * One scheduling group: a set of slots, each holding a list of active
 * classes, together with the group's approximate timestamps.
 */
struct qfq_group {
	/* Group timestamps (approx). */
	u64 S;
	u64 F;

	unsigned int slot_shift;	/* Slot shift. */
	unsigned int index;		/* Group index. */
	unsigned int front;		/* Index of the front slot. */

	/* Non-empty slots. */
	unsigned long full_slots;

	/* Array of RR lists of active classes, QFQ_MAX_SLOTS entries. */
	struct hlist_head slots[QFQ_MAX_SLOTS];
};
/*
 * Per-scheduler-instance data: the class hash, the virtual time, and the
 * bitmaps and groups used by the scheduler. qfq_find_class() obtains it
 * from a Qdisc with qdisc_priv().
 */
struct qfq_sched {
struct tcf_proto *filter_list; /* NOTE(review): presumably the attached classifier chain -- confirm */
struct Qdisc_class_hash clhash; /* Classes by classid; searched by qfq_find_class(). */
u64 V; /* Precise virtual time. */
u32 wsum; /* weight sum */
unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps, one per enum qfq_state value. */
struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups, indexes 0..QFQ_MAX_INDEX. */
};
/*
 * Look up the class with the given classid in this scheduler's class hash.
 *
 * @sch:     qdisc whose private data is a struct qfq_sched
 * @classid: id to search for
 *
 * Returns the matching qfq_class, or NULL when qdisc_class_find() finds
 * no entry for @classid.
 */
static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
{
	struct qfq_sched *priv = qdisc_priv(sch);
	struct Qdisc_class_common *found = qdisc_class_find(&priv->clhash, classid);

	/* The hash stores the embedded 'common' member; step back to the
	 * enclosing class only when the lookup succeeded.
	 */
	return found ? container_of(found, struct qfq_class, common) : NULL;
}
static void qfq_purge_queue(struct qfq_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
qdisc_reset(cl->qdisc);
qdisc_tree_decrease_qlen(cl->qdisc, len);
}
/*
 * Netlink attribute policy for QFQ attributes: both the weight
 * (TCA_QFQ_WEIGHT) and the maximum packet length (TCA_QFQ_LMAX) are
 * declared as 32-bit unsigned attributes. The table is indexed by
 * attribute type and has TCA_QFQ_MAX + 1 entries.
 */
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
[TCA_QFQ_LMAX] = { .type = NLA_U32 },
};
/*
* Calculate a flow index, given its weight and maximum packet length.
* index = log_2(maxlen/weight) but we need to apply the scaling.
* This is used only once at flow creation.
*/
static int