/*
* Interface for controlling IO bandwidth on a request queue
*
* Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include "blk-cgroup.h"
#include "blk.h"
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
/* Total max dispatch from all groups in one round */
static int throtl_quantum = 32;
/*
 * Throttling is performed over 100ms slice and after that slice is renewed.
 * The value is kept in jiffies (HZ ticks per second, so HZ/10 is 100 ms).
 */
static unsigned long throtl_slice = HZ/10; /* 100 ms */
/*
 * Policy object for throttling. blkg_to_tg() and tg_to_blkg() pass its
 * address to blkg_to_pdata()/pdata_to_blkg() to convert between a
 * blkio_group and its throtl_grp.
 */
static struct blkio_policy_type blkio_policy_throtl;
/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;
/* Forward declaration so that code preceding the definition can call it. */
static void throtl_schedule_delayed_work(struct throtl_data *td,
unsigned long delay);
/*
 * An rbtree root plus bookkeeping kept next to it. struct throtl_data uses
 * one of these as the service tree for active throtl groups.
 */
struct throtl_rb_root {
/* The rbtree itself */
struct rb_root rb;
/* NOTE(review): presumably a cached pointer to the leftmost node - confirm */
struct rb_node *left;
/* NOTE(review): presumably the number of nodes currently in the tree - confirm */
unsigned int count;
/* NOTE(review): presumably the smallest disptime key in the tree - confirm */
unsigned long min_disptime;
};
/*
 * Compound literal for an empty throtl_rb_root: empty rbtree, NULL left
 * pointer, zero count and zero min_disptime.
 */
#define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_disptime = 0}
/* Given a pointer to a throtl_grp's rb_node member, get the throtl_grp. */
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
/*
 * Per-group throttling state. blkg_to_tg()/tg_to_blkg() convert between
 * this structure and the blkio_group it is attached to. The two-element
 * arrays hold one entry per direction (READ and WRITE).
 */
struct throtl_grp {
/* active throtl group service_tree member */
struct rb_node rb_node;
/*
 * Dispatch time in jiffies. This is the estimated time when group
 * will unthrottle and is ready to dispatch more bio. It is used as
 * key to sort active groups in service tree.
 */
unsigned long disptime;
/* Bit flags; bit positions are the values of enum tg_state_flags */
unsigned int flags;
/* Two lists for READ and WRITE */
struct bio_list bio_lists[2];
/* Number of queued bios on READ and WRITE lists */
unsigned int nr_queued[2];
/* bytes per second rate limits */
uint64_t bps[2];
/* IOPS limits */
unsigned int iops[2];
/* Number of bytes dispatched in current slice */
uint64_t bytes_disp[2];
/* Number of bio's dispatched in current slice */
unsigned int io_disp[2];
/* When did we start a new slice */
unsigned long slice_start[2];
/* NOTE(review): presumably when the current slice ends, in jiffies - confirm */
unsigned long slice_end[2];
/* Some throttle limits got updated for the group */
int limits_changed;
};
/*
 * Throttling state that is not specific to one group: the service tree of
 * active groups, the root group, the request queue it refers to, and the
 * delayed work used for dispatching throttled bios.
 */
struct throtl_data
{
/* service tree for active throtl groups */
struct throtl_rb_root tg_service_tree;
/* Group returned by throtl_lookup_tg() for the root blkio cgroup */
struct throtl_grp *root_tg;
/* Request queue; used for group lookup and as the target of trace messages */
struct request_queue *queue;
/* Total Number of queued bios on READ and WRITE lists */
unsigned int nr_queued[2];
/*
 * number of total undestroyed groups
 */
unsigned int nr_undestroyed_grps;
/* Work for dispatching throttled bios */
struct delayed_work throtl_work;
/* NOTE(review): presumably set when some group's limits were updated - confirm */
int limits_changed;
};
/*
 * Convert a blkio_group into its throtl_grp by asking blkg_to_pdata() for
 * the data kept under the throttling policy.
 */
static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg)
{
	struct throtl_grp *tg = blkg_to_pdata(blkg, &blkio_policy_throtl);

	return tg;
}
/*
 * Inverse of blkg_to_tg(): hand the throtl_grp to pdata_to_blkg() together
 * with the throttling policy and return the blkio_group it yields.
 */
static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg)
{
	struct blkio_group *blkg = pdata_to_blkg(tg, &blkio_policy_throtl);

	return blkg;
}
/*
 * Bit positions within throtl_grp->flags. THROTL_TG_FNS() shifts 1 left by
 * one of these values to form the mask it sets, clears or tests.
 */
enum tg_state_flags {
THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */
};
/*
 * Generate the accessors for one bit of throtl_grp->flags. For a flag
 * THROTL_TG_FLAG_<name> from enum tg_state_flags this defines:
 *
 *   throtl_mark_tg_<name>(tg)  - set the bit
 *   throtl_clear_tg_<name>(tg) - clear the bit
 *   throtl_tg_<name>(tg)       - return 1 if the bit is set, 0 otherwise
 *
 * The mask is built from an unsigned 1 to match the unsigned int flags
 * word, so a flag placed at bit 31 would not shift into the sign bit of
 * an int.
 */
#define THROTL_TG_FNS(name)						\
static inline void throtl_mark_tg_##name(struct throtl_grp *tg)		\
{									\
	(tg)->flags |= (1U << THROTL_TG_FLAG_##name);			\
}									\
static inline void throtl_clear_tg_##name(struct throtl_grp *tg)	\
{									\
	(tg)->flags &= ~(1U << THROTL_TG_FLAG_##name);			\
}									\
static inline int throtl_tg_##name(const struct throtl_grp *tg)		\
{									\
	return ((tg)->flags & (1U << THROTL_TG_FLAG_##name)) != 0;	\
}
THROTL_TG_FNS(on_rr);
/*
 * Emit a trace message through blk_add_trace_msg() for (td)->queue. The
 * message is prefixed with "throtl " and the blkg_path() of the blkio
 * group behind @tg, followed by the caller's format and arguments.
 *
 * The definition ends on its last line and carries no trailing semicolon:
 * a dangling line continuation would pull the following source line into
 * this macro, and an embedded ';' would break use as the body of an
 * unbraced if/else. Callers supply the ';' themselves.
 */
#define throtl_log_tg(td, tg, fmt, args...)				\
	blk_add_trace_msg((td)->queue, "throtl %s " fmt,		\
			  blkg_path(tg_to_blkg(tg)), ##args)
/*
 * Emit a trace message through blk_add_trace_msg() for (td)->queue,
 * prefixed with "throtl " and followed by the caller's format and arguments.
 */
#define throtl_log(td, fmt, args...) \
blk_add_trace_msg((td)->queue, "throtl " fmt, ##args)
/* Add up both entries of td->nr_queued[] and return the sum. */
static inline unsigned int total_nr_queued(struct throtl_data *td)
{
	unsigned int total = td->nr_queued[0];

	total += td->nr_queued[1];
	return total;
}
/*
 * Initialise the throtl_grp attached to @blkg: all four limits are set to
 * -1, limits_changed is cleared, both bio lists are initialised and the
 * rb_node is marked clear.
 */
static void throtl_init_blkio_group(struct blkio_group *blkg)
{
	struct throtl_grp *grp = blkg_to_tg(blkg);

	/*
	 * -1 stored into the unsigned limit fields becomes the largest value
	 * each type can hold.
	 */
	grp->iops[WRITE] = -1;
	grp->iops[READ] = -1;
	grp->bps[WRITE] = -1;
	grp->bps[READ] = -1;

	grp->limits_changed = 0;

	bio_list_init(&grp->bio_lists[1]);
	bio_list_init(&grp->bio_lists[0]);

	RB_CLEAR_NODE(&grp->rb_node);
}
/*
 * Find the throtl_grp for @blkcg on the queue of @td.
 *
 * The root blkio cgroup is answered directly from td->root_tg; any other
 * cgroup goes through blkg_lookup() and the result is converted with
 * blkg_to_tg().
 */
static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td,
					    struct blkio_cgroup *blkcg)
{
	struct blkio_group *blkg;

	if (blkcg != &blkio_root_cgroup) {
		blkg = blkg_lookup(blkcg, td->queue);
		return blkg_to_tg(blkg);
	}

	/*
	 * Common case when there are no blkio cgroups: skip the lookup and
	 * use the root group.
	 */
	return td->root_tg;
}
static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
struct blkio_cgroup *blkcg)
{
struct request_queue *q = td->queue;
struct throtl_grp *tg = NULL;
/*
* This is the common case when there are no blkio cgroups.
* Avoid lookup in this case