/*
* Interface for controlling IO bandwidth on a request queue
*
* Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include "blk-cgroup.h"
/* Maximum number of dispatches from a single group in one round */
static int throtl_grp_quantum = 8;
/* Maximum total number of dispatches from all groups in one round */
static int throtl_quantum = 32;
/*
 * Length of one throttling slice, in jiffies (HZ/10, i.e. 100 ms).
 * Throttling is performed over one slice; after that the slice is renewed.
 */
static unsigned long throtl_slice = HZ/10; /* 100 ms */
/*
 * Root of the rb-tree used as the service tree of active throtl groups
 * (see tg_service_tree in struct throtl_data), plus some bookkeeping.
 */
struct throtl_rb_root {
/* The rb-tree itself */
struct rb_root rb;
/* NOTE(review): presumably a cached pointer to the leftmost node - confirm */
struct rb_node *left;
/* NOTE(review): presumably the number of groups on the tree - confirm */
unsigned int count;
/* NOTE(review): presumably the smallest disptime on the tree - confirm */
unsigned long min_disptime;
};
/*
 * Compound-literal initializer for a struct throtl_rb_root: .rb is set to
 * RB_ROOT and every other member is explicitly zero/NULL.
 */
#define THROTL_RB_ROOT (struct throtl_rb_root) {	\
	.rb = RB_ROOT,					\
	.left = NULL,					\
	.count = 0,					\
	.min_disptime = 0,				\
}
/* Map an rb_node embedded as throtl_grp.rb_node back to its throtl_grp */
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
/*
 * Throttling state of one group on a request queue: the bios queued for
 * it, its configured limits and the accounting for the current slice.
 * The two-element arrays hold one entry for READ and one for WRITE.
 */
struct throtl_grp {
/* List of throtl groups on the request queue */
struct hlist_node tg_node;
/* active throtl group service_tree member */
struct rb_node rb_node;
/*
 * Dispatch time in jiffies. This is the estimated time when the group
 * will unthrottle and be ready to dispatch more bios. It is used as
 * the key to sort active groups in the service tree.
 */
unsigned long disptime;
/* Embedded blkio group; tg_of_blkg() maps it back to this throtl_grp */
struct blkio_group blkg;
/* Reference count; throtl_put_tg() frees the group on the last put */
atomic_t ref;
/* Bitmask of (1 << THROTL_TG_FLAG_*) bits, see THROTL_TG_FNS() */
unsigned int flags;
/* Two lists for READ and WRITE */
struct bio_list bio_lists[2];
/* Number of queued bios on READ and WRITE lists */
unsigned int nr_queued[2];
/* bytes per second rate limits */
uint64_t bps[2];
/* IOPS limits */
unsigned int iops[2];
/* Number of bytes dispatched in current slice */
uint64_t bytes_disp[2];
/* Number of bios dispatched in current slice */
unsigned int io_disp[2];
/* When did we start a new slice */
unsigned long slice_start[2];
/* NOTE(review): presumably when the current slice ends - confirm */
unsigned long slice_end[2];
/* Some throttle limits got updated for the group */
bool limits_changed;
};
/*
 * Throttling state associated with one request queue (->queue): the
 * throtl groups, the service tree of active groups and the delayed
 * work used to dispatch throttled bios.
 */
struct throtl_data
{
/* List of throtl groups */
struct hlist_head tg_list;
/* service tree for active throtl groups */
struct throtl_rb_root tg_service_tree;
/* Embedded group; NOTE(review): presumably the root cgroup's group - confirm */
struct throtl_grp root_tg;
/* Request queue; throtl_log()/throtl_log_tg() pass it to blk_add_trace_msg() */
struct request_queue *queue;
/* Total Number of queued bios on READ and WRITE lists */
unsigned int nr_queued[2];
/*
 * number of total undestroyed groups
 */
unsigned int nr_undestroyed_grps;
/* Work for dispatching throttled bios */
struct delayed_work throtl_work;
/* NOTE(review): presumably set when some group's limits were updated - confirm */
atomic_t limits_changed;
};
/*
 * Bit positions within throtl_grp.flags. Each value is used as a shift
 * count by the accessors that THROTL_TG_FNS() generates.
 */
enum tg_state_flags {
THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */
};
/*
 * Generate the three accessors for the THROTL_TG_FLAG_<name> bit of
 * throtl_grp.flags:
 *   throtl_mark_tg_<name>()  - set the bit
 *   throtl_clear_tg_<name>() - clear the bit
 *   throtl_tg_<name>()       - return 1 if the bit is set, 0 otherwise
 */
#define THROTL_TG_FNS(name)						\
static inline void throtl_mark_tg_##name(struct throtl_grp *tg)		\
{									\
	tg->flags |= 1 << THROTL_TG_FLAG_##name;			\
}									\
static inline void throtl_clear_tg_##name(struct throtl_grp *tg)	\
{									\
	tg->flags &= ~(1 << THROTL_TG_FLAG_##name);			\
}									\
static inline int throtl_tg_##name(const struct throtl_grp *tg)		\
{									\
	return !!(tg->flags & (1 << THROTL_TG_FLAG_##name));		\
}

/* Accessors for THROTL_TG_FLAG_on_rr */
THROTL_TG_FNS(on_rr);
/*
 * Emit a trace message on td's request queue via blk_add_trace_msg(),
 * prefixed with "throtl " and the blkg_path() of tg's blkio group.
 *
 * The expansion deliberately has no trailing semicolon and no trailing
 * line continuation: the caller supplies the ';', which keeps the macro
 * usable in unbraced if/else bodies, and the definition cannot swallow
 * whatever line follows it.
 */
#define throtl_log_tg(td, tg, fmt, args...)				\
	blk_add_trace_msg((td)->queue, "throtl %s " fmt,		\
				blkg_path(&(tg)->blkg), ##args)

/*
 * Emit a trace message on td's request queue via blk_add_trace_msg(),
 * prefixed with "throtl ".
 */
#define throtl_log(td, fmt, args...)	\
	blk_add_trace_msg((td)->queue, "throtl " fmt, ##args)
/*
 * Map a blkio_group to the throtl_grp that embeds it as its blkg member.
 * Returns NULL when blkg is NULL.
 */
static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
{
	if (!blkg)
		return NULL;

	return container_of(blkg, struct throtl_grp, blkg);
}
/* Total number of bios queued on td, summed over both nr_queued[] entries */
static inline int total_nr_queued(struct throtl_data *td)
{
	unsigned int queued = td->nr_queued[0];

	queued += td->nr_queued[1];
	return queued;
}
/*
 * Take a reference on tg by incrementing tg->ref, and return the same
 * pointer. The reference is dropped with throtl_put_tg().
 */
static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
{
atomic_inc(&tg->ref);
return tg;
}
/*
 * Drop one reference on tg. It is a BUG if the count is already zero or
 * negative. When the count reaches zero, tg is freed with kfree().
 */
static void throtl_put_tg(struct throtl_grp *tg)
{
	BUG_ON(atomic_read(&tg->ref) <= 0);

	/* Only the caller that drops the last reference frees the group */
	if (atomic_dec_and_test(&tg->ref))
		kfree(tg);
}
static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
struct cgroup *cgroup)
{
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
struct throtl_grp *tg = NULL;
void *key = td;
struct backing_dev_info *bdi = &td->queue->backing_dev_info;
unsigned int major, minor;
/*
* TODO: Speed up blkiocg_lookup_group() by maintaining a radix
* tree of blkg (instead of traversing through hash list all
* the time.
*/
tg =