From c2f6805d470af369a7337801deeecea800dbfe1c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:32:42 +0100 Subject: blk-throttle: Fix calculation of max number of WRITES to be dispatched o Currently we try to dispatch more READS and less WRITES (75%, 25%) in one dispatch round. ummy pointed out that there is a bug in max_nr_writes calculation. This patch fixes it. Reported-by: ummy y Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 56ad4531b41..004be80fd89 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, { unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = throtl_grp_quantum*3/4; - unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; + unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; struct bio *bio; /* Try to dispatch 75% READS and 25% WRITES */ -- cgit v1.2.3-18-g5258 From d1ae8ffdfaa16b2ab2e9346e81cf0ab6eaaae347 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 1 Dec 2010 19:34:46 +0100 Subject: blk-throttle: Trim/adjust slice_end once a bio has been dispatched o During some testing I did following and noticed throttling stops working. - Put a very low limit on a cgroup, say 1 byte per second. - Start some reads, this will set slice_end to a very high value. - Change the limit to higher value say 1MB/s - Now IO unthrottles and finishes as expected. - Try to do the read again but IO is not limited to 1MB/s as expected. o What is happening. - Initially low value of limit sets slice_end to a very high value. - During updation of limit, slice_end is not being truncated. - Very high value of slice_end leads to keeping the existing slice valid for a very long time and new slice does not start. - tg_may_dispatch() is called in blk_throtle_bio(), and trim_slice() is not called in this path. So slice_start is some old value and practically we are able to do huge amount of IO. o There are many ways it can be fixed. I have fixed it by trying to adjust/cleanup slice_end in trim_slice(). Generally we extend slices if bio is big and can't be dispatched in one slice. After dispatch of bio, readjust the slice_end to make sure we don't end up with huge values. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 004be80fd89..2d134b7c40a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -355,6 +355,12 @@ throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) tg->slice_end[rw], jiffies); } +static inline void throtl_set_slice_end(struct throtl_data *td, + struct throtl_grp *tg, bool rw, unsigned long jiffy_end) +{ + tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); +} + static inline void throtl_extend_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw, unsigned long jiffy_end) { @@ -391,6 +397,16 @@ throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) if (throtl_slice_used(td, tg, rw)) return; + /* + * A bio has been dispatched. Also adjust slice_end. It might happen + * that initially cgroup limit was very low resulting in high + * slice_end, but later limit was bumped up and bio was dispached + * sooner, then we need to reduce slice_end. A high bogus slice_end + * is bad because it does not allow new slice to start. + */ + + throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice); + time_elapsed = jiffies - tg->slice_start[rw]; nr_slices = time_elapsed / throtl_slice; -- cgit v1.2.3-18-g5258 From 04a6b516cdc6efc2500b52a540cf65be8c5aaf9e Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 1 Dec 2010 19:34:52 +0100 Subject: blk-throttle: Correct the placement of smp_rmb() o I was discussing what are the variable being updated without spin lock and why do we need barriers and Oleg pointed out that location of smp_rmb() should be between read of td->limits_changed and tg->limits_changed. This patch fixes it. o Following is one possible sequence of events. Say cpu0 is executing throtl_update_blkio_group_read_bps() and cpu1 is executing throtl_process_limit_change(). cpu0 cpu1 tg->limits_changed = true; smp_mb__before_atomic_inc(); atomic_inc(&td->limits_changed); if (!atomic_read(&td->limits_changed)) return; if (tg->limits_changed) do_something; If cpu0 has updated tg->limits_changed and td->limits_changed, we want to make sure that if update to td->limits_changed is visible on cpu1, then update to tg->limits_changed should also be visible. Oleg pointed out to ensure that we need to insert an smp_rmb() between td->limits_changed read and tg->limits_changed read. o I had erroneously put smp_rmb() before atomic_read(&td->limits_changed). This patch fixes it. Reported-by: Oleg Nesterov Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 2d134b7c40a..381b09bb562 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -725,26 +725,21 @@ static void throtl_process_limit_change(struct throtl_data *td) struct throtl_grp *tg; struct hlist_node *pos, *n; - /* - * Make sure atomic_inc() effects from - * throtl_update_blkio_group_read_bps(), group of functions are - * visible. - * Is this required or smp_mb__after_atomic_inc() was suffcient - * after the atomic_inc(). - */ - smp_rmb(); if (!atomic_read(&td->limits_changed)) return; throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); - hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { - /* - * Do I need an smp_rmb() here to make sure tg->limits_changed - * update is visible. I am relying on smp_rmb() at the - * beginning of function and not putting a new one here. - */ + /* + * Make sure updates from throtl_update_blkio_group_read_bps() group + * of functions to tg->limits_changed are visible. We do not + * want update td->limits_changed to be visible but update to + * tg->limits_changed not being visible yet on this cpu. Hence + * the read barrier. + */ + smp_rmb(); + hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { if (throtl_tg_on_rr(tg) && tg->limits_changed) { throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" " riops=%u wiops=%u", tg->bps[READ], -- cgit v1.2.3-18-g5258