From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 3 Feb 2010 08:01:28 +0800 Subject: tree-wide: Assorted spelling fixes In particular, several occurances of funny versions of 'success', 'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address', 'beginning', 'desirable', 'separate' and 'necessary' are fixed. Signed-off-by: Daniel Mack Cc: Joe Perches Cc: Junio C Hamano Signed-off-by: Jiri Kosina --- block/bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/bsg.c b/block/bsg.c index a9fd2d84b53..46597a6bd11 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -260,7 +260,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, return ERR_PTR(ret); /* - * map scatter-gather elements seperately and string them to request + * map scatter-gather elements separately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); if (!rq) -- cgit v1.2.3-18-g5258 From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 19 Jan 2010 02:58:23 +0100 Subject: Driver core: Constify struct sysfs_ops in struct kobj_type Constify struct sysfs_ops. This is part of the ops structure constification effort started by Arjan van de Ven et al. Benefits of this constification: * prevents modification of data that is shared (referenced) by many other structure instances at runtime * detects/prevents accidental (but not intentional) modification attempts on archs that enforce read-only kernel data at runtime * potentially better optimized code as the compiler can assume that the const data cannot be changed * the compiler/linker move const data into .rodata and therefore exclude them from false sharing Signed-off-by: Emese Revfy Acked-by: David Teigland Acked-by: Matt Domsch Acked-by: Maciej Sosnowski Acked-by: Hans J. Koch Acked-by: Pekka Enberg Acked-by: Jens Axboe Acked-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- block/blk-integrity.c | 2 +- block/blk-sysfs.c | 2 +- block/elevator.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 15c630813b1..96e83c2bdb9 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = { NULL, }; -static struct sysfs_ops integrity_ops = { +static const struct sysfs_ops integrity_ops = { .show = &integrity_attr_show, .store = &integrity_attr_store, }; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e85442415db..2ae2cb3f362 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,7 +450,7 @@ static void blk_release_queue(struct kobject *kobj) kmem_cache_free(blk_requestq_cachep, q); } -static struct sysfs_ops queue_sysfs_ops = { +static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, }; diff --git a/block/elevator.c b/block/elevator.c index ee3a883840f..df75676f667 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -892,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, return error; } -static struct sysfs_ops elv_sysfs_ops = { +static const struct sysfs_ops elv_sysfs_ops = { .show = elv_attr_show, .store = elv_attr_store, }; -- cgit v1.2.3-18-g5258 From 67523c48aa74d5637848edeccf285af1c60bf14a Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 10 Mar 2010 15:22:11 -0800 Subject: cgroups: blkio subsystem as module Modify the Block I/O cgroup subsystem to be able to be built as a module. As the CFQ disk scheduler optionally depends on blk-cgroup, config options in block/Kconfig, block/Kconfig.iosched, and block/blk-cgroup.h are enhanced to support the new module dependency. Signed-off-by: Ben Blum Cc: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Cc: Vivek Goyal Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/Kconfig | 2 +- block/Kconfig.iosched | 2 +- block/blk-cgroup.c | 53 +++++++++++++++++++++++++++++++++++++++------------ block/blk-cgroup.h | 10 ++++++++-- 4 files changed, 51 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/Kconfig b/block/Kconfig index e20fbde0875..62a5921321c 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,7 +78,7 @@ config BLK_DEV_INTEGRITY Protection. If in doubt, say N. config BLK_CGROUP - bool + tristate depends on CGROUPS default n ---help--- diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index b71abfb0d72..fc71cf071fb 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -23,6 +23,7 @@ config IOSCHED_DEADLINE config IOSCHED_CFQ tristate "CFQ I/O scheduler" + select BLK_CGROUP if CFQ_GROUP_IOSCHED default y ---help--- The CFQ I/O scheduler tries to distribute bandwidth equally @@ -35,7 +36,6 @@ config IOSCHED_CFQ config CFQ_GROUP_IOSCHED bool "CFQ Group Scheduling support" depends on IOSCHED_CFQ && CGROUPS - select BLK_CGROUP default n ---help--- Enable group IO scheduling in CFQ. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c85d74cae20..4b686ad08ea 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -23,6 +23,31 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); +static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, + struct cgroup *); +static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, + struct task_struct *, bool); +static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, + struct cgroup *, struct task_struct *, bool); +static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); +static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); + +struct cgroup_subsys blkio_subsys = { + .name = "blkio", + .create = blkiocg_create, + .can_attach = blkiocg_can_attach, + .attach = blkiocg_attach, + .destroy = blkiocg_destroy, + .populate = blkiocg_populate, +#ifdef CONFIG_BLK_CGROUP + /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */ + .subsys_id = blkio_subsys_id, +#endif + .use_id = 1, + .module = THIS_MODULE, +}; +EXPORT_SYMBOL_GPL(blkio_subsys); + struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), @@ -253,7 +278,8 @@ remove_entry: done: free_css_id(&blkio_subsys, &blkcg->css); rcu_read_unlock(); - kfree(blkcg); + if (blkcg != &blkio_root_cgroup) + kfree(blkcg); } static struct cgroup_subsys_state * @@ -319,17 +345,6 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, task_unlock(tsk); } -struct cgroup_subsys blkio_subsys = { - .name = "blkio", - .create = blkiocg_create, - .can_attach = blkiocg_can_attach, - .attach = blkiocg_attach, - .destroy = blkiocg_destroy, - .populate = blkiocg_populate, - .subsys_id = blkio_subsys_id, - .use_id = 1, -}; - void blkio_policy_register(struct blkio_policy_type *blkiop) { spin_lock(&blkio_list_lock); @@ -345,3 +360,17 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) spin_unlock(&blkio_list_lock); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); + +static int __init init_cgroup_blkio(void) +{ + return cgroup_load_subsys(&blkio_subsys); +} + +static void __exit exit_cgroup_blkio(void) +{ + cgroup_unload_subsys(&blkio_subsys); +} + +module_init(init_cgroup_blkio); +module_exit(exit_cgroup_blkio); +MODULE_LICENSE("GPL"); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 84bf745fa77..8ccc20464da 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,7 +15,13 @@ #include -#ifdef CONFIG_BLK_CGROUP +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) + +#ifndef CONFIG_BLK_CGROUP +/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */ +extern struct cgroup_subsys blkio_subsys; +#define blkio_subsys_id blkio_subsys.subsys_id +#endif struct blkio_cgroup { struct cgroup_subsys_state css; @@ -91,7 +97,7 @@ static inline void blkiocg_update_blkio_group_dequeue_stats( struct blkio_group *blkg, unsigned long dequeue) {} #endif -#ifdef CONFIG_BLK_CGROUP +#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, -- cgit v1.2.3-18-g5258 From 2cda2728aa1c8c006418a24f867b25e5eb7a32e2 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 15 Mar 2010 12:46:51 +0100 Subject: block: Fix overrun in lcm() and move it to lib lcm() was defined to take integer-sized arguments. The supplied arguments are multiplied, however, causing us to overflow given sufficiently large input. That in turn led to incorrect optimal I/O size reporting in some cases (RAID over RAID). Switch lcm() over to unsigned long similar to gcd() and move the function from blk-settings.c to lib. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'block') diff --git a/block/blk-settings.c b/block/blk-settings.c index 31e7a9375c1..4c4700dca56 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -8,6 +8,7 @@ #include #include /* for max_pfn/max_low_pfn */ #include +#include #include #include "blk.h" @@ -461,16 +462,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); -static unsigned int lcm(unsigned int a, unsigned int b) -{ - if (a && b) - return (a * b) / gcd(a, b); - else if (b) - return b; - - return a; -} - /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) -- cgit v1.2.3-18-g5258 From c77a5710b7e23847bfdb81fcaa10b585f65c960a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 10 Mar 2010 00:48:33 -0500 Subject: block: Export max number of segments and max segment size in sysfs These two values are useful when debugging issues surrounding maximum I/O size. Put them in sysfs with the rest of the queue limits. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'block') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e85442415db..fad86550255 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -106,6 +106,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_sectors_kb, (page)); } +static ssize_t queue_max_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_max_segments(q), (page)); +} + +static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) +{ + if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + return queue_var_show(queue_max_segment_size(q), (page)); + + return queue_var_show(PAGE_CACHE_SIZE, (page)); +} + static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) { return queue_var_show(queue_logical_block_size(q), page); @@ -280,6 +293,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = { .show = queue_max_hw_sectors_show, }; +static struct queue_sysfs_entry queue_max_segments_entry = { + .attr = {.name = "max_segments", .mode = S_IRUGO }, + .show = queue_max_segments_show, +}; + +static struct queue_sysfs_entry queue_max_segment_size_entry = { + .attr = {.name = "max_segment_size", .mode = S_IRUGO }, + .show = queue_max_segment_size_show, +}; + static struct queue_sysfs_entry queue_iosched_entry = { .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, .show = elv_iosched_show, @@ -355,6 +378,8 @@ static struct attribute *default_attrs[] = { &queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, + &queue_max_segments_entry.attr, + &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, &queue_logical_block_size_entry.attr, -- cgit v1.2.3-18-g5258 From 910ac735bad53ce54741a72a5b19ab69794ae069 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 16 Mar 2010 08:57:15 +0100 Subject: block: make CONFIG_BLK_CGROUP visible Make the config visible, so we can choose from CONFIG_BLK_CGROUP=y and CONFIG_BLK_CGROUP=m when CONFIG_IOSCHED_CFQ=m. Signed-off-by: Li Zefan Signed-off-by: Jens Axboe --- block/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/Kconfig b/block/Kconfig index e20fbde0875..f9e89f4d94b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY Protection. If in doubt, say N. config BLK_CGROUP - bool + tristate "Block cgroup support" depends on CGROUPS + depends on CFQ_GROUP_IOSCHED default n ---help--- Generic block IO controller cgroup interface. This is the common -- cgit v1.2.3-18-g5258 From e9ce335df51ff782035a15c261a3c0c9892a1767 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 19 Mar 2010 08:03:04 +0100 Subject: cfq-iosched: fix a kbuild regression Alex Shi reported a kbuild regression which is about 10% performance lost. He bisected to this commit: 3dde36ddea3e07dd025c4c1ba47edec91606fec0. The reason is cfqq_close() can't find close cooperator. Restoring cfq_rq_close()'s threshold to original value makes the regression go away. Since for_preempt parameter isn't used anymore, this patch deletes it. Reported-by: Alex Shi Signed-off-by: Shaohua Li Acked-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index dee9d9378fe..8d5a2f2f7fb 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -47,6 +47,7 @@ static const int cfq_hist_divisor = 4; #define CFQ_SERVICE_SHIFT 12 #define CFQQ_SEEK_THR (sector_t)(8 * 100) +#define CFQQ_CLOSE_THR (sector_t)(8 * 1024) #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) @@ -1660,9 +1661,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, } static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct request *rq, bool for_preempt) + struct request *rq) { - return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR; + return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR; } static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, @@ -1689,7 +1690,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * will contain the closest sector. */ __cfqq = rb_entry(parent, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; if (blk_rq_pos(__cfqq->next_rq) < sector) @@ -1700,7 +1701,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, return NULL; __cfqq = rb_entry(node, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) return __cfqq; return NULL; @@ -3103,7 +3104,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * if this request is as-good as one we would expect from the * current cfqq, let it preempt */ - if (cfq_rq_close(cfqd, cfqq, rq, true)) + if (cfq_rq_close(cfqd, cfqq, rq)) return true; return false; -- cgit v1.2.3-18-g5258 From b1ffe737f5b743115ee46ffb59e338e580c54902 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 25 Mar 2010 15:45:03 +0100 Subject: cfq-iosched: Add additional blktrace log messages in CFQ for easier debugging These have helped us debug some issues we've noticed in earlier IO controller versions and should be useful now as well. The extra logging covers: - idling behavior. Since there are so many conditions based on which we decide to idle or not, this patch adds a log message for some conditions that we've found useful. - workload slices and current prio and workload type Changelog from v1: o moved log message from cfq_set_active_queue() to __cfq_set_active_queue() o changed queue_count to st->count Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8d5a2f2f7fb..2f91c535194 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1518,7 +1518,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (cfqq) { - cfq_log_cfqq(cfqd, cfqq, "set_active"); + cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", + cfqd->serving_prio, cfqd->serving_type); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -1788,7 +1789,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) * Otherwise, we do only if they are the last ones * in their service tree. */ - return service_tree->count == 1 && cfq_cfqq_sync(cfqq); + if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) + return 1; + cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", + service_tree->count); + return 0; } static void cfq_arm_slice_timer(struct cfq_data *cfqd) @@ -1833,8 +1838,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * time slice. */ if (sample_valid(cic->ttime_samples) && - (cfqq->slice_end - jiffies < cic->ttime_mean)) + (cfqq->slice_end - jiffies < cic->ttime_mean)) { + cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d", + cic->ttime_mean); return; + } cfq_mark_cfqq_wait_request(cfqq); @@ -2042,6 +2050,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) slice = max(slice, 2 * cfqd->cfq_slice_idle); slice = max_t(unsigned, slice, CFQ_MIN_TT); + cfq_log(cfqd, "workload slice:%d", slice); cfqd->workload_expires = jiffies + slice; cfqd->noidle_tree_requires_idle = false; } @@ -3308,6 +3317,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) if (cfq_should_wait_busy(cfqd, cfqq)) { cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; cfq_mark_cfqq_wait_busy(cfqq); + cfq_log_cfqq(cfqd, cfqq, "will busy wait"); } /* -- cgit v1.2.3-18-g5258 From 39c01b219fd30c74869b6fc8749f7900f04e9ef6 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 25 Mar 2010 15:45:57 +0100 Subject: cfq-iosched: Do not merge queues of BE and IDLE classes Even if they are found to be co-operating. The prio_trees do not have any IDLE cfqqs on them. cfq_close_cooperator() is called from cfq_select_queue() and cfq_completed_request(). The latter ensures that the close cooperator code does not get invoked if the current cfqq is of class IDLE but the former doesn't seem to have any such checks. So an IDLE cfqq may get merged with a BE cfqq from the same group which should be avoided. Signed-off-by: Divyesh Shah Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2f91c535194..2c7a0f4f3cd 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1723,6 +1723,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, { struct cfq_queue *cfqq; + if (cfq_class_idle(cur_cfqq)) + return NULL; if (!cfq_cfqq_sync(cur_cfqq)) return NULL; if (CFQQ_SEEKY(cur_cfqq)) -- cgit v1.2.3-18-g5258 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- block/blk-barrier.c | 1 + block/blk-cgroup.c | 1 + block/blk-integrity.c | 1 + block/blk-ioc.c | 1 + block/blk-settings.c | 1 + block/blk-sysfs.c | 1 + block/blk-tag.c | 1 + block/bsg.c | 1 + block/cfq-iosched.c | 1 + block/compat_ioctl.c | 1 + block/ioctl.c | 1 + block/noop-iosched.c | 1 + 12 files changed, 12 insertions(+) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 8618d8996fe..6d88544b677 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "blk.h" diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4b686ad08ea..5fe03def34b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "blk-cgroup.h" static DEFINE_SPINLOCK(blkio_list_lock); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 96e83c2bdb9..edce1ef7933 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "blk.h" diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 3f65c8aadb2..d22c4c55c40 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -7,6 +7,7 @@ #include #include #include /* for max_pfn/max_low_pfn */ +#include #include "blk.h" diff --git a/block/blk-settings.c b/block/blk-settings.c index 31e7a9375c1..d9a9db5f0a2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -9,6 +9,7 @@ #include /* for max_pfn/max_low_pfn */ #include #include +#include #include "blk.h" diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 2ae2cb3f362..c2b821fa324 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -2,6 +2,7 @@ * Functions related to sysfs handling */ #include +#include #include #include #include diff --git a/block/blk-tag.c b/block/blk-tag.c index 6b0f52c2096..ece65fc4c79 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "blk.h" diff --git a/block/bsg.c b/block/bsg.c index 46597a6bd11..82d58829ba5 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index dee9d9378fe..fc98a48554f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -7,6 +7,7 @@ * Copyright (C) 2003 Jens Axboe */ #include +#include #include #include #include diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 4eb8e9ea4af..f26051f4468 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/block/ioctl.c b/block/ioctl.c index be48ea51fae..8905d2a2a71 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 3a0d369d08c..232c4b38cd3 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -5,6 +5,7 @@ #include #include #include +#include #include struct noop_data { -- cgit v1.2.3-18-g5258 From a506aedc51093544ff0f9610af6066d18cb6abbe Mon Sep 17 00:00:00 2001 From: "wzt.wzt@gmail.com" Date: Fri, 2 Apr 2010 08:41:14 +0200 Subject: Block: Fix block/elevator.c elevator_get() off-by-one error elevator_get() not check the name length, if the name length > sizeof(elv), elv will miss the '\0'. And elv buffer will be replace "-iosched" as something like aaaaaaaaa, then call request_module() can load an not trust module. Signed-off-by: Zhitong Wang Signed-off-by: Jens Axboe --- block/elevator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index df75676f667..76e3702d538 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name) spin_unlock(&elv_list_lock); - sprintf(elv, "%s-iosched", name); + snprintf(elv, sizeof(elv), "%s-iosched", name); request_module("%s", elv); spin_lock(&elv_list_lock); -- cgit v1.2.3-18-g5258 From a74b2adae06265b8cfa335d7d40d4a5abd11e977 Mon Sep 17 00:00:00 2001 From: Ricky Benitez Date: Mon, 5 Apr 2010 18:22:17 +0200 Subject: block: expose the statistics in blkio.time and blkio.sectors for the root cgroup Currently, the io statistics for the root cgroup are maintained, but they are not shown because the device information is not available at the point that the root blkio cgroup is created. This patch updates the device information when the statistics are updated so that the statistics become visible. Signed-off-by: Ricky Benitez Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2c7a0f4f3cd..7104ac816fb 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -948,6 +948,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) unsigned int major, minor; cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); + if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + cfqg->blkg.dev = MKDEV(major, minor); + goto done; + } if (cfqg || !create) goto done; -- cgit v1.2.3-18-g5258 From 3440c49f5c5ecb4f29b0544aa87da71888404f8f Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Fri, 9 Apr 2010 09:29:57 +0200 Subject: cfq-iosched: Fix the incorrect timeslice accounting with forced_dispatch When CFQ dispatches requests forcefully due to a barrier or changing iosched, it runs through all cfqq's dispatching requests and then expires each queue. However, it does not activate a cfqq before flushing its IOs resulting in using stale values for computing slice_used. This patch fixes it by calling activate queue before flushing reuqests from each queue. This is useful mostly for barrier requests because when the iosched is changing it really doesnt matter if we have incorrect accounting since we're going to break down all structures anyway. We also now expire the current timeslice before moving on with the dispatch to accurately account slice used for that cfqq. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7104ac816fb..b773000f8a0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2205,10 +2205,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) struct cfq_queue *cfqq; int dispatched = 0; - while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) + /* Expire the timeslice of the current active queue first */ + cfq_slice_expired(cfqd, 0); + while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { + __cfq_set_active_queue(cfqd, cfqq); dispatched += __cfq_forced_dispatch_cfqq(cfqq); + } - cfq_slice_expired(cfqd, 0); BUG_ON(cfqd->busy_queues); cfq_log(cfqd, "forced_dispatch=%d", dispatched); -- cgit v1.2.3-18-g5258 From a534dbe96e9929c7245924d8252d89048c23d569 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 14 Apr 2010 20:54:03 +0200 Subject: block: ensure jiffies wrap is handled correctly in blk_rq_timed_out_timer blk_rq_timed_out_timer() relied on blk_add_timer() never returning a timer value of zero, but commit 7838c15b8dd18e78a523513749e5b54bda07b0cb removed the code that bumped this value when it was zero. Therefore when jiffies is near wrap we could get unlucky & not set the timeout value correctly. This patch uses a flag to indicate that the timeout value was set and so handles jiffies wrap correctly, and it keeps all the logic in one function so should be easier to maintain in the future. Signed-off-by: Richard Kennedy Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-timeout.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 1ba7e0aca87..4f0c06c7a33 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -109,6 +109,7 @@ void blk_rq_timed_out_timer(unsigned long data) struct request_queue *q = (struct request_queue *) data; unsigned long flags, next = 0; struct request *rq, *tmp; + int next_set = 0; spin_lock_irqsave(q->queue_lock, flags); @@ -122,16 +123,13 @@ void blk_rq_timed_out_timer(unsigned long data) if (blk_mark_rq_complete(rq)) continue; blk_rq_timed_out(rq); - } else if (!next || time_after(next, rq->deadline)) + } else if (!next_set || time_after(next, rq->deadline)) { next = rq->deadline; + next_set = 1; + } } - /* - * next can never be 0 here with the list non-empty, since we always - * bump ->deadline to 1 so we can detect if the timer was ever added - * or not. See comment in blk_add_timer() - */ - if (next) + if (next_set) mod_timer(&q->timeout, round_jiffies_up(next)); spin_unlock_irqrestore(q->queue_lock, flags); -- cgit v1.2.3-18-g5258 From dcf097b247affd8b88ad410a92298590c5600f44 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 22 Apr 2010 11:54:52 -0400 Subject: blk-cgroup: Fix RCU correctness warning in cfq_init_queue() It is necessary to be in an RCU read-side critical section when invoking css_id(), so this patch adds one to blkiocg_add_blkio_group(). This is actually a false positive, because this is called at initialization time and hence always refers to the root cgroup, which cannot go away. [ 103.790505] =================================================== [ 103.790509] [ INFO: suspicious rcu_dereference_check() usage. ] [ 103.790511] --------------------------------------------------- [ 103.790514] kernel/cgroup.c:4432 invoked rcu_dereference_check() without protection! [ 103.790517] [ 103.790517] other info that might help us debug this: [ 103.790519] [ 103.790521] [ 103.790521] rcu_scheduler_active = 1, debug_locks = 1 [ 103.790524] 4 locks held by bash/4422: [ 103.790526] #0: (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x3c/0x144 [ 103.790537] #1: (s_active#102){.+.+.+}, at: [] sysfs_write_file+0xe7/0x144 [ 103.790544] #2: (&q->sysfs_lock){+.+.+.}, at: [] queue_attr_store+0x49/0x8f [ 103.790552] #3: (&(&blkcg->lock)->rlock){......}, at: [] blkiocg_add_blkio_group+0x2b/0xad [ 103.790560] [ 103.790561] stack backtrace: [ 103.790564] Pid: 4422, comm: bash Not tainted 2.6.34-rc4-blkio-second-crash #81 [ 103.790567] Call Trace: [ 103.790572] [] lockdep_rcu_dereference+0x9d/0xa5 [ 103.790577] [] css_id+0x44/0x57 [ 103.790581] [] blkiocg_add_blkio_group+0x53/0xad [ 103.790586] [] cfq_init_queue+0x139/0x32c [ 103.790591] [] elv_iosched_store+0xbf/0x1bf [ 103.790595] [] queue_attr_store+0x70/0x8f [ 103.790599] [] ? sysfs_write_file+0xe7/0x144 [ 103.790603] [] sysfs_write_file+0x108/0x144 [ 103.790609] [] vfs_write+0xae/0x10b [ 103.790612] [] ? trace_hardirqs_on_caller+0x10c/0x130 [ 103.790616] [] sys_write+0x4a/0x6e [ 103.790622] [] system_call_fastpath+0x16/0x1b [ 103.790625] Located-by: Miles Lane Signed-off-by: Vivek Goyal Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 838834be115..5f127cfb2e9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3694,8 +3694,10 @@ static void *cfq_init_queue(struct request_queue *q) * to make sure that cfq_put_cfqg() does not try to kfree root group */ atomic_set(&cfqg->ref, 1); + rcu_read_lock(); blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd, 0); + rcu_read_unlock(); #endif /* * Not strictly needed (since RB_ROOT just clears the node and we -- cgit v1.2.3-18-g5258 From 0341509fdfc9519f7de6aabc5dd23217cef72b73 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 May 2010 08:57:00 +0200 Subject: blk-cgroup: Fix an RCU warning in blkiocg_create() with CONFIG_PROVE_RCU=y, a warning can be triggered: # mount -t cgroup -o blkio xxx /mnt # mkdir /mnt/subgroup ... kernel/cgroup.c:4442 invoked rcu_dereference_check() without protection! ... To fix this, we avoid caling css_depth() here, which is a bit simpler than the original code. Signed-off-by: Li Zefan Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5fe03def34b..2cc682b860e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -286,16 +286,16 @@ done: static struct cgroup_subsys_state * blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) { - struct blkio_cgroup *blkcg, *parent_blkcg; + struct blkio_cgroup *blkcg; + struct cgroup *parent = cgroup->parent; - if (!cgroup->parent) { + if (!parent) { blkcg = &blkio_root_cgroup; goto done; } /* Currently we do not support hierarchy deeper than two level (0,1) */ - parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent); - if (css_depth(&parent_blkcg->css) > 0) + if (parent != cgroup->top_cgroup) return ERR_PTR(-EINVAL); blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); -- cgit v1.2.3-18-g5258