Diffstat (limited to 'block')

 -rw-r--r--  block/Kconfig         |  15
 -rw-r--r--  block/Makefile        |   2
 -rw-r--r--  block/blk-core.c      |  33
 -rw-r--r--  block/blk-integrity.c |   1
 -rw-r--r--  block/blk-merge.c     |   6
 -rw-r--r--  block/blk-settings.c  |  88
 -rw-r--r--  block/blk-sysfs.c     |  13
 -rw-r--r--  block/bsg.c           |   5
 -rw-r--r--  block/cfq-iosched.c   | 178
 -rw-r--r--  block/cmd-filter.c    | 233
 -rw-r--r--  block/elevator.c      |  13
 -rw-r--r--  block/scsi_ioctl.c    |  44

 12 files changed, 238 insertions, 393 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 2c39527aa7d..9be0b56eaee 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -23,8 +23,8 @@ menuconfig BLOCK
 
 if BLOCK
 
-config LBD
-	bool "Support for large block devices and files"
+config LBDAF
+	bool "Support for large (2TB+) block devices and files"
 	depends on !64BIT
 	default y
 	help
@@ -48,9 +48,9 @@ config LBD
 	  If unsure, say Y.
 
 config BLK_DEV_BSG
-	bool "Block layer SG support v4 (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	---help---
+	bool "Block layer SG support v4"
+	default y
+	help
 	  Saying Y here will enable generic SG (SCSI generic) v4 support
 	  for any block device.
 
@@ -60,7 +60,10 @@ config BLK_DEV_BSG
 	  protocols (e.g. Task Management Functions and SMP in Serial
 	  Attached SCSI).
 
-	  If unsure, say N.
+	  This option is required by recent UDEV versions to properly
+	  access device serial numbers, etc.
+
+	  If unsure, say Y.
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
diff --git a/block/Makefile b/block/Makefile
index e9fa4dd690f..6c54ed0ff75 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
+			ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b06cf5c2a82..e3299a77a0d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -575,13 +575,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 		return NULL;
 	}
 
-	/*
-	 * if caller didn't supply a lock, they get per-queue locking with
-	 * our embedded lock
-	 */
-	if (!lock)
-		lock = &q->__queue_lock;
-
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
@@ -595,8 +588,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 
 	q->sg_reserved_size = INT_MAX;
 
-	blk_set_cmd_filter_defaults(&q->cmd_filter);
-
 	/*
 	 * all done
 	 */
@@ -1172,6 +1163,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
+	if (bio_barrier(bio) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1472,11 +1468,6 @@ static inline void __generic_make_request(struct bio *bio)
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
-		if (bio_barrier(bio) && bio_has_data(bio) &&
-		    (q->next_ordered == QUEUE_ORDERED_NONE)) {
-			err = -EOPNOTSUPP;
-			goto end_io;
-		}
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -2145,7 +2136,7 @@ bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
-EXPORT_SYMBOL_GPL(blk_end_request);
+EXPORT_SYMBOL(blk_end_request);
 
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
@@ -2166,7 +2157,7 @@ void blk_end_request_all(struct request *rq, int error)
 	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
-EXPORT_SYMBOL_GPL(blk_end_request_all);
+EXPORT_SYMBOL(blk_end_request_all);
 
 /**
  * blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2184,7 +2175,7 @@ bool blk_end_request_cur(struct request *rq, int error)
 {
 	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
-EXPORT_SYMBOL_GPL(blk_end_request_cur);
+EXPORT_SYMBOL(blk_end_request_cur);
 
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
@@ -2203,7 +2194,7 @@ bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
-EXPORT_SYMBOL_GPL(__blk_end_request);
+EXPORT_SYMBOL(__blk_end_request);
 
 /**
  * __blk_end_request_all - Helper function for drives to finish the request.
@@ -2224,7 +2215,7 @@ void __blk_end_request_all(struct request *rq, int error)
 	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
-EXPORT_SYMBOL_GPL(__blk_end_request_all);
+EXPORT_SYMBOL(__blk_end_request_all);
 
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2243,7 +2234,7 @@ bool __blk_end_request_cur(struct request *rq, int error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
-EXPORT_SYMBOL_GPL(__blk_end_request_cur);
+EXPORT_SYMBOL(__blk_end_request_cur);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
@@ -2365,7 +2356,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		__bio_clone(bio, bio_src);
 
 		if (bio_integrity(bio_src) &&
-		    bio_integrity_clone(bio, bio_src, gfp_mask))
+		    bio_integrity_clone(bio, bio_src, gfp_mask, bs))
 			goto free_and_out;
 
 		if (bio_ctr && bio_ctr(bio, bio_src, data))
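With the hunks above, the request completion helpers are exported with plain
EXPORT_SYMBOL rather than EXPORT_SYMBOL_GPL, and barrier bios the queue cannot
order are failed early in __make_request() with bio_endio() instead of deep in
__generic_make_request(). As a hedged illustration of the completion API these
exports open up, a minimal sketch of a driver completion path (the mydrv_*
names are hypothetical, not part of this patch):

	/* blk_end_request() returns true while the request still has
	 * pending buffers, and false once it is fully completed. */
	static void mydrv_finish_io(struct request *rq, int error,
				    unsigned int bytes)
	{
		if (blk_end_request(rq, error, bytes))
			return;		/* partial completion, more to do */
		/* request fully completed; hand the hardware the next one */
	}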
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 73e28d35568..15c630813b1 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -379,6 +379,7 @@ void blk_integrity_unregister(struct gendisk *disk)
 
 	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
 	kobject_del(&bi->kobj);
+	kobject_put(&bi->kobj);
 	kmem_cache_free(integrity_cachep, bi);
 	disk->integrity = NULL;
 }
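The added kobject_put() pairs with the reference taken when the integrity
kobject was registered: kobject_del() alone removes the sysfs entry but never
drops that reference, so the ktype release path was never run. The general
teardown pattern, sketched with a hypothetical object (not code from this
patch):

	/* obj, obj_ktype and parent are illustrative placeholders */
	ret = kobject_init_and_add(&obj->kobj, &obj_ktype, parent, "name");
	...
	kobject_del(&obj->kobj);	/* unlink from sysfs, ref still held */
	kobject_put(&obj->kobj);	/* drop the ref, runs ->release() */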
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 39ce64432ba..e1999679a4d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -350,6 +350,12 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	if (blk_integrity_rq(req) != blk_integrity_rq(next))
 		return 0;
 
+	/* don't merge requests of different failfast settings */
+	if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
+	    blk_failfast_transport(req) != blk_failfast_transport(next) ||
+	    blk_failfast_driver(req) != blk_failfast_driver(next))
+		return 0;
+
 	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7541ea4bf9f..476d8706507 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/gcd.h>
 
 #include "blk.h"
 
@@ -97,7 +98,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
 /**
  * blk_set_default_limits - reset limits to default values
- * @limits:  the queue_limits structure to reset
+ * @lim:  the queue_limits structure to reset
  *
  * Description:
  *   Returns a queue_limit struct to its default state.  Can be used by
@@ -112,7 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_segment_size = MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
-	lim->bounce_pfn = BLK_BOUNCE_ANY;
+	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
@@ -165,6 +166,13 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	blk_set_default_limits(&q->limits);
 
 	/*
+	 * If the caller didn't supply a lock, fall back to our embedded
+	 * per-queue locks
+	 */
+	if (!q->queue_lock)
+		q->queue_lock = &q->__queue_lock;
+
+	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
@@ -377,8 +385,8 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
 EXPORT_SYMBOL(blk_queue_alignment_offset);
 
 /**
- * blk_queue_io_min - set minimum request size for the queue
- * @q:	the request queue for the device
+ * blk_limits_io_min - set minimum request size for a device
+ * @limits: the queue limits
  * @min:  smallest I/O size in bytes
  *
  * Description:
@@ -387,15 +395,35 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
 *   smallest I/O the device can perform without incurring a performance
 *   penalty.
 */
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
+void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
 {
-	q->limits.io_min = min;
+	limits->io_min = min;
 
-	if (q->limits.io_min < q->limits.logical_block_size)
-		q->limits.io_min = q->limits.logical_block_size;
+	if (limits->io_min < limits->logical_block_size)
+		limits->io_min = limits->logical_block_size;
 
-	if (q->limits.io_min < q->limits.physical_block_size)
-		q->limits.io_min = q->limits.physical_block_size;
+	if (limits->io_min < limits->physical_block_size)
+		limits->io_min = limits->physical_block_size;
+}
+EXPORT_SYMBOL(blk_limits_io_min);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:	the request queue for the device
+ * @min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Storage devices may report a granularity or preferred minimum I/O
+ *   size which is the smallest request the device can perform without
+ *   incurring a performance penalty.  For disk drives this is often the
+ *   physical block size.  For RAID arrays it is often the stripe chunk
+ *   size.  A properly aligned multiple of minimum_io_size is the
+ *   preferred request size for workloads where a high number of I/O
+ *   operations is desired.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	blk_limits_io_min(&q->limits, min);
 }
 EXPORT_SYMBOL(blk_queue_io_min);
 
@@ -405,8 +433,12 @@ EXPORT_SYMBOL(blk_queue_io_min);
  * @opt:  optimal request size in bytes
  *
  * Description:
- *   Drivers can call this function to set the preferred I/O request
- *   size for devices that report such a value.
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
 */
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
@@ -426,27 +458,7 @@ EXPORT_SYMBOL(blk_queue_io_opt);
 **/
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
-	/* zero is "infinity" */
-	t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
-					     queue_max_sectors(b));
-
-	t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
-						queue_max_hw_sectors(b));
-
-	t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
-						   queue_segment_boundary(b));
-
-	t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
-						   queue_max_phys_segments(b));
-
-	t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
-						 queue_max_hw_segments(b));
-
-	t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
-						  queue_max_segment_size(b));
-
-	t->limits.logical_block_size = max(queue_logical_block_size(t),
-					   queue_logical_block_size(b));
+	blk_stack_limits(&t->limits, &b->limits, 0);
 
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
@@ -516,6 +528,16 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		return -1;
 	}
 
+	/* Find lcm() of optimal I/O size */
+	if (t->io_opt && b->io_opt)
+		t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
+	else if (b->io_opt)
+		t->io_opt = b->io_opt;
+
+	/* Verify that optimal I/O size is a multiple of io_min */
+	if (t->io_min && t->io_opt % t->io_min)
+		return -1;
+
 	return 0;
 }
 EXPORT_SYMBOL(blk_stack_limits);
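The new io_opt stacking computes the least common multiple of the two layers'
optimal sizes via gcd() from the newly included <linux/gcd.h>, then rejects
combinations where the result is not a multiple of io_min. A standalone sketch
of the arithmetic (example values invented; this variant divides before
multiplying to avoid intermediate overflow, which the kernel expression above
does not do):

	#include <stdio.h>

	static unsigned int gcd(unsigned int a, unsigned int b)
	{
		while (b) {
			unsigned int r = a % b;
			a = b;
			b = r;
		}
		return a;
	}

	int main(void)
	{
		/* e.g. a 192 KiB stripe stacked on a 256 KiB stripe */
		unsigned int t = 196608, b = 262144;
		unsigned int lcm = (t / gcd(t, b)) * b;

		printf("stacked io_opt = %u\n", lcm);	/* 786432 (768 KiB) */
		return 0;
	}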
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b1cd04087d6..d3aa2aadb3e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -16,9 +16,9 @@ struct queue_sysfs_entry {
 };
 
 static ssize_t
-queue_var_show(unsigned int var, char *page)
+queue_var_show(unsigned long var, char *page)
 {
-	return sprintf(page, "%d\n", var);
+	return sprintf(page, "%lu\n", var);
 }
 
 static ssize_t
@@ -77,7 +77,8 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_ra_show(struct request_queue *q, char *page)
 {
-	int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
+	unsigned long ra_kb = q->backing_dev_info.ra_pages <<
+					(PAGE_CACHE_SHIFT - 10);
 
 	return queue_var_show(ra_kb, (page));
 }
@@ -132,7 +133,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 		return -EINVAL;
 
 	spin_lock_irq(q->queue_lock);
-	blk_queue_max_sectors(q, max_sectors_kb << 1);
+	q->limits.max_sectors = max_sectors_kb << 1;
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
@@ -189,9 +190,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
 {
-	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
 
-	return queue_var_show(set != 0, page);
+	return queue_var_show(set, page);
 }
 
 static ssize_t
diff --git a/block/bsg.c b/block/bsg.c
index 54106f052f7..5f184bb3ff9 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 		return -EFAULT;
 
 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
-		if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
+		if (blk_verify_command(rq->cmd, has_write_perm))
 			return -EPERM;
 	} else if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
@@ -315,7 +315,6 @@ out:
 	blk_put_request(rq);
 	if (next_rq) {
 		blk_rq_unmap_user(next_rq->bio);
-		next_rq->bio = NULL;
 		blk_put_request(next_rq);
 	}
 	return ERR_PTR(ret);
@@ -449,7 +448,6 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 		hdr->dout_resid = rq->resid_len;
 		hdr->din_resid = rq->next_rq->resid_len;
 		blk_rq_unmap_user(bidi_bio);
-		rq->next_rq->bio = NULL;
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
 		hdr->din_resid = rq->resid_len;
@@ -468,7 +466,6 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	blk_rq_unmap_user(bio);
 	if (rq->cmd != rq->__cmd)
 		kfree(rq->cmd);
-	rq->bio = NULL;
 	blk_put_request(rq);
 
 	return ret;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18eaa6..fd7080ed793 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
 
 /*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	atomic_t ref;
+	/* various state flags, see below */
+	unsigned int flags;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	unsigned long slice_end;
+	long slice_resid;
+	unsigned int slice_dispatch;
+
+	/* pending metadata requests */
+	int meta_pending;
+	/* number of requests that are on the dispatch list or inside driver */
+	int dispatched;
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	pid_t pid;
+};
+
+/*
 * Per block device queue structure
 */
struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 
 	struct list_head cic_list;
-};
-
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
-	/* reference count */
-	atomic_t ref;
-	/* various state flags, see below */
-	unsigned int flags;
-	/* parent cfq_data */
-	struct cfq_data *cfqd;
-	/* service_tree member */
-	struct rb_node rb_node;
-	/* service_tree key */
-	unsigned long rb_key;
-	/* prio tree member */
-	struct rb_node p_node;
-	/* prio tree root we belong to, if any */
-	struct rb_root *p_root;
-	/* sorted list of pending requests */
-	struct rb_root sort_list;
-	/* if fifo isn't expired, next request to serve */
-	struct request *next_rq;
-	/* requests queued in sort_list */
-	int queued[2];
-	/* currently allocated requests */
-	int allocated[2];
-	/* fifo list of requests in sort_list */
-	struct list_head fifo;
-
-	unsigned long slice_end;
-	long slice_resid;
-	unsigned int slice_dispatch;
-
-	/* pending metadata requests */
-	int meta_pending;
-	/* number of requests that are on the dispatch list or inside driver */
-	int dispatched;
-
-	/* io prio of this group */
-	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
-
-	pid_t pid;
+
+	/*
+	 * Fallback dummy cfqq for extreme OOM conditions
+	 */
+	struct cfq_queue oom_cfqq;
 };
 
 enum cfqq_state_flags {
@@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
 	ioc->ioprio_changed = 0;
 }
 
+static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			  pid_t pid, int is_sync)
+{
+	RB_CLEAR_NODE(&cfqq->rb_node);
+	RB_CLEAR_NODE(&cfqq->p_node);
+	INIT_LIST_HEAD(&cfqq->fifo);
+
+	atomic_set(&cfqq->ref, 0);
+	cfqq->cfqd = cfqd;
+
+	cfq_mark_cfqq_prio_changed(cfqq);
+
+	if (is_sync) {
+		if (!cfq_class_idle(cfqq))
+			cfq_mark_cfqq_idle_window(cfqq);
+		cfq_mark_cfqq_sync(cfqq);
+	}
+	cfqq->pid = pid;
+}
+
 static struct cfq_queue *
 cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
 		     struct io_context *ioc, gfp_t gfp_mask)
@@ -1653,56 +1678,40 @@ retry:
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
-	if (!cfqq) {
+	/*
+	 * Always try a new alloc if we fell back to the OOM cfqq
+	 * originally, since it should just be a temporary situation.
+	 */
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+		cfqq = NULL;
 		if (new_cfqq) {
 			cfqq = new_cfqq;
 			new_cfqq = NULL;
 		} else if (gfp_mask & __GFP_WAIT) {
-			/*
-			 * Inform the allocator of the fact that we will
-			 * just repeat this allocation if it fails, to allow
-			 * the allocator to do whatever it needs to attempt to
-			 * free memory.
-			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
 			new_cfqq = kmem_cache_alloc_node(cfq_pool,
-					gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
-			goto retry;
+			if (new_cfqq)
+				goto retry;
 		} else {
 			cfqq = kmem_cache_alloc_node(cfq_pool,
 					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
-			if (!cfqq)
-				goto out;
 		}
 
-		RB_CLEAR_NODE(&cfqq->rb_node);
-		RB_CLEAR_NODE(&cfqq->p_node);
-		INIT_LIST_HEAD(&cfqq->fifo);
-
-		atomic_set(&cfqq->ref, 0);
-		cfqq->cfqd = cfqd;
-
-		cfq_mark_cfqq_prio_changed(cfqq);
-
-		cfq_init_prio_data(cfqq, ioc);
-
-		if (is_sync) {
-			if (!cfq_class_idle(cfqq))
-				cfq_mark_cfqq_idle_window(cfqq);
-			cfq_mark_cfqq_sync(cfqq);
-		}
-		cfqq->pid = current->pid;
-		cfq_log_cfqq(cfqd, cfqq, "alloced");
+		if (cfqq) {
+			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+			cfq_init_prio_data(cfqq, ioc);
+			cfq_log_cfqq(cfqd, cfqq, "alloced");
+		} else
+			cfqq = &cfqd->oom_cfqq;
 	}
 
 	if (new_cfqq)
 		kmem_cache_free(cfq_pool, new_cfqq);
 
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
 	return cfqq;
 }
@@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 		cfqq = *async_cfqq;
 	}
 
-	if (!cfqq) {
+	if (!cfqq)
 		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
-		if (!cfqq)
-			return NULL;
-	}
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -2305,12 +2311,8 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 		goto queue_fail;
 
 	cfqq = cic_to_cfqq(cic, is_sync);
-	if (!cfqq) {
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
 		cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
-
-		if (!cfqq)
-			goto queue_fail;
-
 		cic_set_cfqq(cic, cfqq, is_sync);
 	}
 
@@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		cfqd->prio_trees[i] = RB_ROOT;
 
+	/*
+	 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+	atomic_inc(&cfqd->oom_cfqq.ref);
+
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->queue = q;
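The cfq changes drop __GFP_NOFAIL in favour of a statically embedded fallback
queue: when the slab allocation fails, callers are handed oom_cfqq, whose
reference count is bumped once at init so a normal put can never free it. The
pattern in miniature, as a standalone sketch (struct and function names are
invented for illustration):

	#include <stdlib.h>

	struct queue {
		int ref;
	};

	/* permanently referenced fallback, like cfqd->oom_cfqq */
	static struct queue oom_queue = { .ref = 1 };

	static struct queue *queue_get(void)
	{
		struct queue *q = calloc(1, sizeof(*q));

		if (!q)
			q = &oom_queue;	/* degraded service, never NULL */
		q->ref++;		/* caller's reference */
		return q;
	}

	static void queue_put(struct queue *q)
	{
		/* the init-time reference keeps oom_queue from reaching 0 */
		if (--q->ref == 0)
			free(q);
	}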
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
deleted file mode 100644
index 572bbc2f900..00000000000
--- a/block/cmd-filter.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright 2004 Peter M. Jones <pjones@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- *
- */
-
-#include <linux/list.h>
-#include <linux/genhd.h>
-#include <linux/spinlock.h>
-#include <linux/capability.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-
-#include <scsi/scsi.h>
-#include <linux/cdrom.h>
-
-int blk_verify_command(struct blk_cmd_filter *filter,
-		       unsigned char *cmd, fmode_t has_write_perm)
-{
-	/* root can do any command. */
-	if (capable(CAP_SYS_RAWIO))
-		return 0;
-
-	/* if there's no filter set, assume we're filtering everything out */
-	if (!filter)
-		return -EPERM;
-
-	/* Anybody who can open the device can do a read-safe command */
-	if (test_bit(cmd[0], filter->read_ok))
-		return 0;
-
-	/* Write-safe commands require a writable open */
-	if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
-		return 0;
-
-	return -EPERM;
-}
-EXPORT_SYMBOL(blk_verify_command);
-
-#if 0
-/* and now, the sysfs stuff */
-static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
-			     int rw)
-{
-	char *npage = page;
-	unsigned long *okbits;
-	int i;
-
-	if (rw == READ)
-		okbits = filter->read_ok;
-	else
-		okbits = filter->write_ok;
-
-	for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
-		if (test_bit(i, okbits)) {
-			npage += sprintf(npage, "0x%02x", i);
-			if (i < BLK_SCSI_MAX_CMDS - 1)
-				sprintf(npage++, " ");
-		}
-	}
-
-	if (npage != page)
-		npage += sprintf(npage, "\n");
-
-	return npage - page;
-}
-
-static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
-{
-	return rcf_cmds_show(filter, page, READ);
-}
-
-static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
-				  char *page)
-{
-	return rcf_cmds_show(filter, page, WRITE);
-}
-
-static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
-			      const char *page, size_t count, int rw)
-{
-	unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
-	int cmd, set;
-	char *p, *status;
-
-	if (rw == READ) {
-		memcpy(&okbits, filter->read_ok, sizeof(okbits));
-		target_okbits = filter->read_ok;
-	} else {
-		memcpy(&okbits, filter->write_ok, sizeof(okbits));
-		target_okbits = filter->write_ok;
-	}
-
-	while ((p = strsep((char **)&page, " ")) != NULL) {
-		set = 1;
-
-		if (p[0] == '+') {
-			p++;
-		} else if (p[0] == '-') {
-			set = 0;
-			p++;
-		}
-
-		cmd = simple_strtol(p, &status, 16);
-
-		/* either of these cases means invalid input, so do nothing. */
-		if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS)
-			return -EINVAL;
-
-		if (set)
-			__set_bit(cmd, okbits);
-		else
-			__clear_bit(cmd, okbits);
-	}
-
-	memcpy(target_okbits, okbits, sizeof(okbits));
-	return count;
-}
-
-static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
-				  const char *page, size_t count)
-{
-	return rcf_cmds_store(filter, page, count, READ);
-}
-
-static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
-				   const char *page, size_t count)
-{
-	return rcf_cmds_store(filter, page, count, WRITE);
-}
-
-struct rcf_sysfs_entry {
-	struct attribute attr;
-	ssize_t (*show)(struct blk_cmd_filter *, char *);
-	ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
-};
-
-static struct rcf_sysfs_entry rcf_readcmds_entry = {
-	.attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
-	.show = rcf_readcmds_show,
-	.store = rcf_readcmds_store,
-};
-
-static struct rcf_sysfs_entry rcf_writecmds_entry = {
-	.attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
-	.show = rcf_writecmds_show,
-	.store = rcf_writecmds_store,
-};
-
-static struct attribute *default_attrs[] = {
-	&rcf_readcmds_entry.attr,
-	&rcf_writecmds_entry.attr,
-	NULL,
-};
-
-#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
-
-static ssize_t
-rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
-{
-	struct rcf_sysfs_entry *entry = to_rcf(attr);
-	struct blk_cmd_filter *filter;
-
-	filter = container_of(kobj, struct blk_cmd_filter, kobj);
-	if (entry->show)
-		return entry->show(filter, page);
-
-	return 0;
-}
-
-static ssize_t
-rcf_attr_store(struct kobject *kobj, struct attribute *attr,
-	       const char *page, size_t length)
-{
-	struct rcf_sysfs_entry *entry = to_rcf(attr);
-	struct blk_cmd_filter *filter;
-
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	if (!entry->store)
-		return -EINVAL;
-
-	filter = container_of(kobj, struct blk_cmd_filter, kobj);
-	return entry->store(filter, page, length);
-}
-
-static struct sysfs_ops rcf_sysfs_ops = {
-	.show = rcf_attr_show,
-	.store = rcf_attr_store,
-};
-
-static struct kobj_type rcf_ktype = {
-	.sysfs_ops = &rcf_sysfs_ops,
-	.default_attrs = default_attrs,
-};
-
-int blk_register_filter(struct gendisk *disk)
-{
-	int ret;
-	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
-	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
-				   &disk_to_dev(disk)->kobj,
-				   "%s", "cmd_filter");
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-EXPORT_SYMBOL(blk_register_filter);
-
-void blk_unregister_filter(struct gendisk *disk)
-{
-	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
-	kobject_put(&filter->kobj);
-}
-EXPORT_SYMBOL(blk_unregister_filter);
-#endif
diff --git a/block/elevator.c b/block/elevator.c
index ca861927ba4..2d511f9105e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -100,6 +100,19 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (bio_integrity(bio) != blk_integrity_rq(rq))
 		return 0;
 
+	/*
+	 * Don't merge if failfast settings don't match.
+	 *
+	 * FIXME: The negation in front of each condition is necessary
+	 * because bio and request flags use different bit positions
+	 * and the accessors return those bits directly. This
+	 * ugliness will soon go away.
+	 */
+	if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) ||
+	    !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
+	    !bio_failfast_driver(bio) != !blk_failfast_driver(rq))
+		return 0;
+
 	if (!elv_iosched_allow_merge(rq, bio))
 		return 0;
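The FIXME above hinges on the "!x != !y" idiom: the bio and request accessors
return the failfast bits at their native (different) positions, so each side
must be collapsed to 0 or 1 before comparison. A standalone demonstration with
invented bit values:

	#include <assert.h>

	static int failfast_mismatch(unsigned int bio_bits, unsigned int rq_bits)
	{
		/* double negation normalizes any set bit to exactly 1 */
		return !bio_bits != !rq_bits;
	}

	int main(void)
	{
		assert(failfast_mismatch(0x8, 0x0));	/* set vs. clear */
		assert(!failfast_mismatch(0x8, 0x100));	/* both set, different bits */
		assert(!failfast_mismatch(0x0, 0x0));	/* both clear */
		return 0;
	}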
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 5f8e798ede4..e5b10017a50 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -32,6 +32,11 @@
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_cmnd.h>
 
+struct blk_cmd_filter {
+	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+} blk_default_cmd_filter;
+
 /* Command group 3 is reserved and should never be used.  */
 const unsigned char scsi_command_size_tbl[8] =
 {
@@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
 	return put_user(1, p);
 }
 
-void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
+static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
 {
 	/* Basic read-only commands */
 	__set_bit(TEST_UNIT_READY, filter->read_ok);
@@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
 	__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
 	__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
 }
-EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
+
+int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
+{
+	struct blk_cmd_filter *filter = &blk_default_cmd_filter;
+
+	/* root can do any command. */
+	if (capable(CAP_SYS_RAWIO))
+		return 0;
+
+	/* if there's no filter set, assume we're filtering everything out */
+	if (!filter)
+		return -EPERM;
+
+	/* Anybody who can open the device can do a read-safe command */
+	if (test_bit(cmd[0], filter->read_ok))
+		return 0;
+
+	/* Write-safe commands require a writable open */
+	if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
+		return 0;
+
+	return -EPERM;
+}
+EXPORT_SYMBOL(blk_verify_command);
 
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 			     struct sg_io_hdr *hdr, fmode_t mode)
 {
 	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
-	if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE))
+	if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
 		return -EPERM;
 
 	/*
@@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
 		goto error;
 
-	err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE);
+	err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
 	if (err)
 		goto error;
 
@@ -645,5 +673,11 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 	blk_put_queue(q);
 	return err;
 }
-
 EXPORT_SYMBOL(scsi_cmd_ioctl);
+
+int __init blk_scsi_ioctl_init(void)
+{
+	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
+	return 0;
+}
+fs_initcall(blk_scsi_ioctl_init);
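With the per-queue filter removed, blk_verify_command() now consults one
static bitmap filter, seeded once at boot through fs_initcall(). A compact
userspace model of the opcode-bitmap scheme (all names and helpers below are
invented; the kernel uses test_bit()/__set_bit() on unsigned long arrays as
shown in the diff):

	#include <stdbool.h>

	#define MAX_CMDS	256
	#define LONG_BITS	(8 * sizeof(unsigned long))

	struct cmd_filter {
		unsigned long read_ok[MAX_CMDS / LONG_BITS];
		unsigned long write_ok[MAX_CMDS / LONG_BITS];
	};

	static void set_ok(unsigned long *map, unsigned char op)
	{
		map[op / LONG_BITS] |= 1UL << (op % LONG_BITS);
	}

	static bool test_ok(const unsigned long *map, unsigned char op)
	{
		return map[op / LONG_BITS] & (1UL << (op % LONG_BITS));
	}

	/* mirrors the new blk_verify_command(): read-safe opcodes pass for
	 * any opener, write-safe ones additionally need a writable open */
	static int verify_command(const struct cmd_filter *f,
				  const unsigned char *cmd, bool has_write_perm)
	{
		if (test_ok(f->read_ok, cmd[0]))
			return 0;
		if (has_write_perm && test_ok(f->write_ok, cmd[0]))
			return 0;
		return -1;		/* -EPERM in the kernel */
	}

	int main(void)
	{
		struct cmd_filter f = { { 0 }, { 0 } };
		unsigned char tur[6] = { 0x00 };	/* TEST UNIT READY */

		set_ok(f.read_ok, 0x00);
		return verify_command(&f, tur, false);	/* 0: permitted */
	}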