aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig15
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-core.c33
-rw-r--r--block/blk-integrity.c1
-rw-r--r--block/blk-merge.c6
-rw-r--r--block/blk-settings.c88
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--block/bsg.c5
-rw-r--r--block/cfq-iosched.c178
-rw-r--r--block/cmd-filter.c233
-rw-r--r--block/elevator.c13
-rw-r--r--block/scsi_ioctl.c44
12 files changed, 238 insertions, 393 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 2c39527aa7d..9be0b56eaee 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -23,8 +23,8 @@ menuconfig BLOCK
if BLOCK
-config LBD
- bool "Support for large block devices and files"
+config LBDAF
+ bool "Support for large (2TB+) block devices and files"
depends on !64BIT
default y
help
@@ -48,9 +48,9 @@ config LBD
If unsure, say Y.
config BLK_DEV_BSG
- bool "Block layer SG support v4 (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- ---help---
+ bool "Block layer SG support v4"
+ default y
+ help
Saying Y here will enable generic SG (SCSI generic) v4 support
for any block device.
@@ -60,7 +60,10 @@ config BLK_DEV_BSG
protocols (e.g. Task Management Functions and SMP in Serial
Attached SCSI).
- If unsure, say N.
+ This option is required by recent UDEV versions to properly
+ access device serial numbers, etc.
+
+ If unsure, say Y.
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
diff --git a/block/Makefile b/block/Makefile
index e9fa4dd690f..6c54ed0ff75 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
+ ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b06cf5c2a82..e3299a77a0d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -575,13 +575,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
return NULL;
}
- /*
- * if caller didn't supply a lock, they get per-queue locking with
- * our embedded lock
- */
- if (!lock)
- lock = &q->__queue_lock;
-
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unplug_fn = generic_unplug_device;
@@ -595,8 +588,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
q->sg_reserved_size = INT_MAX;
- blk_set_cmd_filter_defaults(&q->cmd_filter);
-
/*
* all done
*/
@@ -1172,6 +1163,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const int unplug = bio_unplug(bio);
int rw_flags;
+ if (bio_barrier(bio) && bio_has_data(bio) &&
+ (q->next_ordered == QUEUE_ORDERED_NONE)) {
+ bio_endio(bio, -EOPNOTSUPP);
+ return 0;
+ }
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1472,11 +1468,6 @@ static inline void __generic_make_request(struct bio *bio)
err = -EOPNOTSUPP;
goto end_io;
}
- if (bio_barrier(bio) && bio_has_data(bio) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
ret = q->make_request_fn(q, bio);
} while (ret);
@@ -2145,7 +2136,7 @@ bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
return blk_end_bidi_request(rq, error, nr_bytes, 0);
}
-EXPORT_SYMBOL_GPL(blk_end_request);
+EXPORT_SYMBOL(blk_end_request);
/**
* blk_end_request_all - Helper function for drives to finish the request.
@@ -2166,7 +2157,7 @@ void blk_end_request_all(struct request *rq, int error)
pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
BUG_ON(pending);
}
-EXPORT_SYMBOL_GPL(blk_end_request_all);
+EXPORT_SYMBOL(blk_end_request_all);
/**
* blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2184,7 +2175,7 @@ bool blk_end_request_cur(struct request *rq, int error)
{
return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
-EXPORT_SYMBOL_GPL(blk_end_request_cur);
+EXPORT_SYMBOL(blk_end_request_cur);
/**
* __blk_end_request - Helper function for drivers to complete the request.
@@ -2203,7 +2194,7 @@ bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
return __blk_end_bidi_request(rq, error, nr_bytes, 0);
}
-EXPORT_SYMBOL_GPL(__blk_end_request);
+EXPORT_SYMBOL(__blk_end_request);
/**
* __blk_end_request_all - Helper function for drives to finish the request.
@@ -2224,7 +2215,7 @@ void __blk_end_request_all(struct request *rq, int error)
pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
BUG_ON(pending);
}
-EXPORT_SYMBOL_GPL(__blk_end_request_all);
+EXPORT_SYMBOL(__blk_end_request_all);
/**
* __blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2243,7 +2234,7 @@ bool __blk_end_request_cur(struct request *rq, int error)
{
return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
-EXPORT_SYMBOL_GPL(__blk_end_request_cur);
+EXPORT_SYMBOL(__blk_end_request_cur);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio)
@@ -2365,7 +2356,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
__bio_clone(bio, bio_src);
if (bio_integrity(bio_src) &&
- bio_integrity_clone(bio, bio_src, gfp_mask))
+ bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data))
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 73e28d35568..15c630813b1 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -379,6 +379,7 @@ void blk_integrity_unregister(struct gendisk *disk)
kobject_uevent(&bi->kobj, KOBJ_REMOVE);
kobject_del(&bi->kobj);
+ kobject_put(&bi->kobj);
kmem_cache_free(integrity_cachep, bi);
disk->integrity = NULL;
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 39ce64432ba..e1999679a4d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -350,6 +350,12 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (blk_integrity_rq(req) != blk_integrity_rq(next))
return 0;
+ /* don't merge requests of different failfast settings */
+ if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
+ blk_failfast_transport(req) != blk_failfast_transport(next) ||
+ blk_failfast_driver(req) != blk_failfast_driver(next))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7541ea4bf9f..476d8706507 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -7,6 +7,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
+#include <linux/gcd.h>
#include "blk.h"
@@ -97,7 +98,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
/**
* blk_set_default_limits - reset limits to default values
- * @limits: the queue_limits structure to reset
+ * @lim: the queue_limits structure to reset
*
* Description:
* Returns a queue_limit struct to its default state. Can be used by
@@ -112,7 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_segment_size = MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
- lim->bounce_pfn = BLK_BOUNCE_ANY;
+ lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
lim->io_opt = 0;
lim->misaligned = 0;
@@ -165,6 +166,13 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
blk_set_default_limits(&q->limits);
/*
+ * If the caller didn't supply a lock, fall back to our embedded
+ * per-queue locks
+ */
+ if (!q->queue_lock)
+ q->queue_lock = &q->__queue_lock;
+
+ /*
* by default assume old behaviour and bounce for any highmem page
*/
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
@@ -377,8 +385,8 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
EXPORT_SYMBOL(blk_queue_alignment_offset);
/**
- * blk_queue_io_min - set minimum request size for the queue
- * @q: the request queue for the device
+ * blk_limits_io_min - set minimum request size for a device
+ * @limits: the queue limits
* @min: smallest I/O size in bytes
*
* Description:
@@ -387,15 +395,35 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
* smallest I/O the device can perform without incurring a performance
* penalty.
*/
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
+void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
{
- q->limits.io_min = min;
+ limits->io_min = min;
- if (q->limits.io_min < q->limits.logical_block_size)
- q->limits.io_min = q->limits.logical_block_size;
+ if (limits->io_min < limits->logical_block_size)
+ limits->io_min = limits->logical_block_size;
- if (q->limits.io_min < q->limits.physical_block_size)
- q->limits.io_min = q->limits.physical_block_size;
+ if (limits->io_min < limits->physical_block_size)
+ limits->io_min = limits->physical_block_size;
+}
+EXPORT_SYMBOL(blk_limits_io_min);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q: the request queue for the device
+ * @min: smallest I/O size in bytes
+ *
+ * Description:
+ * Storage devices may report a granularity or preferred minimum I/O
+ * size which is the smallest request the device can perform without
+ * incurring a performance penalty. For disk drives this is often the
+ * physical block size. For RAID arrays it is often the stripe chunk
+ * size. A properly aligned multiple of minimum_io_size is the
+ * preferred request size for workloads where a high number of I/O
+ * operations is desired.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+ blk_limits_io_min(&q->limits, min);
}
EXPORT_SYMBOL(blk_queue_io_min);
@@ -405,8 +433,12 @@ EXPORT_SYMBOL(blk_queue_io_min);
* @opt: optimal request size in bytes
*
* Description:
- * Drivers can call this function to set the preferred I/O request
- * size for devices that report such a value.
+ * Storage devices may report an optimal I/O size, which is the
+ * device's preferred unit for sustained I/O. This is rarely reported
+ * for disk drives. For RAID arrays it is usually the stripe width or
+ * the internal track size. A properly aligned multiple of
+ * optimal_io_size is the preferred request size for workloads where
+ * sustained throughput is desired.
*/
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
@@ -426,27 +458,7 @@ EXPORT_SYMBOL(blk_queue_io_opt);
**/
void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
{
- /* zero is "infinity" */
- t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
- queue_max_sectors(b));
-
- t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
- queue_max_hw_sectors(b));
-
- t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
- queue_segment_boundary(b));
-
- t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
- queue_max_phys_segments(b));
-
- t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
- queue_max_hw_segments(b));
-
- t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
- queue_max_segment_size(b));
-
- t->limits.logical_block_size = max(queue_logical_block_size(t),
- queue_logical_block_size(b));
+ blk_stack_limits(&t->limits, &b->limits, 0);
if (!t->queue_lock)
WARN_ON_ONCE(1);
@@ -516,6 +528,16 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
return -1;
}
+ /* Find lcm() of optimal I/O size */
+ if (t->io_opt && b->io_opt)
+ t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
+ else if (b->io_opt)
+ t->io_opt = b->io_opt;
+
+ /* Verify that optimal I/O size is a multiple of io_min */
+ if (t->io_min && t->io_opt % t->io_min)
+ return -1;
+
return 0;
}
EXPORT_SYMBOL(blk_stack_limits);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b1cd04087d6..d3aa2aadb3e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -16,9 +16,9 @@ struct queue_sysfs_entry {
};
static ssize_t
-queue_var_show(unsigned int var, char *page)
+queue_var_show(unsigned long var, char *page)
{
- return sprintf(page, "%d\n", var);
+ return sprintf(page, "%lu\n", var);
}
static ssize_t
@@ -77,7 +77,8 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
static ssize_t queue_ra_show(struct request_queue *q, char *page)
{
- int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
+ unsigned long ra_kb = q->backing_dev_info.ra_pages <<
+ (PAGE_CACHE_SHIFT - 10);
return queue_var_show(ra_kb, (page));
}
@@ -132,7 +133,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
return -EINVAL;
spin_lock_irq(q->queue_lock);
- blk_queue_max_sectors(q, max_sectors_kb << 1);
+ q->limits.max_sectors = max_sectors_kb << 1;
spin_unlock_irq(q->queue_lock);
return ret;
@@ -189,9 +190,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
- unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+ bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
- return queue_var_show(set != 0, page);
+ return queue_var_show(set, page);
}
static ssize_t
diff --git a/block/bsg.c b/block/bsg.c
index 54106f052f7..5f184bb3ff9 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
+ if (blk_verify_command(rq->cmd, has_write_perm))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -315,7 +315,6 @@ out:
blk_put_request(rq);
if (next_rq) {
blk_rq_unmap_user(next_rq->bio);
- next_rq->bio = NULL;
blk_put_request(next_rq);
}
return ERR_PTR(ret);
@@ -449,7 +448,6 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
hdr->dout_resid = rq->resid_len;
hdr->din_resid = rq->next_rq->resid_len;
blk_rq_unmap_user(bidi_bio);
- rq->next_rq->bio = NULL;
blk_put_request(rq->next_rq);
} else if (rq_data_dir(rq) == READ)
hdr->din_resid = rq->resid_len;
@@ -468,7 +466,6 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
blk_rq_unmap_user(bio);
if (rq->cmd != rq->__cmd)
kfree(rq->cmd);
- rq->bio = NULL;
blk_put_request(rq);
return ret;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18eaa6..fd7080ed793 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+ /* reference count */
+ atomic_t ref;
+ /* various state flags, see below */
+ unsigned int flags;
+ /* parent cfq_data */
+ struct cfq_data *cfqd;
+ /* service_tree member */
+ struct rb_node rb_node;
+ /* service_tree key */
+ unsigned long rb_key;
+ /* prio tree member */
+ struct rb_node p_node;
+ /* prio tree root we belong to, if any */
+ struct rb_root *p_root;
+ /* sorted list of pending requests */
+ struct rb_root sort_list;
+ /* if fifo isn't expired, next request to serve */
+ struct request *next_rq;
+ /* requests queued in sort_list */
+ int queued[2];
+ /* currently allocated requests */
+ int allocated[2];
+ /* fifo list of requests in sort_list */
+ struct list_head fifo;
+
+ unsigned long slice_end;
+ long slice_resid;
+ unsigned int slice_dispatch;
+
+ /* pending metadata requests */
+ int meta_pending;
+ /* number of requests that are on the dispatch list or inside driver */
+ int dispatched;
+
+ /* io prio of this group */
+ unsigned short ioprio, org_ioprio;
+ unsigned short ioprio_class, org_ioprio_class;
+
+ pid_t pid;
+};
+
+/*
* Per block device queue structure
*/
struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
unsigned int cfq_slice_idle;
struct list_head cic_list;
-};
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
- /* reference count */
- atomic_t ref;
- /* various state flags, see below */
- unsigned int flags;
- /* parent cfq_data */
- struct cfq_data *cfqd;
- /* service_tree member */
- struct rb_node rb_node;
- /* service_tree key */
- unsigned long rb_key;
- /* prio tree member */
- struct rb_node p_node;
- /* prio tree root we belong to, if any */
- struct rb_root *p_root;
- /* sorted list of pending requests */
- struct rb_root sort_list;
- /* if fifo isn't expired, next request to serve */
- struct request *next_rq;
- /* requests queued in sort_list */
- int queued[2];
- /* currently allocated requests */
- int allocated[2];
- /* fifo list of requests in sort_list */
- struct list_head fifo;
-
- unsigned long slice_end;
- long slice_resid;
- unsigned int slice_dispatch;
-
- /* pending metadata requests */
- int meta_pending;
- /* number of requests that are on the dispatch list or inside driver */
- int dispatched;
-
- /* io prio of this group */
- unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
-
- pid_t pid;
+ /*
+ * Fallback dummy cfqq for extreme OOM conditions
+ */
+ struct cfq_queue oom_cfqq;
};
enum cfqq_state_flags {
@@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
ioc->ioprio_changed = 0;
}
+static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ pid_t pid, int is_sync)
+{
+ RB_CLEAR_NODE(&cfqq->rb_node);
+ RB_CLEAR_NODE(&cfqq->p_node);
+ INIT_LIST_HEAD(&cfqq->fifo);
+
+ atomic_set(&cfqq->ref, 0);
+ cfqq->cfqd = cfqd;
+
+ cfq_mark_cfqq_prio_changed(cfqq);
+
+ if (is_sync) {
+ if (!cfq_class_idle(cfqq))
+ cfq_mark_cfqq_idle_window(cfqq);
+ cfq_mark_cfqq_sync(cfqq);
+ }
+ cfqq->pid = pid;
+}
+
static struct cfq_queue *
cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
struct io_context *ioc, gfp_t gfp_mask)
@@ -1653,56 +1678,40 @@ retry:
/* cic always exists here */
cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ /*
+ * Always try a new alloc if we fell back to the OOM cfqq
+ * originally, since it should just be a temporary situation.
+ */
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+ cfqq = NULL;
if (new_cfqq) {
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
- /*
- * Inform the allocator of the fact that we will
- * just repeat this allocation if it fails, to allow
- * the allocator to do whatever it needs to attempt to
- * free memory.
- */
spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool,
- gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+ gfp_mask | __GFP_ZERO,
cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock);
- goto retry;
+ if (new_cfqq)
+ goto retry;
} else {
cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO,
cfqd->queue->node);
- if (!cfqq)
- goto out;
}
- RB_CLEAR_NODE(&cfqq->rb_node);
- RB_CLEAR_NODE(&cfqq->p_node);
- INIT_LIST_HEAD(&cfqq->fifo);
-
- atomic_set(&cfqq->ref, 0);
- cfqq->cfqd = cfqd;
-
- cfq_mark_cfqq_prio_changed(cfqq);
-
- cfq_init_prio_data(cfqq, ioc);
-
- if (is_sync) {
- if (!cfq_class_idle(cfqq))
- cfq_mark_cfqq_idle_window(cfqq);
- cfq_mark_cfqq_sync(cfqq);
- }
- cfqq->pid = current->pid;
- cfq_log_cfqq(cfqd, cfqq, "alloced");
+ if (cfqq) {
+ cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+ cfq_init_prio_data(cfqq, ioc);
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
+ } else
+ cfqq = &cfqd->oom_cfqq;
}
if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);
-out:
- WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq;
}
@@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
cfqq = *async_cfqq;
}
- if (!cfqq) {
+ if (!cfqq)
cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
- if (!cfqq)
- return NULL;
- }
/*
* pin the queue now that it's allocated, scheduler exit will prune it
@@ -2305,12 +2311,8 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
goto queue_fail;
cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
-
- if (!cfqq)
- goto queue_fail;
-
cic_set_cfqq(cic, cfqq, is_sync);
}
@@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT;
+ /*
+ * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+ * Grab a permanent reference to it, so that the normal code flow
+ * will not attempt to free it.
+ */
+ cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+ atomic_inc(&cfqd->oom_cfqq.ref);
+
INIT_LIST_HEAD(&cfqd->cic_list);
cfqd->queue = q;
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
deleted file mode 100644
index 572bbc2f900..00000000000
--- a/block/cmd-filter.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright 2004 Peter M. Jones <pjones@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- *
- */
-
-#include <linux/list.h>
-#include <linux/genhd.h>
-#include <linux/spinlock.h>
-#include <linux/capability.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-
-#include <scsi/scsi.h>
-#include <linux/cdrom.h>
-
-int blk_verify_command(struct blk_cmd_filter *filter,
- unsigned char *cmd, fmode_t has_write_perm)
-{
- /* root can do any command. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- /* if there's no filter set, assume we're filtering everything out */
- if (!filter)
- return -EPERM;
-
- /* Anybody who can open the device can do a read-safe command */
- if (test_bit(cmd[0], filter->read_ok))
- return 0;
-
- /* Write-safe commands require a writable open */
- if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
- return 0;
-
- return -EPERM;
-}
-EXPORT_SYMBOL(blk_verify_command);
-
-#if 0
-/* and now, the sysfs stuff */
-static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
- int rw)
-{
- char *npage = page;
- unsigned long *okbits;
- int i;
-
- if (rw == READ)
- okbits = filter->read_ok;
- else
- okbits = filter->write_ok;
-
- for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
- if (test_bit(i, okbits)) {
- npage += sprintf(npage, "0x%02x", i);
- if (i < BLK_SCSI_MAX_CMDS - 1)
- sprintf(npage++, " ");
- }
- }
-
- if (npage != page)
- npage += sprintf(npage, "\n");
-
- return npage - page;
-}
-
-static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
-{
- return rcf_cmds_show(filter, page, READ);
-}
-
-static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
- char *page)
-{
- return rcf_cmds_show(filter, page, WRITE);
-}
-
-static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count, int rw)
-{
- unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
- int cmd, set;
- char *p, *status;
-
- if (rw == READ) {
- memcpy(&okbits, filter->read_ok, sizeof(okbits));
- target_okbits = filter->read_ok;
- } else {
- memcpy(&okbits, filter->write_ok, sizeof(okbits));
- target_okbits = filter->write_ok;
- }
-
- while ((p = strsep((char **)&page, " ")) != NULL) {
- set = 1;
-
- if (p[0] == '+') {
- p++;
- } else if (p[0] == '-') {
- set = 0;
- p++;
- }
-
- cmd = simple_strtol(p, &status, 16);
-
- /* either of these cases means invalid input, so do nothing. */
- if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS)
- return -EINVAL;
-
- if (set)
- __set_bit(cmd, okbits);
- else
- __clear_bit(cmd, okbits);
- }
-
- memcpy(target_okbits, okbits, sizeof(okbits));
- return count;
-}
-
-static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count)
-{
- return rcf_cmds_store(filter, page, count, READ);
-}
-
-static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
- const char *page, size_t count)
-{
- return rcf_cmds_store(filter, page, count, WRITE);
-}
-
-struct rcf_sysfs_entry {
- struct attribute attr;
- ssize_t (*show)(struct blk_cmd_filter *, char *);
- ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
-};
-
-static struct rcf_sysfs_entry rcf_readcmds_entry = {
- .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
- .show = rcf_readcmds_show,
- .store = rcf_readcmds_store,
-};
-
-static struct rcf_sysfs_entry rcf_writecmds_entry = {
- .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
- .show = rcf_writecmds_show,
- .store = rcf_writecmds_store,
-};
-
-static struct attribute *default_attrs[] = {
- &rcf_readcmds_entry.attr,
- &rcf_writecmds_entry.attr,
- NULL,
-};
-
-#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
-
-static ssize_t
-rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
-{
- struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_cmd_filter *filter;
-
- filter = container_of(kobj, struct blk_cmd_filter, kobj);
- if (entry->show)
- return entry->show(filter, page);
-
- return 0;
-}
-
-static ssize_t
-rcf_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *page, size_t length)
-{
- struct rcf_sysfs_entry *entry = to_rcf(attr);
- struct blk_cmd_filter *filter;
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- if (!entry->store)
- return -EINVAL;
-
- filter = container_of(kobj, struct blk_cmd_filter, kobj);
- return entry->store(filter, page, length);
-}
-
-static struct sysfs_ops rcf_sysfs_ops = {
- .show = rcf_attr_show,
- .store = rcf_attr_store,
-};
-
-static struct kobj_type rcf_ktype = {
- .sysfs_ops = &rcf_sysfs_ops,
- .default_attrs = default_attrs,
-};
-
-int blk_register_filter(struct gendisk *disk)
-{
- int ret;
- struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
- ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
- &disk_to_dev(disk)->kobj,
- "%s", "cmd_filter");
- if (ret < 0)
- return ret;
-
- return 0;
-}
-EXPORT_SYMBOL(blk_register_filter);
-
-void blk_unregister_filter(struct gendisk *disk)
-{
- struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
- kobject_put(&filter->kobj);
-}
-EXPORT_SYMBOL(blk_unregister_filter);
-#endif
diff --git a/block/elevator.c b/block/elevator.c
index ca861927ba4..2d511f9105e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -100,6 +100,19 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return 0;
+ /*
+ * Don't merge if failfast settings don't match.
+ *
+ * FIXME: The negation in front of each condition is necessary
+ * because bio and request flags use different bit positions
+ * and the accessors return those bits directly. This
+ * ugliness will soon go away.
+ */
+ if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) ||
+ !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
+ !bio_failfast_driver(bio) != !blk_failfast_driver(rq))
+ return 0;
+
if (!elv_iosched_allow_merge(rq, bio))
return 0;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 5f8e798ede4..e5b10017a50 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -32,6 +32,11 @@
#include <scsi/scsi_ioctl.h>
#include <scsi/scsi_cmnd.h>
+struct blk_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+} blk_default_cmd_filter;
+
/* Command group 3 is reserved and should never be used. */
const unsigned char scsi_command_size_tbl[8] =
{
@@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
-void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
+static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
{
/* Basic read-only commands */
__set_bit(TEST_UNIT_READY, filter->read_ok);
@@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
}
-EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
+
+int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
+{
+ struct blk_cmd_filter *filter = &blk_default_cmd_filter;
+
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_verify_command);
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_hdr *hdr, fmode_t mode)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE))
+ if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
return -EPERM;
/*
@@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE);
+ err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
if (err)
goto error;
@@ -645,5 +673,11 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
blk_put_queue(q);
return err;
}
-
EXPORT_SYMBOL(scsi_cmd_ioctl);
+
+int __init blk_scsi_ioctl_init(void)
+{
+ blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
+ return 0;
+}
+fs_initcall(blk_scsi_ioctl_init);