aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 12:55:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 12:55:04 -0700
commit89e5d6f0d979f6e7dc2bbb1ebd9e239217e2e952 (patch)
tree1126044004b73df905a6183430376f1d97c3b6c9
parent516e77977085c9c50703fabb5dc61bd57a8cc1d0 (diff)
parenta4ffc152198efba2ed9e6eac0eb97f17bfebce85 (diff)
Merge tag 'dm-3.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper changes for 3.4 from Alasdair Kergon: - Update thin provisioning to support read-only external snapshot origins and discards. - A new target, dm verity, for device content validation. - Mark dm uevent and dm raid as no-longer-experimental. - Miscellaneous other fixes and clean-ups. * tag 'dm-3.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (27 commits) dm: add verity target dm bufio: prefetch dm thin: add pool target flags to control discard dm thin: support discards dm thin: prepare to support discard dm thin: use dm_target_offset dm thin: support read only external snapshot origins dm thin: relax hard limit on the maximum size of a metadata device dm persistent data: remove space map ref_count entries if redundant dm thin: commit outstanding data every second dm: reject trailing characters in sccanf input dm raid: handle failed devices during start up dm thin metadata: pass correct space map to dm_sm_root_size dm persistent data: remove redundant value_size arg from value_ptr dm mpath: detect invalid map_context dm: clear bi_end_io on remapping failure dm table: simplify call to free_devices dm thin: correct comments dm raid: no longer experimental dm uevent: no longer experimental ...
-rw-r--r--Documentation/ABI/testing/sysfs-block-dm25
-rw-r--r--Documentation/device-mapper/thin-provisioning.txt65
-rw-r--r--Documentation/device-mapper/verity.txt194
-rw-r--r--MAINTAINERS5
-rw-r--r--drivers/md/Kconfig28
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-bufio.c108
-rw-r--r--drivers/md/dm-bufio.h8
-rw-r--r--drivers/md/dm-crypt.c46
-rw-r--r--drivers/md/dm-delay.c9
-rw-r--r--drivers/md/dm-exception-store.c2
-rw-r--r--drivers/md/dm-flakey.c3
-rw-r--r--drivers/md/dm-ioctl.c5
-rw-r--r--drivers/md/dm-linear.c3
-rw-r--r--drivers/md/dm-log.c3
-rw-r--r--drivers/md/dm-mpath.c52
-rw-r--r--drivers/md/dm-queue-length.c3
-rw-r--r--drivers/md/dm-raid.c53
-rw-r--r--drivers/md/dm-raid1.c12
-rw-r--r--drivers/md/dm-round-robin.c3
-rw-r--r--drivers/md/dm-service-time.c5
-rw-r--r--drivers/md/dm-stripe.c3
-rw-r--r--drivers/md/dm-table.c9
-rw-r--r--drivers/md/dm-thin-metadata.c5
-rw-r--r--drivers/md/dm-thin-metadata.h13
-rw-r--r--drivers/md/dm-thin.c680
-rw-r--r--drivers/md/dm-verity.c913
-rw-r--r--drivers/md/dm.c1
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h7
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c202
-rw-r--r--drivers/md/persistent-data/dm-btree.c27
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c3
32 files changed, 2104 insertions, 392 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-dm b/Documentation/ABI/testing/sysfs-block-dm
new file mode 100644
index 00000000000..87ca5691e29
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-dm
@@ -0,0 +1,25 @@
+What: /sys/block/dm-<num>/dm/name
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device name.
+ Read-only string containing mapped device name.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/uuid
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device UUID.
+ Read-only string containing DM-UUID or empty string
+ if DM-UUID is not set.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/suspended
+Date: June 2009
+KernelVersion: 2.6.31
+Contact: dm-devel@redhat.com
+Description: Device-mapper device suspend state.
+ Contains the value 1 while the device is suspended.
+ Otherwise it contains 0. Read-only attribute.
+Users: util-linux, device-mapper udev rules
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 1ff044d87ca..3370bc4d7b9 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -75,10 +75,12 @@ less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the
metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller. The largest size supported is 16GB.
+to 2MB if the answer is smaller. If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
-If you're creating large numbers of snapshots which are recording large
-amounts of change, you may need find you need to increase this.
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.
Reloading a pool table
----------------------
@@ -167,6 +169,38 @@ ii) Using an internal snapshot.
dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+External snapshots
+------------------
+
+You can use an external _read only_ device as an origin for a
+thinly-provisioned volume. Any read to an unprovisioned area of the
+thin device will be passed through to the origin. Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+ This is the same as creating a thin device.
+ You don't mention the origin at this stage.
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+ Append an extra parameter to the thin target specifying the origin:
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+ N.B. All descendants (internal snapshots) of this snapshot require the
+ same extra origin parameter.
+
Deactivation
------------
@@ -189,7 +223,13 @@ i) Constructor
<low water mark (blocks)> [<number of feature args> [<arg>]*]
Optional feature arguments:
- - 'skip_block_zeroing': skips the zeroing of newly-provisioned blocks.
+
+ skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
+
+ ignore_discard: Disable discard support.
+
+ no_discard_passdown: Don't pass discards down to the underlying
+ data device, but just remove the mapping.
Data block size must be between 64KB (128 sectors) and 1GB
(2097152 sectors) inclusive.
@@ -237,16 +277,6 @@ iii) Messages
Deletes a thin device. Irreversible.
- trim <dev id> <new size in sectors>
-
- Delete mappings from the end of a thin device. Irreversible.
- You might want to use this if you're reducing the size of
- your thinly-provisioned device. In many cases, due to the
- sharing of blocks between devices, it is not possible to
- determine in advance how much space 'trim' will release. (In
- future a userspace tool might be able to perform this
- calculation.)
-
set_transaction_id <current id> <new id>
Userland volume managers, such as LVM, need a way to
@@ -262,7 +292,7 @@ iii) Messages
i) Constructor
- thin <pool dev> <dev id>
+ thin <pool dev> <dev id> [<external origin dev>]
pool dev:
the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
@@ -271,6 +301,11 @@ i) Constructor
the internal device identifier of the device to be
activated.
+ external origin dev:
+ an optional block device outside the pool to be treated as a
+ read-only snapshot origin: reads to unprovisioned areas of the
+ thin target will be mapped to this device.
+
The pool doesn't store any size against the thin devices. If you
load a thin target that is smaller than you've been using previously,
then you'll have no access to blocks mapped beyond the end. If you
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
new file mode 100644
index 00000000000..32e48797a14
--- /dev/null
+++ b/Documentation/device-mapper/verity.txt
@@ -0,0 +1,194 @@
+dm-verity
+==========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+ <version> <dev> <hash_dev> <hash_start>
+ <data_block_size> <hash_block_size>
+ <num_data_blocks> <hash_start_block>
+ <algorithm> <digest> <salt>
+
+<version>
+ This is the version number of the on-disk format.
+
+ 0 is the original format used in the Chromium OS.
+ The salt is appended when hashing, digests are stored continuously and
+ the rest of the block is padded with zeros.
+
+ 1 is the current format that should be used for new devices.
+ The salt is prepended when hashing and each digest is
+ padded with zeros to the power of two.
+
+<dev>
+ This is the device containing the data the integrity of which needs to be
+ checked. It may be specified as a path, like /dev/sdaX, or a device number,
+ <major>:<minor>.
+
+<hash_dev>
+ This is the device that that supplies the hash tree data. It may be
+ specified similarly to the device path and may be the same device. If the
+ same device is used, the hash_start should be outside of the dm-verity
+ configured device size.
+
+<data_block_size>
+ The block size on a data device. Each block corresponds to one digest on
+ the hash device.
+
+<hash_block_size>
+ The size of a hash block.
+
+<num_data_blocks>
+ The number of data blocks on the data device. Additional blocks are
+ inaccessible. You can place hashes to the same partition as data, in this
+ case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+ This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+ to the root block of the hash tree.
+
+<algorithm>
+ The cryptographic hash algorithm used for this device. This should
+ be the name of the algorithm, like "sha1".
+
+<digest>
+ The hexadecimal encoding of the cryptographic hash of the root hash block
+ and the salt. This hash should be trusted as there is no other authenticity
+ beyond this point.
+
+<salt>
+ The hexadecimal encoding of the salt value.
+
+Theory of operation
+===================
+
+dm-verity is meant to be setup as part of a verified boot path. This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access. If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail. This should identify
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly-aligned to the nearest
+block the size of a page.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
+is of some block data on disk. If it is an intermediary node, then the hash is
+of a number of child nodes.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block. The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm. The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+alg = sha256, num_blocks = 32768, block_size = 4096
+
+ [ root ]
+ / . . . \
+ [entry_0] [entry_1]
+ / . . . \ . . . \
+ [entry_0_0] . . . [entry_0_127] . . . . [entry_1_127]
+ / ... \ / . . . \ / \
+ blk_0 ... blk_127 blk_16256 blk_16383 blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+Below is the recommended on-disk format. The verity kernel code does not
+read the on-disk header. It only reads the hash blocks which directly
+follow the header. It is expected that a user-space tool will verify the
+integrity of the verity_header and then call dmsetup with the correct
+parameters. Alternatively, the header can be omitted and the dmsetup
+parameters can be passed via the kernel command-line in a rooted chain
+of trust where the command-line is verified.
+
+The on-disk format is especially useful in cases where the hash blocks
+are on a separate partition. The magic number allows easy identification
+of the partition contents. Alternatively, the hash blocks can be stored
+in the same partition as the data to be verified. In such a configuration
+the filesystem on the partition would be sized a little smaller than
+the full-partition, leaving room for the hash blocks.
+
+struct superblock {
+ uint8_t signature[8]
+ "verity\0\0";
+
+ uint8_t version;
+ 1 - current format
+
+ uint8_t data_block_bits;
+ log2(data block size)
+
+ uint8_t hash_block_bits;
+ log2(hash block size)
+
+ uint8_t pad1[1];
+ zero padding
+
+ uint16_t salt_size;
+ big-endian salt size
+
+ uint8_t pad2[2];
+ zero padding
+
+ uint32_t data_blocks_hi;
+ big-endian high 32 bits of the 64-bit number of data blocks
+
+ uint32_t data_blocks_lo;
+ big-endian low 32 bits of the 64-bit number of data blocks
+
+ uint8_t algorithm[16];
+ cryptographic algorithm
+
+ uint8_t salt[384];
+ salt (the salt size is specified above)
+
+ uint8_t pad3[88];
+ zero padding to 512-byte boundary
+}
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+
+Setup a device:
+ dmsetup create vroot --table \
+ "0 2097152 "\
+ "verity 1 /dev/sda1 /dev/sda2 4096 4096 2097152 1 "\
+ "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+ "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel driver. This is available from
+the LVM2 upstream repository and may be supplied as a package called
+device-mapper-verity-tools:
+ git://sources.redhat.com/git/lvm2
+ http://sourceware.org/git/?p=lvm2.git
+ http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/verity?cvsroot=lvm2
+
+veritysetup -a vroot /dev/sda1 /dev/sda2 \
+ 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/MAINTAINERS b/MAINTAINERS
index 3d11fa581bb..2cce20bbe39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2225,13 +2225,16 @@ W: http://lanana.org/docs/device-list/index.html
S: Maintained
DEVICE-MAPPER (LVM)
-P: Alasdair Kergon
+M: Alasdair Kergon <agk@redhat.com>
+M: dm-devel@redhat.com
L: dm-devel@redhat.com
W: http://sources.redhat.com/dm
Q: http://patchwork.kernel.org/project/dm-devel/list/
+T: quilt http://people.redhat.com/agk/patches/linux/editing/
S: Maintained
F: Documentation/device-mapper/
F: drivers/md/dm*
+F: drivers/md/persistent-data/
F: include/linux/device-mapper.h
F: include/linux/dm-*.h
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index faa4741df6d..10f122a3a85 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -277,8 +277,8 @@ config DM_MIRROR
needed for live data migration tools such as 'pvmove'.
config DM_RAID
- tristate "RAID 1/4/5/6 target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "RAID 1/4/5/6 target"
+ depends on BLK_DEV_DM
select MD_RAID1
select MD_RAID456
select BLK_DEV_MD
@@ -359,8 +359,8 @@ config DM_DELAY
If unsure, say N.
config DM_UEVENT
- bool "DM uevents (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ bool "DM uevents"
+ depends on BLK_DEV_DM
---help---
Generate udev events for DM events.
@@ -370,4 +370,24 @@ config DM_FLAKEY
---help---
A target that intermittently fails I/O for debugging purposes.
+config DM_VERITY
+ tristate "Verity target support (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ select CRYPTO
+ select CRYPTO_HASH
+ select DM_BUFIO
+ ---help---
+ This device-mapper target creates a read-only device that
+ transparently validates the data on one underlying device against
+ a pre-generated tree of cryptographic checksums stored on a second
+ device.
+
+ You'll need to activate the digests you're going to use in the
+ cryptoapi configuration.
+
+ To compile this code as a module, choose M here: the module will
+ be called dm-verity.
+
+ If unsure, say N.
+
endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 046860c7a16..8b2e0dffe82 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
obj-$(CONFIG_DM_RAID) += dm-raid.o
obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
+obj-$(CONFIG_DM_VERITY) += dm-verity.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index b6e58c7b6df..cc06a1e5242 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -578,7 +578,7 @@ static void write_endio(struct bio *bio, int error)
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
b->write_error = error;
- if (error) {
+ if (unlikely(error)) {
struct dm_bufio_client *c = b->c;
(void)cmpxchg(&c->async_write_error, 0, error);
}
@@ -697,13 +697,20 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
dm_bufio_lock(c);
}
+enum new_flag {
+ NF_FRESH = 0,
+ NF_READ = 1,
+ NF_GET = 2,
+ NF_PREFETCH = 3
+};
+
/*
* Allocate a new buffer. If the allocation is not possible, wait until
* some other thread frees a buffer.
*
* May drop the lock and regain it.
*/
-static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c)
+static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
{
struct dm_buffer *b;
@@ -726,6 +733,9 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
return b;
}
+ if (nf == NF_PREFETCH)
+ return NULL;
+
if (!list_empty(&c->reserved_buffers)) {
b = list_entry(c->reserved_buffers.next,
struct dm_buffer, lru_list);
@@ -743,9 +753,12 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
}
}
-static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
+static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
{
- struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
+ struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);
+
+ if (!b)
+ return NULL;
if (c->alloc_callback)
c->alloc_callback(b);
@@ -865,32 +878,23 @@ static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
* Getting a buffer
*--------------------------------------------------------------*/
-enum new_flag {
- NF_FRESH = 0,
- NF_READ = 1,
- NF_GET = 2
-};
-
static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
- enum new_flag nf, struct dm_buffer **bp,
- int *need_submit)
+ enum new_flag nf, int *need_submit)
{
struct dm_buffer *b, *new_b = NULL;
*need_submit = 0;
b = __find(c, block);
- if (b) {
- b->hold_count++;
- __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
- test_bit(B_WRITING, &b->state));
- return b;
- }
+ if (b)
+ goto found_buffer;
if (nf == NF_GET)
return NULL;
- new_b = __alloc_buffer_wait(c);
+ new_b = __alloc_buffer_wait(c, nf);
+ if (!new_b)
+ return NULL;
/*
* We've had a period where the mutex was unlocked, so need to
@@ -899,10 +903,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
b = __find(c, block);
if (b) {
__free_buffer_wake(new_b);
- b->hold_count++;
- __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
- test_bit(B_WRITING, &b->state));
- return b;
+ goto found_buffer;
}
__check_watermark(c);
@@ -922,6 +923,24 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
*need_submit = 1;
return b;
+
+found_buffer:
+ if (nf == NF_PREFETCH)
+ return NULL;
+ /*
+ * Note: it is essential that we don't wait for the buffer to be
+ * read if dm_bufio_get function is used. Both dm_bufio_get and
+ * dm_bufio_prefetch can be used in the driver request routine.
+ * If the user called both dm_bufio_prefetch and dm_bufio_get on
+ * the same buffer, it would deadlock if we waited.
+ */
+ if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
+ return NULL;
+
+ b->hold_count++;
+ __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
+ test_bit(B_WRITING, &b->state));
+ return b;
}
/*
@@ -956,10 +975,10 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
struct dm_buffer *b;
dm_bufio_lock(c);
- b = __bufio_new(c, block, nf, bp, &need_submit);
+ b = __bufio_new(c, block, nf, &need_submit);
dm_bufio_unlock(c);
- if (!b || IS_ERR(b))
+ if (!b)
return b;
if (need_submit)
@@ -1005,13 +1024,47 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
}
EXPORT_SYMBOL_GPL(dm_bufio_new);
+void dm_bufio_prefetch(struct dm_bufio_client *c,
+ sector_t block, unsigned n_blocks)
+{
+ struct blk_plug plug;
+
+ blk_start_plug(&plug);
+ dm_bufio_lock(c);
+
+ for (; n_blocks--; block++) {
+ int need_submit;
+ struct dm_buffer *b;
+ b = __bufio_new(c, block, NF_PREFETCH, &need_submit);
+ if (unlikely(b != NULL)) {
+ dm_bufio_unlock(c);
+
+ if (need_submit)
+ submit_io(b, READ, b->block, read_endio);
+ dm_bufio_release(b);
+
+ dm_bufio_cond_resched();
+
+ if (!n_blocks)
+ goto flush_plug;
+ dm_bufio_lock(c);
+ }
+
+ }
+
+ dm_bufio_unlock(c);
+
+flush_plug:
+ blk_finish_plug(&plug);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
+
void dm_bufio_release(struct dm_buffer *b)
{
struct dm_bufio_client *c = b->c;
dm_bufio_lock(c);
- BUG_ON(test_bit(B_READING, &b->state));
BUG_ON(!b->hold_count);
b->hold_count--;
@@ -1024,6 +1077,7 @@ void dm_bufio_release(struct dm_buffer *b)
* invalid buffer.
*/
if ((b->read_error || b->write_error) &&
+ !test_bit(B_READING, &b->state) &&
!test_bit(B_WRITING, &b->state) &&
!test_bit(B_DIRTY, &b->state)) {
__unlink_buffer(b);
@@ -1041,6 +1095,8 @@ void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
dm_bufio_lock(c);
+ BUG_ON(test_bit(B_READING, &b->state));
+
if (!test_and_set_bit(B_DIRTY, &b->state))
__relink_lru(b, LIST_DIRTY);
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
index 5c4c3a04e38..b142946a9e3 100644
--- a/drivers/md/dm-bufio.h
+++ b/drivers/md/dm-bufio.h
@@ -63,6 +63,14 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp);
/*
+ * Prefetch the specified blocks to the cache.
+ * The function starts to read the blocks and returns without waiting for
+ * I/O to finish.
+ */
+void dm_bufio_prefetch(struct dm_bufio_client *c,
+ sector_t block, unsigned n_blocks);
+
+/*
* Release a reference obtained with dm_bufio_{read,get,new}. The data
* pointer and dm_buffer pointer is no longer valid after this call.
*/
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index db6b51639ce..3f06df59fd8 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -176,7 +176,6 @@ struct crypt_config {
#define MIN_IOS 16
#define MIN_POOL_PAGES 32
-#define MIN_BIO_PAGES 8
static struct kmem_cache *_crypt_io_pool;
@@ -848,12 +847,11 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
}
/*
- * if additional pages cannot be allocated without waiting,
- * return a partially allocated bio, the caller will then try
- * to allocate additional bios while submitting this partial bio
+ * If additional pages cannot be allocated without waiting,
+ * return a partially-allocated bio. The caller will then try
+ * to allocate more bios while submitting this partial bio.
*/
- if (i == (MIN_BIO_PAGES - 1))
- gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+ gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
@@ -1046,16 +1044,14 @@ static void kcryptd_queue_io(struct dm_crypt_io *io)
queue_work(cc->io_queue, &io->work);
}
-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
- int error, int async)
+static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
{
struct bio *clone = io->ctx.bio_out;
struct crypt_config *cc = io->target->private;
- if (unlikely(error < 0)) {
+ if (unlikely(io->error < 0)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
- io->error = -EIO;
crypt_dec_pending(io);
return;
}
@@ -1106,12 +1102,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
sector += bio_sectors(clone);
crypt_inc_pending(io);
+
r = crypt_convert(cc, &io->ctx);
+ if (r < 0)
+ io->error = -EIO;
+
crypt_finished = atomic_dec_and_test(&io->ctx.pending);
/* Encryption was already finished, submit io now */
if (crypt_finished) {
- kcryptd_crypt_write_io_submit(io, r, 0);
+ kcryptd_crypt_write_io_submit(io, 0);
/*
* If there was an error, do not try next fragments.
@@ -1162,11 +1162,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_dec_pending(io);
}
-static void kcryptd_crypt_read_done(struct dm_crypt_io *io, int error)
+static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
{
- if (unlikely(error < 0))
- io->error = -EIO;
-
crypt_dec_pending(io);
}
@@ -1181,9 +1178,11 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx);
+ if (r < 0)
+ io->error = -EIO;
if (atomic_dec_and_test(&io->ctx.pending))
- kcryptd_crypt_read_done(io, r);
+ kcryptd_crypt_read_done(io);
crypt_dec_pending(io);
}
@@ -1204,15 +1203,18 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
+ if (error < 0)
+ io->error = -EIO;
+
mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
if (!atomic_dec_and_test(&ctx->pending))
return;
if (bio_data_dir(io->base_bio) == READ)
- kcryptd_crypt_read_done(io, error);
+ kcryptd_crypt_read_done(io);
else
- kcryptd_crypt_write_io_submit(io, error, 1);
+ kcryptd_crypt_write_io_submit(io, 1);
}
static void kcryptd_crypt(struct work_struct *work)
@@ -1413,6 +1415,7 @@ static int crypt_ctr_cipher(struct dm_target *ti,
char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount;
char *cipher_api = NULL;
int cpu, ret = -EINVAL;
+ char dummy;
/* Convert to crypto api definition? */
if (strchr(cipher_in, '(')) {
@@ -1434,7 +1437,7 @@ static int crypt_ctr_cipher(struct dm_target *ti,
if (!keycount)
cc->tfms_count = 1;
- else if (sscanf(keycount, "%u", &cc->tfms_count) != 1 ||
+ else if (sscanf(keycount, "%u%c", &cc->tfms_count, &dummy) != 1 ||
!is_power_of_2(cc->tfms_count)) {
ti->error = "Bad cipher key count specification";
return