aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2009-09-08 17:55:21 -0700
committerDan Williams <dan.j.williams@intel.com>2009-09-08 17:55:21 -0700
commitbbb20089a3275a19e475dbc21320c3742e3ca423 (patch)
tree216fdc1cbef450ca688135c5b8969169482d9a48 /drivers/md
parent3e48e656903e9fd8bc805c6a2c4264d7808d315b (diff)
parent657a77fa7284d8ae28dfa48f1dc5d919bf5b2843 (diff)
Merge branch 'dmaengine' into async-tx-next
Conflicts: crypto/async_tx/async_xor.c drivers/dma/ioat/dma_v2.h drivers/dma/ioat/pci.c drivers/md/raid5.c
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig30
-rw-r--r--drivers/md/Makefile5
-rw-r--r--drivers/md/bitmap.c4
-rw-r--r--drivers/md/dm-crypt.c19
-rw-r--r--drivers/md/dm-delay.c26
-rw-r--r--drivers/md/dm-exception-store.c4
-rw-r--r--drivers/md/dm-exception-store.h4
-rw-r--r--drivers/md/dm-io.c14
-rw-r--r--drivers/md/dm-ioctl.c28
-rw-r--r--drivers/md/dm-linear.c15
-rw-r--r--drivers/md/dm-log-userspace-base.c696
-rw-r--r--drivers/md/dm-log-userspace-transfer.c276
-rw-r--r--drivers/md/dm-log-userspace-transfer.h18
-rw-r--r--drivers/md/dm-log.c8
-rw-r--r--drivers/md/dm-mpath.c334
-rw-r--r--drivers/md/dm-path-selector.h8
-rw-r--r--drivers/md/dm-queue-length.c263
-rw-r--r--drivers/md/dm-raid1.c17
-rw-r--r--drivers/md/dm-region-hash.c2
-rw-r--r--drivers/md/dm-round-robin.c2
-rw-r--r--drivers/md/dm-service-time.c339
-rw-r--r--drivers/md/dm-snap-persistent.c4
-rw-r--r--drivers/md/dm-snap.c11
-rw-r--r--drivers/md/dm-stripe.c33
-rw-r--r--drivers/md/dm-sysfs.c9
-rw-r--r--drivers/md/dm-table.c463
-rw-r--r--drivers/md/dm.c1142
-rw-r--r--drivers/md/dm.h35
-rw-r--r--drivers/md/faulty.c21
-rw-r--r--drivers/md/linear.c220
-rw-r--r--drivers/md/linear.h12
-rw-r--r--drivers/md/md.c198
-rw-r--r--drivers/md/md.h14
-rw-r--r--drivers/md/multipath.c27
-rw-r--r--drivers/md/multipath.h6
-rw-r--r--drivers/md/raid0.c405
-rw-r--r--drivers/md/raid0.h10
-rw-r--r--drivers/md/raid1.c50
-rw-r--r--drivers/md/raid1.h6
-rw-r--r--drivers/md/raid10.c70
-rw-r--r--drivers/md/raid10.h6
-rw-r--r--drivers/md/raid5.c223
-rw-r--r--drivers/md/raid5.h8
43 files changed, 3987 insertions, 1098 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 09c0c6e49ab..2158377a135 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -257,6 +257,17 @@ config DM_MIRROR
Allow volume managers to mirror logical volumes, also
needed for live data migration tools such as 'pvmove'.
+config DM_LOG_USERSPACE
+ tristate "Mirror userspace logging (EXPERIMENTAL)"
+ depends on DM_MIRROR && EXPERIMENTAL && NET
+ select CONNECTOR
+ ---help---
+ The userspace logging module provides a mechanism for
+ relaying the dm-dirty-log API to userspace. Log designs
+ which are more suited to userspace implementation (e.g.
+ shared storage logs) or experimental logs can be implemented
+ by leveraging this framework.
+
config DM_ZERO
tristate "Zero target"
depends on BLK_DEV_DM
@@ -275,6 +286,25 @@ config DM_MULTIPATH
---help---
Allow volume managers to support multipath hardware.
+config DM_MULTIPATH_QL
+ tristate "I/O Path Selector based on the number of in-flight I/Os"
+ depends on DM_MULTIPATH
+ ---help---
+ This path selector is a dynamic load balancer which selects
+ the path with the least number of in-flight I/Os.
+
+ If unsure, say N.
+
+config DM_MULTIPATH_ST
+ tristate "I/O Path Selector based on the service time"
+ depends on DM_MULTIPATH
+ ---help---
+ This path selector is a dynamic load balancer which selects
+ the path expected to complete the incoming I/O in the shortest
+ time.
+
+ If unsure, say N.
+
config DM_DELAY
tristate "I/O delaying target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 45cc5951d92..1dc4185bd78 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-y += dm-raid1.o
+dm-log-userspace-y \
+ += dm-log-userspace-base.o dm-log-userspace-transfer.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
@@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
+obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o
+obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
+obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
quiet_cmd_unroll = UNROLL $@
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 56df1cee8fb..3319c2fec28 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
if (sync_page_io(rdev->bdev, target,
- roundup(size, bdev_hardsect_size(rdev->bdev)),
+ roundup(size, bdev_logical_block_size(rdev->bdev)),
page, READ)) {
page->index = index;
attach_page_buffers(page, NULL); /* so that free_buffer will
@@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
int size = PAGE_SIZE;
if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size,
- bdev_hardsect_size(rdev->bdev));
+ bdev_logical_block_size(rdev->bdev));
/* Just make sure we aren't corrupting data or
* metadata
*/
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 53394e863c7..9933eb861c7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_crypt_queue;
}
+ ti->num_flush_requests = 1;
ti->private = cc;
return 0;
@@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
struct dm_crypt_io *io;
+ struct crypt_config *cc;
+
+ if (unlikely(bio_empty_barrier(bio))) {
+ cc = ti->private;
+ bio->bi_bdev = cc->dev->bdev;
+ return DM_MAPIO_REMAPPED;
+ }
io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
@@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
+static int crypt_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct crypt_config *cc = ti->private;
+
+ return fn(ti, cc->dev, cc->start, data);
+}
+
static struct target_type crypt_target = {
.name = "crypt",
- .version= {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
@@ -1318,6 +1334,7 @@ static struct target_type crypt_target = {
.resume = crypt_resume,
.message = crypt_message,
.merge = crypt_merge,
+ .iterate_devices = crypt_iterate_devices,
};
static int __init dm_crypt_init(void)
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 559dbb52bc8..4e5b843cd4d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -197,6 +197,7 @@ out:
mutex_init(&dc->timer_lock);
atomic_set(&dc->may_delay, 1);
+ ti->num_flush_requests = 1;
ti->private = dc;
return 0;
@@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
bio->bi_bdev = dc->dev_write->bdev;
- bio->bi_sector = dc->start_write +
- (bio->bi_sector - ti->begin);
+ if (bio_sectors(bio))
+ bio->bi_sector = dc->start_write +
+ (bio->bi_sector - ti->begin);
return delay_bio(dc, dc->write_delay, bio);
}
@@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type,
return 0;
}
+static int delay_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct delay_c *dc = ti->private;
+ int ret = 0;
+
+ ret = fn(ti, dc->dev_read, dc->start_read, data);
+ if (ret)
+ goto out;
+
+ if (dc->dev_write)
+ ret = fn(ti, dc->dev_write, dc->start_write, data);
+
+out:
+ return ret;
+}
+
static struct target_type delay_target = {
.name = "delay",
- .version = {1, 0, 2},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = delay_ctr,
.dtr = delay_dtr,
@@ -326,6 +345,7 @@ static struct target_type delay_target = {
.presuspend = delay_presuspend,
.resume = delay_resume,
.status = delay_status,
+ .iterate_devices = delay_iterate_devices,
};
static int __init dm_delay_init(void)
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index a2e26c24214..c3ae51584b1 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store,
}
/* Validate the chunk size against the device block size */
- if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+ if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
@@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
return -EINVAL;
}
- type = get_type(argv[1]);
+ type = get_type(&persistent);
if (!type) {
ti->error = "Exception store type not recognised";
r = -EINVAL;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 0a2e6e7f67b..2442c8c0789 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -111,7 +111,7 @@ struct dm_exception_store {
/*
* Funtions to manipulate consecutive chunks
*/
-# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+# if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
# define DM_CHUNK_CONSECUTIVE_BITS 8
# define DM_CHUNK_NUMBER_BITS 56
@@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
- return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+ return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index e73aabd61cd..3a2e6a2f8bd 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -22,6 +22,7 @@ struct dm_io_client {
/* FIXME: can we shrink this ? */
struct io {
unsigned long error_bits;
+ unsigned long eopnotsupp_bits;
atomic_t count;
struct task_struct *sleeper;
struct dm_io_client *client;
@@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio)
*---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
- if (error)
+ if (error) {
set_bit(region, &io->error_bits);
+ if (error == -EOPNOTSUPP)
+ set_bit(region, &io->eopnotsupp_bits);
+ }
if (atomic_dec_and_test(&io->count)) {
if (io->sleeper)
@@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
return -EIO;
}
+retry:
io.error_bits = 0;
+ io.eopnotsupp_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
io.client = client;
@@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
}
set_current_state(TASK_RUNNING);
+ if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+ rw &= ~(1 << BIO_RW_BARRIER);
+ goto retry;
+ }
+
if (error_bits)
*error_bits = io.error_bits;
@@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
+ io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
io->client = client;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 823ceba6efa..7f77f18fcaf 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -276,7 +276,7 @@ retry:
up_write(&_hash_lock);
}
-static int dm_hash_rename(const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
{
char *new_name, *old_name;
struct hash_cell *hc;
@@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new)
dm_table_put(table);
}
- dm_kobject_uevent(hc->md);
+ dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
dm_put(hc->md);
up_write(&_hash_lock);
@@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
__hash_remove(hc);
up_write(&_hash_lock);
+
+ dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+
dm_put(md);
param->data_size = 0;
return 0;
@@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
return r;
param->data_size = 0;
- return dm_hash_rename(param->name, new_name);
+ return dm_hash_rename(param->event_nr, param->name, new_name);
}
static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param)
if (dm_suspended(md))
r = dm_resume(md);
- if (!r)
+
+ if (!r) {
+ dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
r = __dev_status(md, param);
+ }
dm_put(md);
return r;
@@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table,
next = spec->next;
}
+ r = dm_table_set_type(table);
+ if (r) {
+ DMWARN("unable to set table type");
+ return r;
+ }
+
return dm_table_complete(table);
}
@@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
goto out;
}
+ r = dm_table_alloc_md_mempools(t);
+ if (r) {
+ DMWARN("unable to allocate mempools for this table");
+ dm_table_destroy(t);
+ goto out;
+ }
+
down_write(&_hash_lock);
hc = dm_get_mdptr(md);
if (!hc || hc->md != md) {
@@ -1513,6 +1532,7 @@ static const struct file_operations _ctl_fops = {
static struct miscdevice _dm_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = DM_NAME,
+ .devnode = "mapper/control",
.fops = &_ctl_fops
};
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 79fb53e51c7..9184b6deb86 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ ti->num_flush_requests = 1;
ti->private = lc;
return 0;
@@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio->bi_bdev = lc->dev->bdev;
- bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
+ if (bio_sectors(bio))
+ bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
}
static int linear_map(struct dm_target *ti, struct bio *bio,
@@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
+static int linear_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct linear_c *lc = ti->private;
+
+ return fn(ti, lc->dev, lc->start, data);
+}
+
static struct target_type linear_target = {
.name = "linear",
- .version= {1, 0, 3},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = linear_ctr,
.dtr = linear_dtr,
@@ -142,6 +152,7 @@ static struct target_type linear_target = {
.status = linear_status,
.ioctl = linear_ioctl,
.merge = linear_merge,
+ .iterate_devices = linear_iterate_devices,
};
int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
new file mode 100644
index 00000000000..e69b9656099
--- /dev/null
+++ b/drivers/md/dm-log-userspace-base.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/bio.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+struct flush_entry {
+ int type;
+ region_t region;
+ struct list_head list;
+};
+
+struct log_c {
+ struct dm_target *ti;
+ uint32_t region_size;
+ region_t region_count;
+ char uuid[DM_UUID_LEN];
+
+ char *usr_argv_str;
+ uint32_t usr_argc;
+
+ /*
+ * in_sync_hint gets set when doing is_remote_recovering. It
+ * represents the first region that needs recovery. IOW, the
+ * first zero bit of sync_bits. This can be useful for to limit
+ * traffic for calls like is_remote_recovering and get_resync_work,
+ * but be take care in its use for anything else.
+ */
+ uint64_t in_sync_hint;
+
+ spinlock_t flush_lock;
+ struct list_head flush_list; /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool;
+
+static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ return kmalloc(sizeof(struct flush_entry), gfp_mask);
+}
+
+static void flush_entry_free(void *element, void *pool_data)
+{
+ kfree(element);
+}
+
+static int userspace_do_request(struct log_c *lc, const char *uuid,
+ int request_type, char *data, size_t data_size,
+ char *rdata, size_t *rdata_size)
+{
+ int r;
+
+ /*
+ * If the server isn't there, -ESRCH is returned,
+ * and we must keep trying until the server is
+ * restored.
+ */
+retry:
+ r = dm_consult_userspace(uuid, request_type, data,
+ data_size, rdata, rdata_size);
+
+ if (r != -ESRCH)
+ return r;
+
+ DMERR(" Userspace log server not found.");
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(2*HZ);
+ DMWARN("Attempting to contact userspace log server...");
+ r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
+ strlen(lc->usr_argv_str) + 1,
+ NULL, NULL);
+ if (!r)
+ break;
+ }
+ DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
+ r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+ 0, NULL, NULL);
+ if (!r)
+ goto retry;
+
+ DMERR("Error trying to resume userspace log: %d", r);
+
+ return -ESRCH;
+}
+
+static int build_constructor_string(struct dm_target *ti,
+ unsigned argc, char **argv,
+ char **ctr_str)
+{
+ int i, str_size;
+ char *str = NULL;
+
+ *ctr_str = NULL;
+
+ for (i = 0, str_size = 0; i < argc; i++)
+ str_size += strlen(argv[i]) + 1; /* +1 for space between args */
+
+ str_size += 20; /* Max number of chars in a printed u64 number */
+
+ str = kzalloc(str_size, GFP_KERNEL);
+ if (!str) {
+ DMWARN("Unable to allocate memory for constructor string");
+ return -ENOMEM;
+ }
+
+ for (i = 0, str_size = 0; i < argc; i++)
+ str_size += sprintf(str + str_size, "%s ", argv[i]);
+ str_size += sprintf(str + str_size, "%llu",
+ (unsigned long long)ti->len);
+
+ *ctr_str = str;
+ return str_size;
+}
+
+/*
+ * userspace_ctr
+ *
+ * argv contains:
+ * <UUID> <other args>
+ * Where 'other args' is the userspace implementation specific log
+ * arguments. An example might be:
+ * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
+ *
+ * So, this module will strip off the <UUID> for identification purposes
+ * when communicating with userspace about a log; but will pass on everything
+ * else.
+ */
+static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
+ unsigned argc, char **argv)
+{
+ int r = 0;
+ int str_size;
+ char *ctr_str = NULL;
+ struct log_c *lc = NULL;
+ uint64_t rdata;
+ size_t rdata_size = sizeof(rdata);
+
+ if (argc < 3) {
+ DMWARN("Too few arguments to userspace dirty log");
+ return -EINVAL;
+ }
+
+ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+ if (!lc) {
+ DMWARN("Unable to allocate userspace log context.");
+ return -ENOMEM;
+ }
+
+ lc->ti = ti;
+
+ if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
+ DMWARN("UUID argument too long.");
+ kfree(lc);
+ return -EINVAL;
+ }
+
+ strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+ spin_lock_init(&lc->flush_lock);
+ INIT_LIST_HEAD(&lc->flush_list);
+
+ str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+ if (str_size < 0) {
+ kfree(lc);
+ return str_size;
+ }
+
+ /* Send table string */
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+ ctr_str, str_size, NULL, NULL);
+
+ if (r == -ESRCH) {
+ DMERR("Userspace log server not found");
+ goto out;
+ }
+
+ /* Since the region size does not change, get it now */
+ rdata_size = sizeof(rdata);
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+ NULL, 0, (char *)&rdata, &rdata_size);
+
+ if (r) {
+ DMERR("Failed to get region size of dirty log");
+ goto out;
+ }
+
+ lc->region_size = (uint32_t)rdata;
+ lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
+
+out:
+ if (r) {
+ kfree(lc);
+ kfree(ctr_str);
+ } else {
+ lc->usr_argv_str = ctr_str;
+ lc->usr_argc = argc;
+ log->context = lc;
+ }
+
+ return r;
+}
+
+static void userspace_dtr(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
+ NULL, 0,
+ NULL, NULL);
+
+ kfree(lc->usr_argv_str);
+ kfree(lc);
+
+ return;
+}
+
+static int userspace_presuspend(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static int userspace_postsuspend(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static int userspace_resume(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ lc->in_sync_hint = 0;
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
+{
+ struct log_c *lc = log->context;
+
+ return lc->region_size;
+}
+
+/*
+ * userspace_is_clean
+ *
+ * Check whether a region is clean. If there is any sort of
+ * failure when consulting the server, we return not clean.
+ *
+ * Returns: 1 if clean, 0 otherwise
+ */
+static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
+{
+ int r;
+ uint64_t region64 = (uint64_t)region;
+ int64_t is_clean;
+ size_t rdata_size;
+ struct log_c *lc = log->context;
+
+ rdata_size = sizeof(is_clean);
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
+ (char *)&region64, sizeof(region64),
+ (char *)&is_clean, &rdata_size);
+
+ return (r) ? 0 : (int)is_clean;
+}
+
+/*
+ * userspace_in_sync
+ *
+ * Check if the region is in-sync. If there is any sort
+ * of failure when consulting the server, we assume that
+ * the region is not in sync.
+ *
+ * If 'can_block' is set, return immediately
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
+ */
+static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
+ int can_block)
+{
+ int r;
+ uint64_t region64 = region;
+ int64_t in_sync;
+ size_t rdata_size;
+ struct log_c *lc = log->context;
+
+ /*
+ * We can never respond directly - even if in_sync_hint is
+ * set. This is because another machine could see a device
+ * failure and mark the region out-of-sync. If we don't go
+ * to userspace to ask, we might think the region is in-sync
+ * and allow a read to pick up data that is stale. (This is
+ * very unlikely if a device actually fails; but it is very
+ * likely if a connection to one device from one machine fails.)
+ *
+ * There still might be a problem if the mirror caches the region
+ * state as in-sync... but then this call would not be made. So,
+ * that is a mirror problem.
+ */
+ if (!can_block)
+ return -