aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/Kconfig30
-rw-r--r--drivers/md/Makefile5
-rw-r--r--drivers/md/dm-crypt.c19
-rw-r--r--drivers/md/dm-delay.c26
-rw-r--r--drivers/md/dm-exception-store.c2
-rw-r--r--drivers/md/dm-exception-store.h2
-rw-r--r--drivers/md/dm-io.c14
-rw-r--r--drivers/md/dm-ioctl.c27
-rw-r--r--drivers/md/dm-linear.c15
-rw-r--r--drivers/md/dm-log-userspace-base.c696
-rw-r--r--drivers/md/dm-log-userspace-transfer.c276
-rw-r--r--drivers/md/dm-log-userspace-transfer.h18
-rw-r--r--drivers/md/dm-log.c9
-rw-r--r--drivers/md/dm-mpath.c334
-rw-r--r--drivers/md/dm-path-selector.h8
-rw-r--r--drivers/md/dm-queue-length.c263
-rw-r--r--drivers/md/dm-raid1.c17
-rw-r--r--drivers/md/dm-region-hash.c2
-rw-r--r--drivers/md/dm-round-robin.c2
-rw-r--r--drivers/md/dm-service-time.c339
-rw-r--r--drivers/md/dm-snap-persistent.c2
-rw-r--r--drivers/md/dm-snap.c11
-rw-r--r--drivers/md/dm-stripe.c33
-rw-r--r--drivers/md/dm-sysfs.c9
-rw-r--r--drivers/md/dm-table.c465
-rw-r--r--drivers/md/dm.c1133
-rw-r--r--drivers/md/dm.h35
27 files changed, 3377 insertions, 415 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 36e0675be9f..020f9573fd8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -231,6 +231,17 @@ config DM_MIRROR
Allow volume managers to mirror logical volumes, also
needed for live data migration tools such as 'pvmove'.
+config DM_LOG_USERSPACE
+ tristate "Mirror userspace logging (EXPERIMENTAL)"
+ depends on DM_MIRROR && EXPERIMENTAL && NET
+ select CONNECTOR
+ ---help---
+ The userspace logging module provides a mechanism for
+ relaying the dm-dirty-log API to userspace. Log designs
+ which are more suited to userspace implementation (e.g.
+ shared storage logs) or experimental logs can be implemented
+ by leveraging this framework.
+
config DM_ZERO
tristate "Zero target"
depends on BLK_DEV_DM
@@ -249,6 +260,25 @@ config DM_MULTIPATH
---help---
Allow volume managers to support multipath hardware.
+config DM_MULTIPATH_QL
+ tristate "I/O Path Selector based on the number of in-flight I/Os"
+ depends on DM_MULTIPATH
+ ---help---
+ This path selector is a dynamic load balancer which selects
+ the path with the least number of in-flight I/Os.
+
+ If unsure, say N.
+
+config DM_MULTIPATH_ST
+ tristate "I/O Path Selector based on the service time"
+ depends on DM_MULTIPATH
+ ---help---
+ This path selector is a dynamic load balancer which selects
+ the path expected to complete the incoming I/O in the shortest
+ time.
+
+ If unsure, say N.
+
config DM_DELAY
tristate "I/O delaying target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 45cc5951d92..1dc4185bd78 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-y += dm-raid1.o
+dm-log-userspace-y \
+ += dm-log-userspace-base.o dm-log-userspace-transfer.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
@@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
+obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o
+obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
+obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
quiet_cmd_unroll = UNROLL $@
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 53394e863c7..9933eb861c7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_crypt_queue;
}
+ ti->num_flush_requests = 1;
ti->private = cc;
return 0;
@@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
struct dm_crypt_io *io;
+ struct crypt_config *cc;
+
+ if (unlikely(bio_empty_barrier(bio))) {
+ cc = ti->private;
+ bio->bi_bdev = cc->dev->bdev;
+ return DM_MAPIO_REMAPPED;
+ }
io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
@@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
+static int crypt_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct crypt_config *cc = ti->private;
+
+ return fn(ti, cc->dev, cc->start, data);
+}
+
static struct target_type crypt_target = {
.name = "crypt",
- .version= {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
@@ -1318,6 +1334,7 @@ static struct target_type crypt_target = {
.resume = crypt_resume,
.message = crypt_message,
.merge = crypt_merge,
+ .iterate_devices = crypt_iterate_devices,
};
static int __init dm_crypt_init(void)
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 559dbb52bc8..4e5b843cd4d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -197,6 +197,7 @@ out:
mutex_init(&dc->timer_lock);
atomic_set(&dc->may_delay, 1);
+ ti->num_flush_requests = 1;
ti->private = dc;
return 0;
@@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
bio->bi_bdev = dc->dev_write->bdev;
- bio->bi_sector = dc->start_write +
- (bio->bi_sector - ti->begin);
+ if (bio_sectors(bio))
+ bio->bi_sector = dc->start_write +
+ (bio->bi_sector - ti->begin);
return delay_bio(dc, dc->write_delay, bio);
}
@@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type,
return 0;
}
+static int delay_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct delay_c *dc = ti->private;
+ int ret = 0;
+
+ ret = fn(ti, dc->dev_read, dc->start_read, data);
+ if (ret)
+ goto out;
+
+ if (dc->dev_write)
+ ret = fn(ti, dc->dev_write, dc->start_write, data);
+
+out:
+ return ret;
+}
+
static struct target_type delay_target = {
.name = "delay",
- .version = {1, 0, 2},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = delay_ctr,
.dtr = delay_dtr,
@@ -326,6 +345,7 @@ static struct target_type delay_target = {
.presuspend = delay_presuspend,
.resume = delay_resume,
.status = delay_status,
+ .iterate_devices = delay_iterate_devices,
};
static int __init dm_delay_init(void)
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 75d8081a904..c3ae51584b1 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
return -EINVAL;
}
- type = get_type(argv[1]);
+ type = get_type(&persistent);
if (!type) {
ti->error = "Exception store type not recognised";
r = -EINVAL;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index c92701dc500..2442c8c0789 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
- return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+ return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index e73aabd61cd..3a2e6a2f8bd 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -22,6 +22,7 @@ struct dm_io_client {
/* FIXME: can we shrink this ? */
struct io {
unsigned long error_bits;
+ unsigned long eopnotsupp_bits;
atomic_t count;
struct task_struct *sleeper;
struct dm_io_client *client;
@@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio)
*---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
- if (error)
+ if (error) {
set_bit(region, &io->error_bits);
+ if (error == -EOPNOTSUPP)
+ set_bit(region, &io->eopnotsupp_bits);
+ }
if (atomic_dec_and_test(&io->count)) {
if (io->sleeper)
@@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
return -EIO;
}
+retry:
io.error_bits = 0;
+ io.eopnotsupp_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
io.client = client;
@@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
}
set_current_state(TASK_RUNNING);
+ if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+ rw &= ~(1 << BIO_RW_BARRIER);
+ goto retry;
+ }
+
if (error_bits)
*error_bits = io.error_bits;
@@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
+ io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
io->client = client;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1128d3fba79..7f77f18fcaf 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -276,7 +276,7 @@ retry:
up_write(&_hash_lock);
}
-static int dm_hash_rename(const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
{
char *new_name, *old_name;
struct hash_cell *hc;
@@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new)
dm_table_put(table);
}
- dm_kobject_uevent(hc->md);
+ dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
dm_put(hc->md);
up_write(&_hash_lock);
@@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
__hash_remove(hc);
up_write(&_hash_lock);
+
+ dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+
dm_put(md);
param->data_size = 0;
return 0;
@@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
return r;
param->data_size = 0;
- return dm_hash_rename(param->name, new_name);
+ return dm_hash_rename(param->event_nr, param->name, new_name);
}
static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param)
if (dm_suspended(md))
r = dm_resume(md);
- if (!r)
+
+ if (!r) {
+ dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
r = __dev_status(md, param);
+ }
dm_put(md);
return r;
@@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table,
next = spec->next;
}
+ r = dm_table_set_type(table);
+ if (r) {
+ DMWARN("unable to set table type");
+ return r;
+ }
+
return dm_table_complete(table);
}
@@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
goto out;
}
+ r = dm_table_alloc_md_mempools(t);
+ if (r) {
+ DMWARN("unable to allocate mempools for this table");
+ dm_table_destroy(t);
+ goto out;
+ }
+
down_write(&_hash_lock);
hc = dm_get_mdptr(md);
if (!hc || hc->md != md) {
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 79fb53e51c7..9184b6deb86 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ ti->num_flush_requests = 1;
ti->private = lc;
return 0;
@@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio->bi_bdev = lc->dev->bdev;
- bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
+ if (bio_sectors(bio))
+ bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
}
static int linear_map(struct dm_target *ti, struct bio *bio,
@@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
+static int linear_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct linear_c *lc = ti->private;
+
+ return fn(ti, lc->dev, lc->start, data);
+}
+
static struct target_type linear_target = {
.name = "linear",
- .version= {1, 0, 3},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = linear_ctr,
.dtr = linear_dtr,
@@ -142,6 +152,7 @@ static struct target_type linear_target = {
.status = linear_status,
.ioctl = linear_ioctl,
.merge = linear_merge,
+ .iterate_devices = linear_iterate_devices,
};
int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
new file mode 100644
index 00000000000..e69b9656099
--- /dev/null
+++ b/drivers/md/dm-log-userspace-base.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/bio.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+struct flush_entry {
+ int type;
+ region_t region;
+ struct list_head list;
+};
+
+struct log_c {
+ struct dm_target *ti;
+ uint32_t region_size;
+ region_t region_count;
+ char uuid[DM_UUID_LEN];
+
+ char *usr_argv_str;
+ uint32_t usr_argc;
+
+ /*
+ * in_sync_hint gets set when doing is_remote_recovering. It
+ * represents the first region that needs recovery. IOW, the
+ * first zero bit of sync_bits. This can be useful for to limit
+ * traffic for calls like is_remote_recovering and get_resync_work,
+ * but be take care in its use for anything else.
+ */
+ uint64_t in_sync_hint;
+
+ spinlock_t flush_lock;
+ struct list_head flush_list; /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool;
+
+static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ return kmalloc(sizeof(struct flush_entry), gfp_mask);
+}
+
+static void flush_entry_free(void *element, void *pool_data)
+{
+ kfree(element);
+}
+
+static int userspace_do_request(struct log_c *lc, const char *uuid,
+ int request_type, char *data, size_t data_size,
+ char *rdata, size_t *rdata_size)
+{
+ int r;
+
+ /*
+ * If the server isn't there, -ESRCH is returned,
+ * and we must keep trying until the server is
+ * restored.
+ */
+retry:
+ r = dm_consult_userspace(uuid, request_type, data,
+ data_size, rdata, rdata_size);
+
+ if (r != -ESRCH)
+ return r;
+
+ DMERR(" Userspace log server not found.");
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(2*HZ);
+ DMWARN("Attempting to contact userspace log server...");
+ r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
+ strlen(lc->usr_argv_str) + 1,
+ NULL, NULL);
+ if (!r)
+ break;
+ }
+ DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
+ r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+ 0, NULL, NULL);
+ if (!r)
+ goto retry;
+
+ DMERR("Error trying to resume userspace log: %d", r);
+
+ return -ESRCH;
+}
+
+static int build_constructor_string(struct dm_target *ti,
+ unsigned argc, char **argv,
+ char **ctr_str)
+{
+ int i, str_size;
+ char *str = NULL;
+
+ *ctr_str = NULL;
+
+ for (i = 0, str_size = 0; i < argc; i++)
+ str_size += strlen(argv[i]) + 1; /* +1 for space between args */
+
+ str_size += 20; /* Max number of chars in a printed u64 number */
+
+ str = kzalloc(str_size, GFP_KERNEL);
+ if (!str) {
+ DMWARN("Unable to allocate memory for constructor string");
+ return -ENOMEM;
+ }
+
+ for (i = 0, str_size = 0; i < argc; i++)
+ str_size += sprintf(str + str_size, "%s ", argv[i]);
+ str_size += sprintf(str + str_size, "%llu",
+ (unsigned long long)ti->len);
+
+ *ctr_str = str;
+ return str_size;
+}
+
+/*
+ * userspace_ctr
+ *
+ * argv contains:
+ * <UUID> <other args>
+ * Where 'other args' is the userspace implementation specific log
+ * arguments. An example might be:
+ * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
+ *
+ * So, this module will strip off the <UUID> for identification purposes
+ * when communicating with userspace about a log; but will pass on everything
+ * else.
+ */
+static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
+ unsigned argc, char **argv)
+{
+ int r = 0;
+ int str_size;
+ char *ctr_str = NULL;
+ struct log_c *lc = NULL;
+ uint64_t rdata;
+ size_t rdata_size = sizeof(rdata);
+
+ if (argc < 3) {
+ DMWARN("Too few arguments to userspace dirty log");
+ return -EINVAL;
+ }
+
+ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+ if (!lc) {
+ DMWARN("Unable to allocate userspace log context.");
+ return -ENOMEM;
+ }
+
+ lc->ti = ti;
+
+ if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
+ DMWARN("UUID argument too long.");
+ kfree(lc);
+ return -EINVAL;
+ }
+
+ strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+ spin_lock_init(&lc->flush_lock);
+ INIT_LIST_HEAD(&lc->flush_list);
+
+ str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+ if (str_size < 0) {
+ kfree(lc);
+ return str_size;
+ }
+
+ /* Send table string */
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+ ctr_str, str_size, NULL, NULL);
+
+ if (r == -ESRCH) {
+ DMERR("Userspace log server not found");
+ goto out;
+ }
+
+ /* Since the region size does not change, get it now */
+ rdata_size = sizeof(rdata);
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+ NULL, 0, (char *)&rdata, &rdata_size);
+
+ if (r) {
+ DMERR("Failed to get region size of dirty log");
+ goto out;
+ }
+
+ lc->region_size = (uint32_t)rdata;
+ lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
+
+out:
+ if (r) {
+ kfree(lc);
+ kfree(ctr_str);
+ } else {
+ lc->usr_argv_str = ctr_str;
+ lc->usr_argc = argc;
+ log->context = lc;
+ }
+
+ return r;
+}
+
+static void userspace_dtr(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
+ NULL, 0,
+ NULL, NULL);
+
+ kfree(lc->usr_argv_str);
+ kfree(lc);
+
+ return;
+}
+
+static int userspace_presuspend(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static int userspace_postsuspend(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static int userspace_resume(struct dm_dirty_log *log)
+{
+ int r;
+ struct log_c *lc = log->context;
+
+ lc->in_sync_hint = 0;
+ r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+ NULL, 0,
+ NULL, NULL);
+
+ return r;
+}
+
+static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
+{
+ struct log_c *lc = log->context;
+
+ return lc->region_size;
+}
+
+/*
+ * userspace_is_clean
+ *
+ * Check whether a region is clean. If there is any sort of
+ * failure when consulting the server, we return not clean.
+ *
+ * Returns: 1 if clean, 0 otherwise
+ */
+static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
+{
+ int r;
+ uint64_t region64 = (uint64_t)region;
+ int64_t is_clean;
+ size_t rdata_size;
+ struct log_c *lc = log->context;
+
+ rdata_size = sizeof(is_clean);
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
+ (char *)&region64, sizeof(region64),
+ (char *)&is_clean, &rdata_size);
+
+ return (r) ? 0 : (int)is_clean;
+}
+
+/*
+ * userspace_in_sync
+ *
+ * Check if the region is in-sync. If there is any sort
+ * of failure when consulting the server, we assume that
+ * the region is not in sync.
+ *
+ * If 'can_block' is set, return immediately
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
+ */
+static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
+ int can_block)
+{
+ int r;
+ uint64_t region64 = region;
+ int64_t in_sync;
+ size_t rdata_size;
+ struct log_c *lc = log->context;
+
+ /*
+ * We can never respond directly - even if in_sync_hint is
+ * set. This is because another machine could see a device
+ * failure and mark the region out-of-sync. If we don't go
+ * to userspace to ask, we might think the region is in-sync
+ * and allow a read to pick up data that is stale. (This is
+ * very unlikely if a device actually fails; but it is very
+ * likely if a connection to one device from one machine fails.)
+ *
+ * There still might be a problem if the mirror caches the region
+ * state as in-sync... but then this call would not be made. So,
+ * that is a mirror problem.
+ */
+ if (!can_block)
+ return -EWOULDBLOCK;
+
+ rdata_size = sizeof(in_sync);
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
+ (char *)&region64, sizeof(region64),
+ (char *)&in_sync, &rdata_size);
+ return (r) ? 0 : (int)in_sync;
+}
+
+/*
+ * userspace_flush
+ *
+ * This function is ok to block.
+ * The flush happens in two stages. First, it sends all
+ * clear/mark requests that are on the list. Then it
+ * tells the server to commit them. This gives the
+ * server a chance to optimise the commit, instead of
+ * doing it for every request.
+ *
+ * Additionally, we could implement another thread that
+ * sends the requests up to the server - reducing the
+ * load on flush. Then the flush would have less in
+ * the list and be responsible for the finishing commit.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int userspace_flush(struct dm_dirty_log *log)
+{
+ int r = 0;
+ unsigned long flags;
+ struct log_c *lc = log->context;
+ LIST_HEAD(flush_list);
+ struct flush_entry *fe, *tmp_fe;
+
+ spin_lock_irqsave(&lc->flush_lock, flags);
+ list_splice_init(&lc->flush_list, &flush_list);
+ spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+ if (list_empty(&flush_list))
+ return 0;
+
+ /*
+ * FIXME: Count up requests, group request types,
+ * allocate memory to stick all requests in and
+ * send to server in one go. Failing the allocation,
+ * do it one by one.
+ */
+
+ list_for_each_entry(fe, &flush_list, list) {
+ r = userspace_do_request(lc, lc->uuid, fe->type,
+ (char *)&fe->region,
+ sizeof(fe->region),
+ NULL, NULL);
+ if (r)
+ goto fail;
+ }
+
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+ NULL, 0, NULL, NULL);
+
+fail:
+ /*
+ * We can safely remove these entries, even if failure.
+ * Calling code will receive an error and will know that
+ * the log facility has failed.
+ */
+ list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
+ list_del(&fe->list);
+ mempool_free(fe, flush_entry_pool);
+ }
+
+ if (r)
+ dm_table_event(lc->ti->table);
+
+ return r;
+}
+
+/*
+ * userspace_mark_region
+ *
+ * This function should avoid blocking unless absolutely required.
+ * (Memory allocation is valid for blocking.)
+ */
+static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
+{
+ unsigned long flags;
+ struct log_c *lc = log->context;
+ struct flush_entry *fe;
+
+ /* Wait for an allocation, but _never_ fail */
+ fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
+ BUG_ON(!fe);
+
+ spin_lock_irqsave(&lc->flush_lock, flags);
+ fe->type = DM_ULOG_MARK_REGION;
+ fe->region = region;
+ list_add(&fe->list, &lc->flush_list);
+ spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+ return;
+}
+
+/*
+ * userspace_clear_region
+ *
+ * This function must not block.
+ * So, the alloc can't block. In the worst case, it is ok to
+ * fail. It would simply mean we can't clear the region.
+ * Does nothing to current sync context, but does mean
+ * the region will be re-sync'ed on a reload of the mirror
+ * even though it is in-sync.
+ */
+static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
+{
+ unsigned long flags;
+ struct log_c *lc = log->context;
+ struct flush_entry *fe;
+
+ /*
+ * If we fail to allocate, we skip the clearing of
+ * the region. This doesn't hurt us in any way, except
+ * to cause the region to be resync'ed when the
+ * device is activated next time.
+ */
+ fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+ if (!fe) {
+ DMERR("Failed to allocate memory to clear region.");
+ return;
+ }
+
+ spin_lock_irqsave(&lc->flush_lock, flags);
+ fe->type = DM_ULOG_CLEAR_REGION;
+ fe->region = region;
+ list_add(&fe->list, &lc->flush_list);
+ spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+ return;
+}
+
+/*
+ * userspace_get_resync_work
+ *
+ * Get a region that needs recovery. It is valid to return
+ * an error for this function.
+ *
+ * Returns: 1 if region filled, 0 if no work, <0 on error
+ */
+static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
+{
+ int r;
+ size_t rdata_size;
+ struct log_c *lc = log->context;
+ struct {
+ int64_t i; /* 64-bit for mix arch compatibility */
+ region_t r;
+ } pkg;
+
+ if (lc->in_sync_hint >= lc->region_count)
+ return 0;
+
+ rdata_size = sizeof(pkg);
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
+ NULL, 0,
+ (char *)&pkg, &rdata_size);
+
+ *region = pkg.r;
+ return (r) ? r : (int)pkg.i;
+}
+
+/*
+ * userspace_set_region_sync
+ *
+ * Set the sync status of a given region. This function
+ * must not fail.
+ */
+static void userspace_set_region_sync(struct dm_dirty_log *log,
+ region_t region, int in_sync)
+{
+ int r;
+ struct log_c *lc = log->context;
+ struct {
+ region_t r;
+ int64_t i;
+ } pkg;
+
+ pkg.r = region;
+ pkg.i = (int64_t)in_sync;
+
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
+ (char *)&pkg, sizeof(pkg),
+ NULL, NULL);
+
+ /*
+ * It would be nice to be able to report failures.
+ * However, it is easy emough to detect and resolve.
+ */
+ return;
+}
+
+/*
+ * userspace_get_sync_count
+ *
+ * If there is any sort of failure when consulting the server,
+ * we assume that the sync count is zero.
+ *
+ * Returns: sync count on success, 0 on failure
+ */
+static region_t userspace_get_sync_count(struct dm_dirty_log *log)
+{
+ int r;
+ size_t rdata_size;
+ uint64_t sync_count;
+ struct log_c *lc = log->context;
+
+ rdata_size = sizeof(sync_count);
+ r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
+ NULL, 0,
+ (char *)&sync_count, &rdata_size);
+
+ if (r)
+ return 0;
+
+ if (sync_count >= lc->region_count)
+ lc->in_sync_hint = lc->region_count;
+
+ return (region_t)sync_count;
+}
+
+/*
+ * userspace_status
+ *
+ * Returns: amount of space consumed
+ */
+static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
+ char *result, unsigned maxlen)
+{
+ int r = 0;
+ size_t sz = (size_t)maxlen;
+ struct log_c *lc = log->context;
+
+ switch (status_type) {
+ case STATUSTYPE_INFO: