diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-24 10:26:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-24 10:26:54 -0700 |
commit | c3cb5e193937c7aa50c323e7933507020bd26340 (patch) | |
tree | ea36213ccd29dc4caf2f729fd51b2d489b591a31 | |
parent | ea94b5034bbebc964115f119d6cd330757fce7f9 (diff) | |
parent | f40c67f0f7e2767f80f7cbcbc1ab86c4113c202e (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (48 commits)
dm mpath: change to be request based
dm: disable interrupt when taking map_lock
dm: do not set QUEUE_ORDERED_DRAIN if request based
dm: enable request based option
dm: prepare for request based option
dm raid1: add userspace log
dm: calculate queue limits during resume not load
dm log: fix create_log_context to use logical_block_size of log device
dm target:s introduce iterate devices fn
dm table: establish queue limits by copying table limits
dm table: replace struct io_restrictions with struct queue_limits
dm table: validate device logical_block_size
dm table: ensure targets are aligned to logical_block_size
dm ioctl: support cookies for udev
dm: sysfs add suspended attribute
dm table: improve warning message when devices not freed before destruction
dm mpath: add service time load balancer
dm mpath: add queue length load balancer
dm mpath: add start_io and nr_bytes to path selectors
dm snapshot: use barrier when writing exception store
...
35 files changed, 3993 insertions, 435 deletions
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt new file mode 100644 index 00000000000..994dd75475a --- /dev/null +++ b/Documentation/device-mapper/dm-log.txt @@ -0,0 +1,54 @@ +Device-Mapper Logging +===================== +The device-mapper logging code is used by some of the device-mapper +RAID targets to track regions of the disk that are not consistent. +A region (or portion of the address space) of the disk may be +inconsistent because a RAID stripe is currently being operated on or +a machine died while the region was being altered. In the case of +mirrors, a region would be considered dirty/inconsistent while you +are writing to it because the writes need to be replicated for all +the legs of the mirror and may not reach the legs at the same time. +Once all writes are complete, the region is considered clean again. + +There is a generic logging interface that the device-mapper RAID +implementations use to perform logging operations (see +dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different +logging implementations are available and provide different +capabilities. The list includes: + +Type Files +==== ===== +disk drivers/md/dm-log.c +core drivers/md/dm-log.c +userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h + +The "disk" log type +------------------- +This log implementation commits the log state to disk. This way, the +logging state survives reboots/crashes. + +The "core" log type +------------------- +This log implementation keeps the log state in memory. The log state +will not survive a reboot or crash, but there may be a small boost in +performance. This method can also be used if no storage device is +available for storing log state. + +The "userspace" log type +------------------------ +This log type simply provides a way to export the log API to userspace, +so log implementations can be done there. This is done by forwarding most +logging requests to userspace, where a daemon receives and processes the +request. + +The structure used for communication between kernel and userspace are +located in include/linux/dm-log-userspace.h. Due to the frequency, +diversity, and 2-way communication nature of the exchanges between +kernel and userspace, 'connector' is used as the interface for +communication. + +There are currently two userspace log implementations that leverage this +framework - "clustered_disk" and "clustered_core". These implementations +provide a cluster-coherent log for shared-storage. Device-mapper mirroring +can be used in a shared-storage environment when the cluster log implementations +are employed. diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt new file mode 100644 index 00000000000..f4db2562175 --- /dev/null +++ b/Documentation/device-mapper/dm-queue-length.txt @@ -0,0 +1,39 @@ +dm-queue-length +=============== + +dm-queue-length is a path selector module for device-mapper targets, +which selects a path with the least number of in-flight I/Os. +The path selector name is 'queue-length'. + +Table parameters for each path: [<repeat_count>] + <repeat_count>: The number of I/Os to dispatch using the selected + path before switching to the next path. + If not given, internal default is used. To check + the default value, see the activated table. + +Status for each path: <status> <fail-count> <in-flight> + <status>: 'A' if the path is active, 'F' if the path is failed. + <fail-count>: The number of path failures. + <in-flight>: The number of in-flight I/Os on the path. + + +Algorithm +========= + +dm-queue-length increments/decrements 'in-flight' when an I/O is +dispatched/completed respectively. +dm-queue-length selects a path with the minimum 'in-flight'. + + +Examples +======== +In case that 2 paths (sda and sdb) are used with repeat_count == 128. + +# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \ + dmsetup create test +# +# dmsetup table +test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128 +# +# dmsetup status +test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0 diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt new file mode 100644 index 00000000000..7d00668e97b --- /dev/null +++ b/Documentation/device-mapper/dm-service-time.txt @@ -0,0 +1,91 @@ +dm-service-time +=============== + +dm-service-time is a path selector module for device-mapper targets, +which selects a path with the shortest estimated service time for +the incoming I/O. + +The service time for each path is estimated by dividing the total size +of in-flight I/Os on a path with the performance value of the path. +The performance value is a relative throughput value among all paths +in a path-group, and it can be specified as a table argument. + +The path selector name is 'service-time'. + +Table parameters for each path: [<repeat_count> [<relative_throughput>]] + <repeat_count>: The number of I/Os to dispatch using the selected + path before switching to the next path. + If not given, internal default is used. To check + the default value, see the activated table. + <relative_throughput>: The relative throughput value of the path + among all paths in the path-group. + The valid range is 0-100. + If not given, minimum value '1' is used. + If '0' is given, the path isn't selected while + other paths having a positive value are available. + +Status for each path: <status> <fail-count> <in-flight-size> \ + <relative_throughput> + <status>: 'A' if the path is active, 'F' if the path is failed. + <fail-count>: The number of path failures. + <in-flight-size>: The size of in-flight I/Os on the path. + <relative_throughput>: The relative throughput value of the path + among all paths in the path-group. + + +Algorithm +========= + +dm-service-time adds the I/O size to 'in-flight-size' when the I/O is +dispatched and substracts when completed. +Basically, dm-service-time selects a path having minimum service time +which is calculated by: + + ('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput' + +However, some optimizations below are used to reduce the calculation +as much as possible. + + 1. If the paths have the same 'relative_throughput', skip + the division and just compare the 'in-flight-size'. + + 2. If the paths have the same 'in-flight-size', skip the division + and just compare the 'relative_throughput'. + + 3. If some paths have non-zero 'relative_throughput' and others + have zero 'relative_throughput', ignore those paths with zero + 'relative_throughput'. + +If such optimizations can't be applied, calculate service time, and +compare service time. +If calculated service time is equal, the path having maximum +'relative_throughput' may be better. So compare 'relative_throughput' +then. + + +Examples +======== +In case that 2 paths (sda and sdb) are used with repeat_count == 128 +and sda has an average throughput 1GB/s and sdb has 4GB/s, +'relative_throughput' value may be '1' for sda and '4' for sdb. + +# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \ + dmsetup create test +# +# dmsetup table +test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4 +# +# dmsetup status +test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4 + + +Or '2' for sda and '8' for sdb would be also true. + +# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \ + dmsetup create test +# +# dmsetup table +test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8 +# +# dmsetup status +test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8 diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f..020f9573fd8 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -231,6 +231,17 @@ config DM_MIRROR Allow volume managers to mirror logical volumes, also needed for live data migration tools such as 'pvmove'. +config DM_LOG_USERSPACE + tristate "Mirror userspace logging (EXPERIMENTAL)" + depends on DM_MIRROR && EXPERIMENTAL && NET + select CONNECTOR + ---help--- + The userspace logging module provides a mechanism for + relaying the dm-dirty-log API to userspace. Log designs + which are more suited to userspace implementation (e.g. + shared storage logs) or experimental logs can be implemented + by leveraging this framework. + config DM_ZERO tristate "Zero target" depends on BLK_DEV_DM @@ -249,6 +260,25 @@ config DM_MULTIPATH ---help--- Allow volume managers to support multipath hardware. +config DM_MULTIPATH_QL + tristate "I/O Path Selector based on the number of in-flight I/Os" + depends on DM_MULTIPATH + ---help--- + This path selector is a dynamic load balancer which selects + the path with the least number of in-flight I/Os. + + If unsure, say N. + +config DM_MULTIPATH_ST + tristate "I/O Path Selector based on the service time" + depends on DM_MULTIPATH + ---help--- + This path selector is a dynamic load balancer which selects + the path expected to complete the incoming I/O in the shortest + time. + + If unsure, say N. + config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 45cc5951d92..1dc4185bd78 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o dm-mirror-y += dm-raid1.o +dm-log-userspace-y \ + += dm-log-userspace-base.o dm-log-userspace-transfer.o md-mod-y += md.o bitmap.o raid456-y += raid5.o raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ @@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o +obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o +obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o +obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o quiet_cmd_unroll = UNROLL $@ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53394e863c7..9933eb861c7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_crypt_queue; } + ti->num_flush_requests = 1; ti->private = cc; return 0; @@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_crypt_io *io; + struct crypt_config *cc; + + if (unlikely(bio_empty_barrier(bio))) { + cc = ti->private; + bio->bi_bdev = cc->dev->bdev; + return DM_MAPIO_REMAPPED; + } io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); @@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static int crypt_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct crypt_config *cc = ti->private; + + return fn(ti, cc->dev, cc->start, data); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1318,6 +1334,7 @@ static struct target_type crypt_target = { .resume = crypt_resume, .message = crypt_message, .merge = crypt_merge, + .iterate_devices = crypt_iterate_devices, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 559dbb52bc8..4e5b843cd4d 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -197,6 +197,7 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); + ti->num_flush_requests = 1; ti->private = dc; return 0; @@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio, if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { bio->bi_bdev = dc->dev_write->bdev; - bio->bi_sector = dc->start_write + - (bio->bi_sector - ti->begin); + if (bio_sectors(bio)) + bio->bi_sector = dc->start_write + + (bio->bi_sector - ti->begin); return delay_bio(dc, dc->write_delay, bio); } @@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type, return 0; } +static int delay_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct delay_c *dc = ti->private; + int ret = 0; + + ret = fn(ti, dc->dev_read, dc->start_read, data); + if (ret) + goto out; + + if (dc->dev_write) + ret = fn(ti, dc->dev_write, dc->start_write, data); + +out: + return ret; +} + static struct target_type delay_target = { .name = "delay", - .version = {1, 0, 2}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, @@ -326,6 +345,7 @@ static struct target_type delay_target = { .presuspend = delay_presuspend, .resume = delay_resume, .status = delay_status, + .iterate_devices = delay_iterate_devices, }; static int __init dm_delay_init(void) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 75d8081a904..c3ae51584b1 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, return -EINVAL; } - type = get_type(argv[1]); + type = get_type(&persistent); if (!type) { ti->error = "Exception store type not recognised"; r = -EINVAL; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index c92701dc500..2442c8c0789 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) */ static inline sector_t get_dev_size(struct block_device *bdev) { - return bdev->bd_inode->i_size >> SECTOR_SHIFT; + return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; } static inline chunk_t sector_to_chunk(struct dm_exception_store *store, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e73aabd61cd..3a2e6a2f8bd 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -22,6 +22,7 @@ struct dm_io_client { /* FIXME: can we shrink this ? */ struct io { unsigned long error_bits; + unsigned long eopnotsupp_bits; atomic_t count; struct task_struct *sleeper; struct dm_io_client *client; @@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio) *---------------------------------------------------------------*/ static void dec_count(struct io *io, unsigned int region, int error) { - if (error) + if (error) { set_bit(region, &io->error_bits); + if (error == -EOPNOTSUPP) + set_bit(region, &io->eopnotsupp_bits); + } if (atomic_dec_and_test(&io->count)) { if (io->sleeper) @@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return -EIO; } +retry: io.error_bits = 0; + io.eopnotsupp_bits = 0; atomic_set(&io.count, 1); /* see dispatch_io() */ io.sleeper = current; io.client = client; @@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } set_current_state(TASK_RUNNING); + if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { + rw &= ~(1 << BIO_RW_BARRIER); + goto retry; + } + if (error_bits) *error_bits = io.error_bits; @@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; + io->eopnotsupp_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = NULL; io->client = client; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1128d3fba79..7f77f18fcaf 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -276,7 +276,7 @@ retry: up_write(&_hash_lock); } -static int dm_hash_rename(const char *old, const char *new) +static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) { char *new_name, *old_name; struct hash_cell *hc; @@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new) dm_table_put(table); } - dm_kobject_uevent(hc->md); + dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie); dm_put(hc->md); up_write(&_hash_lock); @@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) __hash_remove(hc); up_write(&_hash_lock); + + dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr); + dm_put(md); param->data_size = 0; return 0; @@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return r; param->data_size = 0; - return dm_hash_rename(param->name, new_name); + return dm_hash_rename(param->event_nr, param->name, new_name); } static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) @@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended(md)) r = dm_resume(md); - if (!r) + + if (!r) { + dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); r = __dev_status(md, param); + } dm_put(md); return r; @@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table, next = spec->next; } + r = dm_table_set_type(table); + if (r) { + DMWARN("unable to set table type"); + return r; + } + return dm_table_complete(table); } @@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size) goto out; } + r = dm_table_alloc_md_mempools(t); + if (r) { + DMWARN("unable to allocate mempools for this table"); + dm_table_destroy(t); + goto out; + } + down_write(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 79fb53e51c7..9184b6deb86 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + ti->num_flush_requests = 1; ti->private = lc; return 0; @@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) struct linear_c *lc = ti->private; bio->bi_bdev = lc->dev->bdev; - bio->bi_sector = linear_map_sector(ti, bio->bi_sector); + if (bio_sectors(bio)) + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } static int linear_map(struct dm_target *ti, struct bio *bio, @@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static int linear_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct linear_c *lc = ti->private; + + return fn(ti, lc->dev, lc->start, data); +} + static struct target_type linear_target = { .name = "linear", - .version= {1, 0, 3}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, @@ -142,6 +152,7 @@ static struct target_type linear_target = { .status = linear_status, .ioctl = linear_ioctl, .merge = linear_merge, + .iterate_devices = linear_iterate_devices, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c new file mode 100644 index 00000000000..e69b9656099 --- /dev/null +++ b/drivers/md/dm-log-userspace-base.c @@ -0,0 +1,696 @@ +/* + * Copyright (C) 2006-2009 Red Hat, Inc. + * + * This file is released under the LGPL. + */ + +#include <linux/bio.h> +#include <linux/dm-dirty-log.h> +#include <linux/device-mapper.h> +#include <linux/dm-log-userspace.h> + +#include "dm-log-userspace-transfer.h" + +struct flush_entry { + int type; + region_t region; + struct list_head list; +}; + +struct log_c { + struct dm_target *ti; + uint32_t region_size; + region_t region_count; + char uuid[DM_UUID_LEN]; + + char *usr_argv_str; + uint32_t usr_argc; + + /* + * in_sync_hint gets set when doing is_remote_recovering. It + * represents the first region that needs recovery. IOW, the + * first zero bit of sync_bits. This can be useful for to limit + * traffic for calls like is_remote_recovering and get_resync_work, + * but be take care in its use for anything else. + */ + uint64_t in_sync_hint; + + spinlock_t flush_lock; + struct list_head flush_list; /* only for clear and mark requests */ +}; + +static mempool_t *flush_entry_pool; + +static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) +{ + return kmalloc(sizeof(struct flush_entry), gfp_mask); +} + +static void flush_entry_free(void *element, void *pool_data) +{ + kfree(element); +} + +static int userspace_do_request(struct log_c *lc, const char *uuid, + int request_type, char *data, size_t data_size, + char *rdata, size_t *rdata_size) +{ + int r; + + /* + * If the server isn't there, -ESRCH is returned, + * and we must keep trying until the server is + * restored. + */ +retry: + r = dm_consult_userspace(uuid, request_type, data, + data_size, rdata, rdata_size); + + if (r != -ESRCH) + return r; + + DMERR(" Userspace log server not found."); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2*HZ); + DMWARN("Attempting to contact userspace log server..."); + r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, + strlen(lc->usr_argv_str) + 1, + NULL, NULL); + if (!r) + break; + } + DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); + r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, + 0, NULL, NULL); + if (!r) + goto retry; + + DMERR("Error trying to resume userspace log: %d", r); + + return -ESRCH; +} + +static int build_constructor_string(struct dm_target *ti, + unsigned argc, char **argv, + char **ctr_str) +{ + int i, str_size; + char *str = NULL; + + *ctr_str = NULL; + + for (i = 0, str_size = 0; i < argc; i++) + str_size += strlen(argv[i]) + 1; /* +1 for space between args */ + + str_size += 20; /* Max number of chars in a printed u64 number */ + + str = kzalloc(str_size, GFP_KERNEL); + if (!str) { + DMWARN("Unable to allocate memory for constructor string"); + return -ENOMEM; + } + + for (i = 0, str_size = 0; i < argc; i++) + str_size += sprintf(str + str_size, "%s ", argv[i]); + str_size += sprintf(str + str_size, "%llu", + (unsigned long long)ti->len); + + *ctr_str = str; + return str_size; +} + +/* + * userspace_ctr + * + * argv contains: + * <UUID> <other args> + * Where 'other args' is the userspace implementation specific log + * arguments. An example might be: + * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] + * + * So, this module will strip off the <UUID> for identification purposes + * when communicating with userspace about a log; but will pass on everything + * else. + */ +static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, + unsigned argc, char **argv) +{ + int r = 0; + int str_size; + char *ctr_str = NULL; + struct log_c *lc = NULL; + uint64_t rdata; + size_t rdata_size = sizeof(rdata); + + if (argc < 3) { + DMWARN("Too few arguments to userspace dirty log"); + return -EINVAL; + } + + lc = kmalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) { + DMWARN("Unable to allocate userspace log context."); + return -ENOMEM; + } + + lc->ti = ti; + + if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { + DMWARN("UUID argument too long."); + kfree(lc); + return -EINVAL; + } + + strncpy(lc->uuid, argv[0], DM_UUID_LEN); + spin_lock_init(&lc->flush_lock); + INIT_LIST_HEAD(&lc->flush_list); + + str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); + if (str_size < 0) { + kfree(lc); + return str_size; + } + + /* Send table string */ + r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, + ctr_str, str_size, NULL, NULL); + + if (r == -ESRCH) { + DMERR("Userspa |