diff options
Diffstat (limited to 'drivers')
27 files changed, 3377 insertions, 415 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f..020f9573fd8 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -231,6 +231,17 @@ config DM_MIRROR Allow volume managers to mirror logical volumes, also needed for live data migration tools such as 'pvmove'. +config DM_LOG_USERSPACE + tristate "Mirror userspace logging (EXPERIMENTAL)" + depends on DM_MIRROR && EXPERIMENTAL && NET + select CONNECTOR + ---help--- + The userspace logging module provides a mechanism for + relaying the dm-dirty-log API to userspace. Log designs + which are more suited to userspace implementation (e.g. + shared storage logs) or experimental logs can be implemented + by leveraging this framework. + config DM_ZERO tristate "Zero target" depends on BLK_DEV_DM @@ -249,6 +260,25 @@ config DM_MULTIPATH ---help--- Allow volume managers to support multipath hardware. +config DM_MULTIPATH_QL + tristate "I/O Path Selector based on the number of in-flight I/Os" + depends on DM_MULTIPATH + ---help--- + This path selector is a dynamic load balancer which selects + the path with the least number of in-flight I/Os. + + If unsure, say N. + +config DM_MULTIPATH_ST + tristate "I/O Path Selector based on the service time" + depends on DM_MULTIPATH + ---help--- + This path selector is a dynamic load balancer which selects + the path expected to complete the incoming I/O in the shortest + time. + + If unsure, say N. + config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 45cc5951d92..1dc4185bd78 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-snap-persistent.o dm-mirror-y += dm-raid1.o +dm-log-userspace-y \ + += dm-log-userspace-base.o dm-log-userspace-transfer.o md-mod-y += md.o bitmap.o raid456-y += raid5.o raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ @@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o +obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o +obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o +obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o quiet_cmd_unroll = UNROLL $@ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53394e863c7..9933eb861c7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_crypt_queue; } + ti->num_flush_requests = 1; ti->private = cc; return 0; @@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_crypt_io *io; + struct crypt_config *cc; + + if (unlikely(bio_empty_barrier(bio))) { + cc = ti->private; + bio->bi_bdev = cc->dev->bdev; + return DM_MAPIO_REMAPPED; + } io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); @@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static int crypt_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct crypt_config *cc = ti->private; + + return fn(ti, cc->dev, cc->start, data); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1318,6 +1334,7 @@ static struct target_type crypt_target = { .resume = crypt_resume, .message = crypt_message, .merge = crypt_merge, + .iterate_devices = crypt_iterate_devices, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 559dbb52bc8..4e5b843cd4d 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -197,6 +197,7 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); + ti->num_flush_requests = 1; ti->private = dc; return 0; @@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio, if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { bio->bi_bdev = dc->dev_write->bdev; - bio->bi_sector = dc->start_write + - (bio->bi_sector - ti->begin); + if (bio_sectors(bio)) + bio->bi_sector = dc->start_write + + (bio->bi_sector - ti->begin); return delay_bio(dc, dc->write_delay, bio); } @@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type, return 0; } +static int delay_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct delay_c *dc = ti->private; + int ret = 0; + + ret = fn(ti, dc->dev_read, dc->start_read, data); + if (ret) + goto out; + + if (dc->dev_write) + ret = fn(ti, dc->dev_write, dc->start_write, data); + +out: + return ret; +} + static struct target_type delay_target = { .name = "delay", - .version = {1, 0, 2}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, @@ -326,6 +345,7 @@ static struct target_type delay_target = { .presuspend = delay_presuspend, .resume = delay_resume, .status = delay_status, + .iterate_devices = delay_iterate_devices, }; static int __init dm_delay_init(void) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 75d8081a904..c3ae51584b1 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, return -EINVAL; } - type = get_type(argv[1]); + type = get_type(&persistent); if (!type) { ti->error = "Exception store type not recognised"; r = -EINVAL; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index c92701dc500..2442c8c0789 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) */ static inline sector_t get_dev_size(struct block_device *bdev) { - return bdev->bd_inode->i_size >> SECTOR_SHIFT; + return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; } static inline chunk_t sector_to_chunk(struct dm_exception_store *store, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e73aabd61cd..3a2e6a2f8bd 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -22,6 +22,7 @@ struct dm_io_client { /* FIXME: can we shrink this ? */ struct io { unsigned long error_bits; + unsigned long eopnotsupp_bits; atomic_t count; struct task_struct *sleeper; struct dm_io_client *client; @@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio) *---------------------------------------------------------------*/ static void dec_count(struct io *io, unsigned int region, int error) { - if (error) + if (error) { set_bit(region, &io->error_bits); + if (error == -EOPNOTSUPP) + set_bit(region, &io->eopnotsupp_bits); + } if (atomic_dec_and_test(&io->count)) { if (io->sleeper) @@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return -EIO; } +retry: io.error_bits = 0; + io.eopnotsupp_bits = 0; atomic_set(&io.count, 1); /* see dispatch_io() */ io.sleeper = current; io.client = client; @@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } set_current_state(TASK_RUNNING); + if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { + rw &= ~(1 << BIO_RW_BARRIER); + goto retry; + } + if (error_bits) *error_bits = io.error_bits; @@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; + io->eopnotsupp_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = NULL; io->client = client; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1128d3fba79..7f77f18fcaf 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -276,7 +276,7 @@ retry: up_write(&_hash_lock); } -static int dm_hash_rename(const char *old, const char *new) +static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) { char *new_name, *old_name; struct hash_cell *hc; @@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new) dm_table_put(table); } - dm_kobject_uevent(hc->md); + dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie); dm_put(hc->md); up_write(&_hash_lock); @@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) __hash_remove(hc); up_write(&_hash_lock); + + dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr); + dm_put(md); param->data_size = 0; return 0; @@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return r; param->data_size = 0; - return dm_hash_rename(param->name, new_name); + return dm_hash_rename(param->event_nr, param->name, new_name); } static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) @@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended(md)) r = dm_resume(md); - if (!r) + + if (!r) { + dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); r = __dev_status(md, param); + } dm_put(md); return r; @@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table, next = spec->next; } + r = dm_table_set_type(table); + if (r) { + DMWARN("unable to set table type"); + return r; + } + return dm_table_complete(table); } @@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size) goto out; } + r = dm_table_alloc_md_mempools(t); + if (r) { + DMWARN("unable to allocate mempools for this table"); + dm_table_destroy(t); + goto out; + } + down_write(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 79fb53e51c7..9184b6deb86 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + ti->num_flush_requests = 1; ti->private = lc; return 0; @@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) struct linear_c *lc = ti->private; bio->bi_bdev = lc->dev->bdev; - bio->bi_sector = linear_map_sector(ti, bio->bi_sector); + if (bio_sectors(bio)) + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } static int linear_map(struct dm_target *ti, struct bio *bio, @@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static int linear_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct linear_c *lc = ti->private; + + return fn(ti, lc->dev, lc->start, data); +} + static struct target_type linear_target = { .name = "linear", - .version= {1, 0, 3}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, @@ -142,6 +152,7 @@ static struct target_type linear_target = { .status = linear_status, .ioctl = linear_ioctl, .merge = linear_merge, + .iterate_devices = linear_iterate_devices, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c new file mode 100644 index 00000000000..e69b9656099 --- /dev/null +++ b/drivers/md/dm-log-userspace-base.c @@ -0,0 +1,696 @@ +/* + * Copyright (C) 2006-2009 Red Hat, Inc. + * + * This file is released under the LGPL. + */ + +#include <linux/bio.h> +#include <linux/dm-dirty-log.h> +#include <linux/device-mapper.h> +#include <linux/dm-log-userspace.h> + +#include "dm-log-userspace-transfer.h" + +struct flush_entry { + int type; + region_t region; + struct list_head list; +}; + +struct log_c { + struct dm_target *ti; + uint32_t region_size; + region_t region_count; + char uuid[DM_UUID_LEN]; + + char *usr_argv_str; + uint32_t usr_argc; + + /* + * in_sync_hint gets set when doing is_remote_recovering. It + * represents the first region that needs recovery. IOW, the + * first zero bit of sync_bits. This can be useful for to limit + * traffic for calls like is_remote_recovering and get_resync_work, + * but be take care in its use for anything else. + */ + uint64_t in_sync_hint; + + spinlock_t flush_lock; + struct list_head flush_list; /* only for clear and mark requests */ +}; + +static mempool_t *flush_entry_pool; + +static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) +{ + return kmalloc(sizeof(struct flush_entry), gfp_mask); +} + +static void flush_entry_free(void *element, void *pool_data) +{ + kfree(element); +} + +static int userspace_do_request(struct log_c *lc, const char *uuid, + int request_type, char *data, size_t data_size, + char *rdata, size_t *rdata_size) +{ + int r; + + /* + * If the server isn't there, -ESRCH is returned, + * and we must keep trying until the server is + * restored. + */ +retry: + r = dm_consult_userspace(uuid, request_type, data, + data_size, rdata, rdata_size); + + if (r != -ESRCH) + return r; + + DMERR(" Userspace log server not found."); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2*HZ); + DMWARN("Attempting to contact userspace log server..."); + r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, + strlen(lc->usr_argv_str) + 1, + NULL, NULL); + if (!r) + break; + } + DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); + r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, + 0, NULL, NULL); + if (!r) + goto retry; + + DMERR("Error trying to resume userspace log: %d", r); + + return -ESRCH; +} + +static int build_constructor_string(struct dm_target *ti, + unsigned argc, char **argv, + char **ctr_str) +{ + int i, str_size; + char *str = NULL; + + *ctr_str = NULL; + + for (i = 0, str_size = 0; i < argc; i++) + str_size += strlen(argv[i]) + 1; /* +1 for space between args */ + + str_size += 20; /* Max number of chars in a printed u64 number */ + + str = kzalloc(str_size, GFP_KERNEL); + if (!str) { + DMWARN("Unable to allocate memory for constructor string"); + return -ENOMEM; + } + + for (i = 0, str_size = 0; i < argc; i++) + str_size += sprintf(str + str_size, "%s ", argv[i]); + str_size += sprintf(str + str_size, "%llu", + (unsigned long long)ti->len); + + *ctr_str = str; + return str_size; +} + +/* + * userspace_ctr + * + * argv contains: + * <UUID> <other args> + * Where 'other args' is the userspace implementation specific log + * arguments. An example might be: + * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] + * + * So, this module will strip off the <UUID> for identification purposes + * when communicating with userspace about a log; but will pass on everything + * else. + */ +static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, + unsigned argc, char **argv) +{ + int r = 0; + int str_size; + char *ctr_str = NULL; + struct log_c *lc = NULL; + uint64_t rdata; + size_t rdata_size = sizeof(rdata); + + if (argc < 3) { + DMWARN("Too few arguments to userspace dirty log"); + return -EINVAL; + } + + lc = kmalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) { + DMWARN("Unable to allocate userspace log context."); + return -ENOMEM; + } + + lc->ti = ti; + + if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { + DMWARN("UUID argument too long."); + kfree(lc); + return -EINVAL; + } + + strncpy(lc->uuid, argv[0], DM_UUID_LEN); + spin_lock_init(&lc->flush_lock); + INIT_LIST_HEAD(&lc->flush_list); + + str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); + if (str_size < 0) { + kfree(lc); + return str_size; + } + + /* Send table string */ + r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, + ctr_str, str_size, NULL, NULL); + + if (r == -ESRCH) { + DMERR("Userspace log server not found"); + goto out; + } + + /* Since the region size does not change, get it now */ + rdata_size = sizeof(rdata); + r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, + NULL, 0, (char *)&rdata, &rdata_size); + + if (r) { + DMERR("Failed to get region size of dirty log"); + goto out; + } + + lc->region_size = (uint32_t)rdata; + lc->region_count = dm_sector_div_up(ti->len, lc->region_size); + +out: + if (r) { + kfree(lc); + kfree(ctr_str); + } else { + lc->usr_argv_str = ctr_str; + lc->usr_argc = argc; + log->context = lc; + } + + return r; +} + +static void userspace_dtr(struct dm_dirty_log *log) +{ + int r; + struct log_c *lc = log->context; + + r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, + NULL, 0, + NULL, NULL); + + kfree(lc->usr_argv_str); + kfree(lc); + + return; +} + +static int userspace_presuspend(struct dm_dirty_log *log) +{ + int r; + struct log_c *lc = log->context; + + r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, + NULL, 0, + NULL, NULL); + + return r; +} + +static int userspace_postsuspend(struct dm_dirty_log *log) +{ + int r; + struct log_c *lc = log->context; + + r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, + NULL, 0, + NULL, NULL); + + return r; +} + +static int userspace_resume(struct dm_dirty_log *log) +{ + int r; + struct log_c *lc = log->context; + + lc->in_sync_hint = 0; + r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, + NULL, 0, + NULL, NULL); + + return r; +} + +static uint32_t userspace_get_region_size(struct dm_dirty_log *log) +{ + struct log_c *lc = log->context; + + return lc->region_size; +} + +/* + * userspace_is_clean + * + * Check whether a region is clean. If there is any sort of + * failure when consulting the server, we return not clean. + * + * Returns: 1 if clean, 0 otherwise + */ +static int userspace_is_clean(struct dm_dirty_log *log, region_t region) +{ + int r; + uint64_t region64 = (uint64_t)region; + int64_t is_clean; + size_t rdata_size; + struct log_c *lc = log->context; + + rdata_size = sizeof(is_clean); + r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, + (char *)®ion64, sizeof(region64), + (char *)&is_clean, &rdata_size); + + return (r) ? 0 : (int)is_clean; +} + +/* + * userspace_in_sync + * + * Check if the region is in-sync. If there is any sort + * of failure when consulting the server, we assume that + * the region is not in sync. + * + * If 'can_block' is set, return immediately + * + * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK + */ +static int userspace_in_sync(struct dm_dirty_log *log, region_t region, + int can_block) +{ + int r; + uint64_t region64 = region; + int64_t in_sync; + size_t rdata_size; + struct log_c *lc = log->context; + + /* + * We can never respond directly - even if in_sync_hint is + * set. This is because another machine could see a device + * failure and mark the region out-of-sync. If we don't go + * to userspace to ask, we might think the region is in-sync + * and allow a read to pick up data that is stale. (This is + * very unlikely if a device actually fails; but it is very + * likely if a connection to one device from one machine fails.) + * + * There still might be a problem if the mirror caches the region + * state as in-sync... but then this call would not be made. So, + * that is a mirror problem. + */ + if (!can_block) + return -EWOULDBLOCK; + + rdata_size = sizeof(in_sync); + r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, + (char *)®ion64, sizeof(region64), + (char *)&in_sync, &rdata_size); + return (r) ? 0 : (int)in_sync; +} + +/* + * userspace_flush + * + * This function is ok to block. + * The flush happens in two stages. First, it sends all + * clear/mark requests that are on the list. Then it + * tells the server to commit them. This gives the + * server a chance to optimise the commit, instead of + * doing it for every request. + * + * Additionally, we could implement another thread that + * sends the requests up to the server - reducing the + * load on flush. Then the flush would have less in + * the list and be responsible for the finishing commit. + * + * Returns: 0 on success, < 0 on failure + */ +static int userspace_flush(struct dm_dirty_log *log) +{ + int r = 0; + unsigned long flags; + struct log_c *lc = log->context; + LIST_HEAD(flush_list); + struct flush_entry *fe, *tmp_fe; + + spin_lock_irqsave(&lc->flush_lock, flags); + list_splice_init(&lc->flush_list, &flush_list); + spin_unlock_irqrestore(&lc->flush_lock, flags); + + if (list_empty(&flush_list)) + return 0; + + /* + * FIXME: Count up requests, group request types, + * allocate memory to stick all requests in and + * send to server in one go. Failing the allocation, + * do it one by one. + */ + + list_for_each_entry(fe, &flush_list, list) { + r = userspace_do_request(lc, lc->uuid, fe->type, + (char *)&fe->region, + sizeof(fe->region), + NULL, NULL); + if (r) + goto fail; + } + + r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, + NULL, 0, NULL, NULL); + +fail: + /* + * We can safely remove these entries, even if failure. + * Calling code will receive an error and will know that + * the log facility has failed. + */ + list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { + list_del(&fe->list); + mempool_free(fe, flush_entry_pool); + } + + if (r) + dm_table_event(lc->ti->table); + + return r; +} + +/* + * userspace_mark_region + * + * This function should avoid blocking unless absolutely required. + * (Memory allocation is valid for blocking.) + */ +static void userspace_mark_region(struct dm_dirty_log *log, region_t region) +{ + unsigned long flags; + struct log_c *lc = log->context; + struct flush_entry *fe; + + /* Wait for an allocation, but _never_ fail */ + fe = mempool_alloc(flush_entry_pool, GFP_NOIO); + BUG_ON(!fe); + + spin_lock_irqsave(&lc->flush_lock, flags); + fe->type = DM_ULOG_MARK_REGION; + fe->region = region; + list_add(&fe->list, &lc->flush_list); + spin_unlock_irqrestore(&lc->flush_lock, flags); + + return; +} + +/* + * userspace_clear_region + * + * This function must not block. + * So, the alloc can't block. In the worst case, it is ok to + * fail. It would simply mean we can't clear the region. + * Does nothing to current sync context, but does mean + * the region will be re-sync'ed on a reload of the mirror + * even though it is in-sync. + */ +static void userspace_clear_region(struct dm_dirty_log *log, region_t region) +{ + unsigned long flags; + struct log_c *lc = log->context; + struct flush_entry *fe; + + /* + * If we fail to allocate, we skip the clearing of + * the region. This doesn't hurt us in any way, except + * to cause the region to be resync'ed when the + * device is activated next time. + */ + fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); + if (!fe) { + DMERR("Failed to allocate memory to clear region."); + return; + } + + spin_lock_irqsave(&lc->flush_lock, flags); + fe->type = DM_ULOG_CLEAR_REGION; + fe->region = region; + list_add(&fe->list, &lc->flush_list); + spin_unlock_irqrestore(&lc->flush_lock, flags); + + return; +} + +/* + * userspace_get_resync_work + * + * Get a region that needs recovery. It is valid to return + * an error for this function. + * + * Returns: 1 if region filled, 0 if no work, <0 on error + */ +static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) +{ + int r; + size_t rdata_size; + struct log_c *lc = log->context; + struct { + int64_t i; /* 64-bit for mix arch compatibility */ + region_t r; + } pkg; + + if (lc->in_sync_hint >= lc->region_count) + return 0; + + rdata_size = sizeof(pkg); + r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, + NULL, 0, + (char *)&pkg, &rdata_size); + + *region = pkg.r; + return (r) ? r : (int)pkg.i; +} + +/* + * userspace_set_region_sync + * + * Set the sync status of a given region. This function + * must not fail. + */ +static void userspace_set_region_sync(struct dm_dirty_log *log, + region_t region, int in_sync) +{ + int r; + struct log_c *lc = log->context; + struct { + region_t r; + int64_t i; + } pkg; + + pkg.r = region; + pkg.i = (int64_t)in_sync; + + r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, + (char *)&pkg, sizeof(pkg), + NULL, NULL); + + /* + * It would be nice to be able to report failures. + * However, it is easy emough to detect and resolve. + */ + return; +} + +/* + * userspace_get_sync_count + * + * If there is any sort of failure when consulting the server, + * we assume that the sync count is zero. + * + * Returns: sync count on success, 0 on failure + */ +static region_t userspace_get_sync_count(struct dm_dirty_log *log) +{ + int r; + size_t rdata_size; + uint64_t sync_count; + struct log_c *lc = log->context; + + rdata_size = sizeof(sync_count); + r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, + NULL, 0, + (char *)&sync_count, &rdata_size); + + if (r) + return 0; + + if (sync_count >= lc->region_count) + lc->in_sync_hint = lc->region_count; + + return (region_t)sync_count; +} + +/* + * userspace_status + * + * Returns: amount of space consumed + */ +static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, + char *result, unsigned maxlen) +{ + int r = 0; + size_t sz = (size_t)maxlen; + struct log_c *lc = log->context; + + switch (status_type) { + case STATUSTYPE_INFO: |