diff options
Diffstat (limited to 'drivers/md/dm-log-userspace-base.c')
| -rw-r--r-- | drivers/md/dm-log-userspace-base.c | 346 | 
1 files changed, 285 insertions, 61 deletions
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 1ed0094f064..b953db6cc22 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -9,17 +9,30 @@  #include <linux/dm-dirty-log.h>  #include <linux/device-mapper.h>  #include <linux/dm-log-userspace.h> +#include <linux/module.h> +#include <linux/workqueue.h>  #include "dm-log-userspace-transfer.h" +#define DM_LOG_USERSPACE_VSN "1.3.0" +  struct flush_entry {  	int type;  	region_t region;  	struct list_head list;  }; +/* + * This limit on the number of mark and clear request is, to a degree, + * arbitrary.  However, there is some basis for the choice in the limits + * imposed on the size of data payload by dm-log-userspace-transfer.c: + * dm_consult_userspace(). + */ +#define MAX_FLUSH_GROUP_COUNT 32 +  struct log_c {  	struct dm_target *ti; +	struct dm_dev *log_dev;  	uint32_t region_size;  	region_t region_count;  	uint64_t luid; @@ -37,8 +50,27 @@ struct log_c {  	 */  	uint64_t in_sync_hint; +	/* +	 * Mark and clear requests are held until a flush is issued +	 * so that we can group, and thereby limit, the amount of +	 * network traffic between kernel and userspace.  The 'flush_lock' +	 * is used to protect these lists. +	 */  	spinlock_t flush_lock; -	struct list_head flush_list;  /* only for clear and mark requests */ +	struct list_head mark_list; +	struct list_head clear_list; + +	/* +	 * Workqueue for flush of clear region requests. +	 */ +	struct workqueue_struct *dmlog_wq; +	struct delayed_work flush_log_work; +	atomic_t sched_flush; + +	/* +	 * Combine userspace flush and mark requests for efficiency. +	 */ +	uint32_t integrated_flush;  };  static mempool_t *flush_entry_pool; @@ -103,6 +135,9 @@ static int build_constructor_string(struct dm_target *ti,  	*ctr_str = NULL; +	/* +	 * Determine overall size of the string. +	 */  	for (i = 0, str_size = 0; i < argc; i++)  		str_size += strlen(argv[i]) + 1; /* +1 for space between args */ @@ -122,18 +157,39 @@ static int build_constructor_string(struct dm_target *ti,  	return str_size;  } +static void do_flush(struct work_struct *work) +{ +	int r; +	struct log_c *lc = container_of(work, struct log_c, flush_log_work.work); + +	atomic_set(&lc->sched_flush, 0); + +	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL); + +	if (r) +		dm_table_event(lc->ti->table); +} +  /*   * userspace_ctr   *   * argv contains: - *	<UUID> <other args> - * Where 'other args' is the userspace implementation specific log - * arguments.  An example might be: - *	<UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] + *	<UUID> [integrated_flush] <other args> + * Where 'other args' are the userspace implementation-specific log + * arguments. + * + * Example: + *	<UUID> [integrated_flush] clustered-disk <arg count> <log dev> + *	<region_size> [[no]sync] + * + * This module strips off the <UUID> and uses it for identification + * purposes when communicating with userspace about a log.   * - * So, this module will strip off the <UUID> for identification purposes - * when communicating with userspace about a log; but will pass on everything - * else. + * If integrated_flush is defined, the kernel combines flush + * and mark requests. + * + * The rest of the line, beginning with 'clustered-disk', is passed + * to the userspace ctr function.   */  static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,  			 unsigned argc, char **argv) @@ -144,13 +200,15 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,  	struct log_c *lc = NULL;  	uint64_t rdata;  	size_t rdata_size = sizeof(rdata); +	char *devices_rdata = NULL; +	size_t devices_rdata_size = DM_NAME_LEN;  	if (argc < 3) {  		DMWARN("Too few arguments to userspace dirty log");  		return -EINVAL;  	} -	lc = kmalloc(sizeof(*lc), GFP_KERNEL); +	lc = kzalloc(sizeof(*lc), GFP_KERNEL);  	if (!lc) {  		DMWARN("Unable to allocate userspace log context.");  		return -ENOMEM; @@ -167,22 +225,46 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,  		return -EINVAL;  	} +	lc->usr_argc = argc; +  	strncpy(lc->uuid, argv[0], DM_UUID_LEN); +	argc--; +	argv++;  	spin_lock_init(&lc->flush_lock); -	INIT_LIST_HEAD(&lc->flush_list); +	INIT_LIST_HEAD(&lc->mark_list); +	INIT_LIST_HEAD(&lc->clear_list); + +	if (!strcasecmp(argv[0], "integrated_flush")) { +		lc->integrated_flush = 1; +		argc--; +		argv++; +	} -	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); +	str_size = build_constructor_string(ti, argc, argv, &ctr_str);  	if (str_size < 0) {  		kfree(lc);  		return str_size;  	} -	/* Send table string */ -	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, -				 ctr_str, str_size, NULL, NULL); +	devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL); +	if (!devices_rdata) { +		DMERR("Failed to allocate memory for device information"); +		r = -ENOMEM; +		goto out; +	} -	if (r == -ESRCH) { -		DMERR("Userspace log server not found"); +	/* +	 * Send table string and get back any opened device. +	 */ +	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, +				 ctr_str, str_size, +				 devices_rdata, &devices_rdata_size); + +	if (r < 0) { +		if (r == -ESRCH) +			DMERR("Userspace log server not found"); +		else +			DMERR("Userspace log server failed to create log");  		goto out;  	} @@ -199,13 +281,38 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,  	lc->region_size = (uint32_t)rdata;  	lc->region_count = dm_sector_div_up(ti->len, lc->region_size); +	if (devices_rdata_size) { +		if (devices_rdata[devices_rdata_size - 1] != '\0') { +			DMERR("DM_ULOG_CTR device return string not properly terminated"); +			r = -EINVAL; +			goto out; +		} +		r = dm_get_device(ti, devices_rdata, +				  dm_table_get_mode(ti->table), &lc->log_dev); +		if (r) +			DMERR("Failed to register %s with device-mapper", +			      devices_rdata); +	} + +	if (lc->integrated_flush) { +		lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0); +		if (!lc->dmlog_wq) { +			DMERR("couldn't start dmlogd"); +			r = -ENOMEM; +			goto out; +		} + +		INIT_DELAYED_WORK(&lc->flush_log_work, do_flush); +		atomic_set(&lc->sched_flush, 0); +	} +  out: +	kfree(devices_rdata);  	if (r) {  		kfree(lc);  		kfree(ctr_str);  	} else {  		lc->usr_argv_str = ctr_str; -		lc->usr_argc = argc;  		log->context = lc;  	} @@ -214,12 +321,21 @@ out:  static void userspace_dtr(struct dm_dirty_log *log)  { -	int r;  	struct log_c *lc = log->context; -	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, -				 NULL, 0, -				 NULL, NULL); +	if (lc->integrated_flush) { +		/* flush workqueue */ +		if (atomic_read(&lc->sched_flush)) +			flush_delayed_work(&lc->flush_log_work); + +		destroy_workqueue(lc->dmlog_wq); +	} + +	(void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, +				    NULL, 0, NULL, NULL); + +	if (lc->log_dev) +		dm_put_device(lc->ti, lc->log_dev);  	kfree(lc->usr_argv_str);  	kfree(lc); @@ -233,8 +349,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)  	struct log_c *lc = log->context;  	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, -				 NULL, 0, -				 NULL, NULL); +				 NULL, 0, NULL, NULL);  	return r;  } @@ -244,9 +359,14 @@ static int userspace_postsuspend(struct dm_dirty_log *log)  	int r;  	struct log_c *lc = log->context; +	/* +	 * Run planned flush earlier. +	 */ +	if (lc->integrated_flush && atomic_read(&lc->sched_flush)) +		flush_delayed_work(&lc->flush_log_work); +  	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, -				 NULL, 0, -				 NULL, NULL); +				 NULL, 0, NULL, NULL);  	return r;  } @@ -258,8 +378,7 @@ static int userspace_resume(struct dm_dirty_log *log)  	lc->in_sync_hint = 0;  	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, -				 NULL, 0, -				 NULL, NULL); +				 NULL, 0, NULL, NULL);  	return r;  } @@ -338,6 +457,85 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region,  	return (r) ? 0 : (int)in_sync;  } +static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) +{ +	int r = 0; +	struct flush_entry *fe; + +	list_for_each_entry(fe, flush_list, list) { +		r = userspace_do_request(lc, lc->uuid, fe->type, +					 (char *)&fe->region, +					 sizeof(fe->region), +					 NULL, NULL); +		if (r) +			break; +	} + +	return r; +} + +static int flush_by_group(struct log_c *lc, struct list_head *flush_list, +			  int flush_with_payload) +{ +	int r = 0; +	int count; +	uint32_t type = 0; +	struct flush_entry *fe, *tmp_fe; +	LIST_HEAD(tmp_list); +	uint64_t group[MAX_FLUSH_GROUP_COUNT]; + +	/* +	 * Group process the requests +	 */ +	while (!list_empty(flush_list)) { +		count = 0; + +		list_for_each_entry_safe(fe, tmp_fe, flush_list, list) { +			group[count] = fe->region; +			count++; + +			list_move(&fe->list, &tmp_list); + +			type = fe->type; +			if (count >= MAX_FLUSH_GROUP_COUNT) +				break; +		} + +		if (flush_with_payload) { +			r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, +						 (char *)(group), +						 count * sizeof(uint64_t), +						 NULL, NULL); +			/* +			 * Integrated flush failed. +			 */ +			if (r) +				break; +		} else { +			r = userspace_do_request(lc, lc->uuid, type, +						 (char *)(group), +						 count * sizeof(uint64_t), +						 NULL, NULL); +			if (r) { +				/* +				 * Group send failed.  Attempt one-by-one. +				 */ +				list_splice_init(&tmp_list, flush_list); +				r = flush_one_by_one(lc, flush_list); +				break; +			} +		} +	} + +	/* +	 * Must collect flush_entrys that were successfully processed +	 * as a group so that they will be free'd by the caller. +	 */ +	list_splice_init(&tmp_list, flush_list); + +	return r; +} +  /*   * userspace_flush   * @@ -360,42 +558,70 @@ static int userspace_flush(struct dm_dirty_log *log)  	int r = 0;  	unsigned long flags;  	struct log_c *lc = log->context; -	LIST_HEAD(flush_list); +	LIST_HEAD(mark_list); +	LIST_HEAD(clear_list); +	int mark_list_is_empty; +	int clear_list_is_empty;  	struct flush_entry *fe, *tmp_fe;  	spin_lock_irqsave(&lc->flush_lock, flags); -	list_splice_init(&lc->flush_list, &flush_list); +	list_splice_init(&lc->mark_list, &mark_list); +	list_splice_init(&lc->clear_list, &clear_list);  	spin_unlock_irqrestore(&lc->flush_lock, flags); -	if (list_empty(&flush_list)) +	mark_list_is_empty = list_empty(&mark_list); +	clear_list_is_empty = list_empty(&clear_list); + +	if (mark_list_is_empty && clear_list_is_empty)  		return 0; -	/* -	 * FIXME: Count up requests, group request types, -	 * allocate memory to stick all requests in and -	 * send to server in one go.  Failing the allocation, -	 * do it one by one. -	 */ +	r = flush_by_group(lc, &clear_list, 0); +	if (r) +		goto out; -	list_for_each_entry(fe, &flush_list, list) { -		r = userspace_do_request(lc, lc->uuid, fe->type, -					 (char *)&fe->region, -					 sizeof(fe->region), -					 NULL, NULL); +	if (!lc->integrated_flush) { +		r = flush_by_group(lc, &mark_list, 0);  		if (r) -			goto fail; +			goto out; +		r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, +					 NULL, 0, NULL, NULL); +		goto out;  	} -	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, -				 NULL, 0, NULL, NULL); +	/* +	 * Send integrated flush request with mark_list as payload. +	 */ +	r = flush_by_group(lc, &mark_list, 1); +	if (r) +		goto out; + +	if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) { +		/* +		 * When there are only clear region requests, +		 * we schedule a flush in the future. +		 */ +		queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ); +		atomic_set(&lc->sched_flush, 1); +	} else { +		/* +		 * Cancel pending flush because we +		 * have already flushed in mark_region. +		 */ +		cancel_delayed_work(&lc->flush_log_work); +		atomic_set(&lc->sched_flush, 0); +	} -fail: +out:  	/* -	 * We can safely remove these entries, even if failure. +	 * We can safely remove these entries, even after failure.  	 * Calling code will receive an error and will know that  	 * the log facility has failed.  	 */ -	list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { +	list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) { +		list_del(&fe->list); +		mempool_free(fe, flush_entry_pool); +	} +	list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {  		list_del(&fe->list);  		mempool_free(fe, flush_entry_pool);  	} @@ -425,7 +651,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region)  	spin_lock_irqsave(&lc->flush_lock, flags);  	fe->type = DM_ULOG_MARK_REGION;  	fe->region = region; -	list_add(&fe->list, &lc->flush_list); +	list_add(&fe->list, &lc->mark_list);  	spin_unlock_irqrestore(&lc->flush_lock, flags);  	return; @@ -462,7 +688,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region)  	spin_lock_irqsave(&lc->flush_lock, flags);  	fe->type = DM_ULOG_CLEAR_REGION;  	fe->region = region; -	list_add(&fe->list, &lc->flush_list); +	list_add(&fe->list, &lc->clear_list);  	spin_unlock_irqrestore(&lc->flush_lock, flags);  	return; @@ -491,8 +717,7 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)  	rdata_size = sizeof(pkg);  	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, -				 NULL, 0, -				 (char *)&pkg, &rdata_size); +				 NULL, 0, (char *)&pkg, &rdata_size);  	*region = pkg.r;  	return (r) ? r : (int)pkg.i; @@ -518,8 +743,7 @@ static void userspace_set_region_sync(struct dm_dirty_log *log,  	pkg.i = (int64_t)in_sync;  	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, -				 (char *)&pkg, sizeof(pkg), -				 NULL, NULL); +				 (char *)&pkg, sizeof(pkg), NULL, NULL);  	/*  	 * It would be nice to be able to report failures. @@ -545,8 +769,7 @@ static region_t userspace_get_sync_count(struct dm_dirty_log *log)  	rdata_size = sizeof(sync_count);  	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, -				 NULL, 0, -				 (char *)&sync_count, &rdata_size); +				 NULL, 0, (char *)&sync_count, &rdata_size);  	if (r)  		return 0; @@ -573,8 +796,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,  	switch (status_type) {  	case STATUSTYPE_INFO:  		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, -					 NULL, 0, -					 result, &sz); +					 NULL, 0, result, &sz);  		if (r) {  			sz = 0; @@ -587,8 +809,10 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,  		BUG_ON(!table_args); /* There will always be a ' ' */  		table_args++; -		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, -		       lc->uuid, table_args); +		DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid); +		if (lc->integrated_flush) +			DMEMIT("integrated_flush "); +		DMEMIT("%s ", table_args);  		break;  	}  	return (r) ? 0 : (int)sz; @@ -684,7 +908,7 @@ static int __init userspace_dirty_log_init(void)  		return r;  	} -	DMINFO("version 1.0.0 loaded"); +	DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");  	return 0;  } @@ -694,7 +918,7 @@ static void __exit userspace_dirty_log_exit(void)  	dm_ulog_tfr_exit();  	mempool_destroy(flush_entry_pool); -	DMINFO("version 1.0.0 unloaded"); +	DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");  	return;  }  | 
