Diffstat (limited to 'crypto/async_tx')
-rw-r--r--  crypto/async_tx/Kconfig              |   4
-rw-r--r--  crypto/async_tx/Makefile             |   1
-rw-r--r--  crypto/async_tx/async_memcpy.c       |  36
-rw-r--r--  crypto/async_tx/async_memset.c       |  88
-rw-r--r--  crypto/async_tx/async_pq.c           | 175
-rw-r--r--  crypto/async_tx/async_raid6_recov.c  |  62
-rw-r--r--  crypto/async_tx/async_tx.c           |  10
-rw-r--r--  crypto/async_tx/async_xor.c          | 130
-rw-r--r--  crypto/async_tx/raid6test.c          |  21
9 files changed, 256 insertions, 271 deletions
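
The diff below converts the async_tx helpers from open-coded dma_map_page()/dma_unmap_page() bookkeeping to the reference-counted dmaengine_unmap_data API, removes the unused async_memset support, and tightens the DMA completion checks. The following is a minimal sketch of the new mapping lifecycle, modeled on the async_memcpy() hunk that follows; my_submit_copy() and its parameters are hypothetical names used only for illustration, not part of the kernel API, and error handling for failed mappings is omitted.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

static struct dma_async_tx_descriptor *
my_submit_copy(struct dma_chan *chan, struct page *dest, struct page *src,
	       unsigned int dest_off, unsigned int src_off, size_t len)
{
	struct dma_device *device = chan->device;
	struct dma_async_tx_descriptor *tx = NULL;
	struct dmaengine_unmap_data *unmap;

	/* one reference-counted object now tracks every mapping for the op */
	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
	if (!unmap)
		return NULL;

	unmap->to_cnt = 1;		/* addr[0] is mapped DMA_TO_DEVICE */
	unmap->addr[0] = dma_map_page(device->dev, src, src_off, len,
				      DMA_TO_DEVICE);
	unmap->from_cnt = 1;		/* addr[1] is mapped DMA_FROM_DEVICE */
	unmap->addr[1] = dma_map_page(device->dev, dest, dest_off, len,
				      DMA_FROM_DEVICE);
	unmap->len = len;

	tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
					    unmap->addr[0], len, 0);
	if (tx)
		/* the descriptor takes its own reference, so the pages are
		 * unmapped on completion rather than by the submitter */
		dma_set_unmap(tx, unmap);

	/* drop the submitter's reference on both the async and sync paths */
	dmaengine_unmap_put(unmap);
	return tx;
}

The same pattern repeats in async_pq.c, async_raid6_recov.c and async_xor.c, with to_cnt, from_cnt and bidi_cnt recording how each address was mapped so the dmaengine core can issue the matching unmaps at completion time.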
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index 1b11abbb5c9..f38a58aef3e 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -10,10 +10,6 @@ config ASYNC_XOR  	select ASYNC_CORE  	select XOR_BLOCKS -config ASYNC_MEMSET -	tristate -	select ASYNC_CORE -  config ASYNC_PQ  	tristate  	select ASYNC_CORE diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index d1e0e6f72bc..462e4abbfe6 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -1,6 +1,5 @@  obj-$(CONFIG_ASYNC_CORE) += async_tx.o  obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o -obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o  obj-$(CONFIG_ASYNC_XOR) += async_xor.o  obj-$(CONFIG_ASYNC_PQ) += async_pq.o  obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 518c22bd956..f8c0b8dbeb7 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -25,6 +25,7 @@   */  #include <linux/kernel.h>  #include <linux/highmem.h> +#include <linux/module.h>  #include <linux/mm.h>  #include <linux/dma-mapping.h>  #include <linux/async_tx.h> @@ -49,27 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,  						      &dest, 1, &src, 1, len);  	struct dma_device *device = chan ? chan->device : NULL;  	struct dma_async_tx_descriptor *tx = NULL; +	struct dmaengine_unmap_data *unmap = NULL; -	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { -		dma_addr_t dma_dest, dma_src; +	if (device) +		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO); + +	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {  		unsigned long dma_prep_flags = 0;  		if (submit->cb_fn)  			dma_prep_flags |= DMA_PREP_INTERRUPT;  		if (submit->flags & ASYNC_TX_FENCE)  			dma_prep_flags |= DMA_PREP_FENCE; -		dma_dest = dma_map_page(device->dev, dest, dest_offset, len, -					DMA_FROM_DEVICE); -		dma_src = dma_map_page(device->dev, src, src_offset, len, -				       DMA_TO_DEVICE); +		unmap->to_cnt = 1; +		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len, +					      DMA_TO_DEVICE); +		unmap->from_cnt = 1; +		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len, +					      DMA_FROM_DEVICE); +		unmap->len = len; -		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src, -						    len, dma_prep_flags); +		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1], +						    unmap->addr[0], len, +						    dma_prep_flags);  	}  	if (tx) {  		pr_debug("%s: (async) len: %zu\n", __func__, len); + +		dma_set_unmap(tx, unmap);  		async_tx_submit(chan, tx, submit);  	} else {  		void *dest_buf, *src_buf; @@ -78,17 +88,19 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,  		/* wait for any prerequisite operations */  		async_tx_quiesce(&submit->depend_tx); -		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; -		src_buf = kmap_atomic(src, KM_USER1) + src_offset; +		dest_buf = kmap_atomic(dest) + dest_offset; +		src_buf = kmap_atomic(src) + src_offset;  		memcpy(dest_buf, src_buf, len); -		kunmap_atomic(src_buf, KM_USER1); -		kunmap_atomic(dest_buf, KM_USER0); +		kunmap_atomic(src_buf); +		kunmap_atomic(dest_buf);  		async_tx_sync_epilog(submit);  	} +	dmaengine_unmap_put(unmap); +  	return tx;  }  EXPORT_SYMBOL_GPL(async_memcpy); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c deleted file mode 100644 index 58e4a8752ae..00000000000 --- 
a/crypto/async_tx/async_memset.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * memory fill offload engine support - * - * Copyright © 2006, Intel Corporation. - * - *      Dan Williams <dan.j.williams@intel.com> - * - *      with architecture considerations by: - *      Neil Brown <neilb@suse.de> - *      Jeff Garzik <jeff@garzik.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/async_tx.h> - -/** - * async_memset - attempt to fill memory with a dma engine. - * @dest: destination page - * @val: fill value - * @offset: offset in pages to start transaction - * @len: length in bytes - * - * honored flags: ASYNC_TX_ACK - */ -struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, size_t len, -	     struct async_submit_ctl *submit) -{ -	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, -						      &dest, 1, NULL, 0, len); -	struct dma_device *device = chan ? chan->device : NULL; -	struct dma_async_tx_descriptor *tx = NULL; - -	if (device && is_dma_fill_aligned(device, offset, 0, len)) { -		dma_addr_t dma_dest; -		unsigned long dma_prep_flags = 0; - -		if (submit->cb_fn) -			dma_prep_flags |= DMA_PREP_INTERRUPT; -		if (submit->flags & ASYNC_TX_FENCE) -			dma_prep_flags |= DMA_PREP_FENCE; -		dma_dest = dma_map_page(device->dev, dest, offset, len, -					DMA_FROM_DEVICE); - -		tx = device->device_prep_dma_memset(chan, dma_dest, val, len, -						    dma_prep_flags); -	} - -	if (tx) { -		pr_debug("%s: (async) len: %zu\n", __func__, len); -		async_tx_submit(chan, tx, submit); -	} else { /* run the memset synchronously */ -		void *dest_buf; -		pr_debug("%s: (sync) len: %zu\n", __func__, len); - -		dest_buf = page_address(dest) + offset; - -		/* wait for any prerequisite operations */ -		async_tx_quiesce(&submit->depend_tx); - -		memset(dest_buf, val, len); - -		async_tx_sync_epilog(submit); -	} - -	return tx; -} -EXPORT_SYMBOL_GPL(async_memset); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("asynchronous memset api"); -MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index fdd8257d35d..d05327caf69 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -21,6 +21,7 @@   */  #include <linux/kernel.h>  #include <linux/interrupt.h> +#include <linux/module.h>  #include <linux/dma-mapping.h>  #include <linux/raid/pq.h>  #include <linux/async_tx.h> @@ -45,49 +46,24 @@ static struct page *pq_scribble_page;   * do_async_gen_syndrome - asynchronously calculate P and/or Q   */  static __async_inline struct dma_async_tx_descriptor * -do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, -		      const unsigned char *scfs, unsigned int offset, int disks, -		      size_t len, dma_addr_t *dma_src, 
+do_async_gen_syndrome(struct dma_chan *chan, +		      const unsigned char *scfs, int disks, +		      struct dmaengine_unmap_data *unmap, +		      enum dma_ctrl_flags dma_flags,  		      struct async_submit_ctl *submit)  {  	struct dma_async_tx_descriptor *tx = NULL;  	struct dma_device *dma = chan->device; -	enum dma_ctrl_flags dma_flags = 0;  	enum async_tx_flags flags_orig = submit->flags;  	dma_async_tx_callback cb_fn_orig = submit->cb_fn;  	dma_async_tx_callback cb_param_orig = submit->cb_param;  	int src_cnt = disks - 2; -	unsigned char coefs[src_cnt];  	unsigned short pq_src_cnt;  	dma_addr_t dma_dest[2];  	int src_off = 0; -	int idx; -	int i; -	/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ -	if (P(blocks, disks)) -		dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, -					   len, DMA_BIDIRECTIONAL); -	else -		dma_flags |= DMA_PREP_PQ_DISABLE_P; -	if (Q(blocks, disks)) -		dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, -					   len, DMA_BIDIRECTIONAL); -	else -		dma_flags |= DMA_PREP_PQ_DISABLE_Q; - -	/* convert source addresses being careful to collapse 'empty' -	 * sources and update the coefficients accordingly -	 */ -	for (i = 0, idx = 0; i < src_cnt; i++) { -		if (blocks[i] == NULL) -			continue; -		dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, -					    DMA_TO_DEVICE); -		coefs[idx] = scfs[i]; -		idx++; -	} -	src_cnt = idx; +	if (submit->flags & ASYNC_TX_FENCE) +		dma_flags |= DMA_PREP_FENCE;  	while (src_cnt > 0) {  		submit->flags = flags_orig; @@ -99,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,  		if (src_cnt > pq_src_cnt) {  			submit->flags &= ~ASYNC_TX_ACK;  			submit->flags |= ASYNC_TX_FENCE; -			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;  			submit->cb_fn = NULL;  			submit->cb_param = NULL;  		} else { -			dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;  			submit->cb_fn = cb_fn_orig;  			submit->cb_param = cb_param_orig;  			if (cb_fn_orig)  				dma_flags |= DMA_PREP_INTERRUPT;  		} -		if (submit->flags & ASYNC_TX_FENCE) -			dma_flags |= DMA_PREP_FENCE; -		/* Since we have clobbered the src_list we are committed -		 * to doing this asynchronously.  Drivers force forward -		 * progress in case they can not provide a descriptor +		/* Drivers force forward progress in case they can not provide +		 * a descriptor  		 */  		for (;;) { +			dma_dest[0] = unmap->addr[disks - 2]; +			dma_dest[1] = unmap->addr[disks - 1];  			tx = dma->device_prep_dma_pq(chan, dma_dest, -						     &dma_src[src_off], +						     &unmap->addr[src_off],  						     pq_src_cnt, -						     &coefs[src_off], len, +						     &scfs[src_off], unmap->len,  						     dma_flags);  			if (likely(tx))  				break; @@ -128,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,  			dma_async_issue_pending(chan);  		} +		dma_set_unmap(tx, unmap);  		async_tx_submit(chan, tx, submit);  		submit->depend_tx = tx; @@ -187,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,   * set to NULL those buffers will be replaced with the raid6_zero_page   * in the synchronous path and omitted in the hardware-asynchronous   * path. - * - * 'blocks' note: if submit->scribble is NULL then the contents of - * 'blocks' may be overwritten to perform address conversions - * (dma_map_page() or page_address()).   
*/  struct dma_async_tx_descriptor *  async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, @@ -201,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,  						      &P(blocks, disks), 2,  						      blocks, src_cnt, len);  	struct dma_device *device = chan ? chan->device : NULL; -	dma_addr_t *dma_src = NULL; +	struct dmaengine_unmap_data *unmap = NULL;  	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); -	if (submit->scribble) -		dma_src = submit->scribble; -	else if (sizeof(dma_addr_t) <= sizeof(struct page *)) -		dma_src = (dma_addr_t *) blocks; +	if (device) +		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); -	if (dma_src && device && +	if (unmap &&  	    (src_cnt <= dma_maxpq(device, 0) ||  	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&  	    is_dma_pq_aligned(device, offset, 0, len)) { +		struct dma_async_tx_descriptor *tx; +		enum dma_ctrl_flags dma_flags = 0; +		unsigned char coefs[src_cnt]; +		int i, j; +  		/* run the p+q asynchronously */  		pr_debug("%s: (async) disks: %d len: %zu\n",  			 __func__, disks, len); -		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, -					     disks, len, dma_src, submit); + +		/* convert source addresses being careful to collapse 'empty' +		 * sources and update the coefficients accordingly +		 */ +		unmap->len = len; +		for (i = 0, j = 0; i < src_cnt; i++) { +			if (blocks[i] == NULL) +				continue; +			unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset, +						      len, DMA_TO_DEVICE); +			coefs[j] = raid6_gfexp[i]; +			unmap->to_cnt++; +			j++; +		} + +		/* +		 * DMAs use destinations as sources, +		 * so use BIDIRECTIONAL mapping +		 */ +		unmap->bidi_cnt++; +		if (P(blocks, disks)) +			unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks), +							offset, len, DMA_BIDIRECTIONAL); +		else { +			unmap->addr[j++] = 0; +			dma_flags |= DMA_PREP_PQ_DISABLE_P; +		} + +		unmap->bidi_cnt++; +		if (Q(blocks, disks)) +			unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks), +						       offset, len, DMA_BIDIRECTIONAL); +		else { +			unmap->addr[j++] = 0; +			dma_flags |= DMA_PREP_PQ_DISABLE_Q; +		} + +		tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit); +		dmaengine_unmap_put(unmap); +		return tx;  	} +	dmaengine_unmap_put(unmap); +  	/* run the pq synchronously */  	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); @@ -276,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,  	struct dma_async_tx_descriptor *tx;  	unsigned char coefs[disks-2];  	enum dma_ctrl_flags dma_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; -	dma_addr_t *dma_src = NULL; -	int src_cnt = 0; +	struct dmaengine_unmap_data *unmap = NULL;  	BUG_ON(disks < 4); -	if (submit->scribble) -		dma_src = submit->scribble; -	else if (sizeof(dma_addr_t) <= sizeof(struct page *)) -		dma_src = (dma_addr_t *) blocks; +	if (device) +		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); -	if (dma_src && device && disks <= dma_maxpq(device, 0) && +	if (unmap && disks <= dma_maxpq(device, 0) &&  	    is_dma_pq_aligned(device, offset, 0, len)) {  		struct device *dev = device->dev; -		dma_addr_t *pq = &dma_src[disks-2]; -		int i; +		dma_addr_t pq[2]; +		int i, j = 0, src_cnt = 0;  		pr_debug("%s: (async) disks: %d len: %zu\n",  			 __func__, disks, len); -		if (!P(blocks, disks)) + +		unmap->len = len; +		for (i = 0; i < disks-2; i++) +			if (likely(blocks[i])) { +				unmap->addr[j] = dma_map_page(dev, blocks[i], +							      offset, len, +							      DMA_TO_DEVICE); +				coefs[j] = raid6_gfexp[i]; +				unmap->to_cnt++; +				src_cnt++; +				j++; +			} + +		if (!P(blocks, disks)) { +			pq[0] = 0;  			dma_flags |= DMA_PREP_PQ_DISABLE_P; -		else +		} else {  			pq[0] = dma_map_page(dev, P(blocks, disks),  					     offset, len,  					     DMA_TO_DEVICE); -		if (!Q(blocks, disks)) +			unmap->addr[j++] = pq[0]; +			unmap->to_cnt++; +		} +		if (!Q(blocks, disks)) { +			pq[1] = 0;  			dma_flags |= DMA_PREP_PQ_DISABLE_Q; -		else +		} else {  			pq[1] = dma_map_page(dev, Q(blocks, disks),  					     offset, len,  					     DMA_TO_DEVICE); +			unmap->addr[j++] = pq[1]; +			unmap->to_cnt++; +		}  		if (submit->flags & ASYNC_TX_FENCE)  			dma_flags |= DMA_PREP_FENCE; -		for (i = 0; i < disks-2; i++) -			if (likely(blocks[i])) { -				dma_src[src_cnt] = dma_map_page(dev, blocks[i], -								offset, len, -								DMA_TO_DEVICE); -				coefs[src_cnt] = raid6_gfexp[i]; -				src_cnt++; -			} -  		for (;;) { -			tx = device->device_prep_dma_pq_val(chan, pq, dma_src, +			tx = device->device_prep_dma_pq_val(chan, pq, +							    unmap->addr,  							    src_cnt,  							    coefs,  							    len, pqres, @@ -329,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,  			async_tx_quiesce(&submit->depend_tx);  			dma_async_issue_pending(chan);  		} + +		dma_set_unmap(tx, unmap);  		async_tx_submit(chan, tx, submit);  		return tx; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index ce038d861eb..934a8498149 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -22,9 +22,11 @@   */  #include <linux/kernel.h>  #include <linux/interrupt.h> +#include <linux/module.h>  #include <linux/dma-mapping.h>  #include <linux/raid/pq.h>  #include <linux/async_tx.h> +#include <linux/dmaengine.h>  static struct dma_async_tx_descriptor *  async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, @@ -33,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,  	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,  						      &dest, 1, srcs, 2, len);  	struct dma_device *dma = chan ? 
chan->device : NULL; +	struct dmaengine_unmap_data *unmap = NULL;  	const u8 *amul, *bmul;  	u8 ax, bx;  	u8 *a, *b, *c; -	if (dma) { -		dma_addr_t dma_dest[2]; -		dma_addr_t dma_src[2]; +	if (dma) +		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + +	if (unmap) {  		struct device *dev = dma->dev; +		dma_addr_t pq[2];  		struct dma_async_tx_descriptor *tx;  		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;  		if (submit->flags & ASYNC_TX_FENCE)  			dma_flags |= DMA_PREP_FENCE; -		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); -		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); -		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); -		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, +		unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); +		unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); +		unmap->to_cnt = 2; + +		unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); +		unmap->bidi_cnt = 1; +		/* engine only looks at Q, but expects it to follow P */ +		pq[1] = unmap->addr[2]; + +		unmap->len = len; +		tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,  					     len, dma_flags);  		if (tx) { +			dma_set_unmap(tx, unmap);  			async_tx_submit(chan, tx, submit); +			dmaengine_unmap_put(unmap);  			return tx;  		}  		/* could not get a descriptor, unmap and fall through to  		 * the synchronous path  		 */ -		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); -		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); -		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); +		dmaengine_unmap_put(unmap);  	}  	/* run the operation synchronously */ @@ -88,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,  	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,  						      &dest, 1, &src, 1, len);  	struct dma_device *dma = chan ? 
chan->device : NULL; +	struct dmaengine_unmap_data *unmap = NULL;  	const u8 *qmul; /* Q multiplier table */  	u8 *d, *s; -	if (dma) { +	if (dma) +		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + +	if (unmap) {  		dma_addr_t dma_dest[2]; -		dma_addr_t dma_src[1];  		struct device *dev = dma->dev;  		struct dma_async_tx_descriptor *tx;  		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;  		if (submit->flags & ASYNC_TX_FENCE)  			dma_flags |= DMA_PREP_FENCE; -		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); -		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); -		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, -					     len, dma_flags); +		unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); +		unmap->to_cnt++; +		unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); +		dma_dest[1] = unmap->addr[1]; +		unmap->bidi_cnt++; +		unmap->len = len; + +		/* this looks funny, but the engine looks for Q at +		 * dma_dest[1] and ignores dma_dest[0] as a dest +		 * due to DMA_PREP_PQ_DISABLE_P +		 */ +		tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr, +					     1, &coef, len, dma_flags); +  		if (tx) { +			dma_set_unmap(tx, unmap); +			dmaengine_unmap_put(unmap);  			async_tx_submit(chan, tx, submit);  			return tx;  		} @@ -112,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,  		/* could not get a descriptor, unmap and fall through to  		 * the synchronous path  		 */ -		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); -		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); +		dmaengine_unmap_put(unmap);  	}  	/* no channel available, or failed to allocate a descriptor, so diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 7f2c00a4520..39ea4791a3c 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -24,6 +24,7 @@   *   */  #include <linux/rculist.h> +#include <linux/module.h>  #include <linux/kernel.h>  #include <linux/async_tx.h> @@ -127,8 +128,8 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,  		}  		device->device_issue_pending(chan);  	} else { -		if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) -			panic("%s: DMA_ERROR waiting for depend_tx\n", +		if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE) +			panic("%s: DMA error waiting for depend_tx\n",  			      __func__);  		tx->tx_submit(tx);  	} @@ -279,8 +280,9 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)  		 * we are referring to the correct operation  		 */  		BUG_ON(async_tx_test_ack(*tx)); -		if (dma_wait_for_async_tx(*tx) == DMA_ERROR) -			panic("DMA_ERROR waiting for transaction\n"); +		if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE) +			panic("%s: DMA error waiting for transaction\n", +			      __func__);  		async_tx_ack(*tx);  		*tx = NULL;  	} diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 079ae8ca590..3c562f5a60b 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -25,6 +25,7 @@   */  #include <linux/kernel.h>  #include <linux/interrupt.h> +#include <linux/module.h>  #include <linux/mm.h>  #include <linux/dma-mapping.h>  #include <linux/raid/xor.h> @@ -32,48 +33,31 @@  /* do_async_xor - dma map the pages and perform the xor with an engine */  static __async_inline struct dma_async_tx_descriptor * -do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, -	     unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, 
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,  	     struct async_submit_ctl *submit)  {  	struct dma_device *dma = chan->device;  	struct dma_async_tx_descriptor *tx = NULL; -	int src_off = 0; -	int i;  	dma_async_tx_callback cb_fn_orig = submit->cb_fn;  	void *cb_param_orig = submit->cb_param;  	enum async_tx_flags flags_orig = submit->flags; -	enum dma_ctrl_flags dma_flags; -	int xor_src_cnt = 0; -	dma_addr_t dma_dest; - -	/* map the dest bidrectional in case it is re-used as a source */ -	dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL); -	for (i = 0; i < src_cnt; i++) { -		/* only map the dest once */ -		if (!src_list[i]) -			continue; -		if (unlikely(src_list[i] == dest)) { -			dma_src[xor_src_cnt++] = dma_dest; -			continue; -		} -		dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset, -						      len, DMA_TO_DEVICE); -	} -	src_cnt = xor_src_cnt; +	enum dma_ctrl_flags dma_flags = 0; +	int src_cnt = unmap->to_cnt; +	int xor_src_cnt; +	dma_addr_t dma_dest = unmap->addr[unmap->to_cnt]; +	dma_addr_t *src_list = unmap->addr;  	while (src_cnt) { +		dma_addr_t tmp; +  		submit->flags = flags_orig; -		dma_flags = 0;  		xor_src_cnt = min(src_cnt, (int)dma->max_xor); -		/* if we are submitting additional xors, leave the chain open, -		 * clear the callback parameters, and leave the destination -		 * buffer mapped +		/* if we are submitting additional xors, leave the chain open +		 * and clear the callback parameters  		 */  		if (src_cnt > xor_src_cnt) {  			submit->flags &= ~ASYNC_TX_ACK;  			submit->flags |= ASYNC_TX_FENCE; -			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;  			submit->cb_fn = NULL;  			submit->cb_param = NULL;  		} else { @@ -84,36 +68,41 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,  			dma_flags |= DMA_PREP_INTERRUPT;  		if (submit->flags & ASYNC_TX_FENCE)  			dma_flags |= DMA_PREP_FENCE; -		/* Since we have clobbered the src_list we are committed -		 * to doing this asynchronously.  
Drivers force forward progress -		 * in case they can not provide a descriptor + +		/* Drivers force forward progress in case they can not provide a +		 * descriptor  		 */ -		tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], -					      xor_src_cnt, len, dma_flags); +		tmp = src_list[0]; +		if (src_list > unmap->addr) +			src_list[0] = dma_dest; +		tx = dma->device_prep_dma_xor(chan, dma_dest, src_list, +					      xor_src_cnt, unmap->len, +					      dma_flags); +		src_list[0] = tmp; +  		if (unlikely(!tx))  			async_tx_quiesce(&submit->depend_tx); -		/* spin wait for the preceeding transactions to complete */ +		/* spin wait for the preceding transactions to complete */  		while (unlikely(!tx)) {  			dma_async_issue_pending(chan);  			tx = dma->device_prep_dma_xor(chan, dma_dest, -						      &dma_src[src_off], -						      xor_src_cnt, len, +						      src_list, +						      xor_src_cnt, unmap->len,  						      dma_flags);  		} +		dma_set_unmap(tx, unmap);  		async_tx_submit(chan, tx, submit);  		submit->depend_tx = tx;  		if (src_cnt > xor_src_cnt) {  			/* drop completed sources */  			src_cnt -= xor_src_cnt; -			src_off += xor_src_cnt; -  			/* use the intermediate result a source */ -			dma_src[--src_off] = dma_dest;  			src_cnt++; +			src_list += xor_src_cnt - 1;  		} else  			break;  	} @@ -188,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,  	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,  						      &dest, 1, src_list,  						      src_cnt, len); -	dma_addr_t *dma_src = NULL; +	struct dma_device *device = chan ? chan->device : NULL; +	struct dmaengine_unmap_data *unmap = NULL;  	BUG_ON(src_cnt <= 1); -	if (submit->scribble) -		dma_src = submit->scribble; -	else if (sizeof(dma_addr_t) <= sizeof(struct page *)) -		dma_src = (dma_addr_t *) src_list; +	if (device) +		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO); + +	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) { +		struct dma_async_tx_descriptor *tx; +		int i, j; -	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {  		/* run the xor asynchronously */  		pr_debug("%s (async): len: %zu\n", __func__, len); -		return do_async_xor(chan, dest, src_list, offset, src_cnt, len, -				    dma_src, submit); +		unmap->len = len; +		for (i = 0, j = 0; i < src_cnt; i++) { +			if (!src_list[i]) +				continue; +			unmap->to_cnt++; +			unmap->addr[j++] = dma_map_page(device->dev, src_list[i], +							offset, len, DMA_TO_DEVICE); +		} + +		/* map it bidirectional as it may be re-used as a source */ +		unmap->addr[j] = dma_map_page(device->dev, dest, offset, len, +					      DMA_BIDIRECTIONAL); +		unmap->bidi_cnt = 1; + +		tx = do_async_xor(chan, unmap, submit); +		dmaengine_unmap_put(unmap); +		return tx;  	} else { +		dmaengine_unmap_put(unmap);  		/* run the xor synchronously */  		pr_debug("%s (sync): len: %zu\n", __func__, len);  		WARN_ONCE(chan, "%s: no space for dma address conversion\n", @@ -229,9 +236,7 @@ EXPORT_SYMBOL_GPL(async_xor);  static int page_is_zero(struct page *p, unsigned int offset, size_t len)  { -	char *a = page_address(p) + offset; -	return ((*(u32 *) a) == 0 && -		memcmp(a, a + 4, len - 4) == 0); +	return !memchr_inv(page_address(p) + offset, 0, len);  }  static inline struct dma_chan * @@ -269,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,  	struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);  	struct dma_device *device = 
chan ? chan->device : NULL;  	struct dma_async_tx_descriptor *tx = NULL; -	dma_addr_t *dma_src = NULL; +	struct dmaengine_unmap_data *unmap = NULL;  	BUG_ON(src_cnt <= 1); -	if (submit->scribble) -		dma_src = submit->scribble; -	else if (sizeof(dma_addr_t) <= sizeof(struct page *)) -		dma_src = (dma_addr_t *) src_list; +	if (device) +		unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO); -	if (dma_src && device && src_cnt <= device->max_xor && +	if (unmap && src_cnt <= device->max_xor &&  	    is_dma_xor_aligned(device, offset, 0, len)) {  		unsigned long dma_prep_flags = 0;  		int i; @@ -289,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,  			dma_prep_flags |= DMA_PREP_INTERRUPT;  		if (submit->flags & ASYNC_TX_FENCE)  			dma_prep_flags |= DMA_PREP_FENCE; -		for (i = 0; i < src_cnt; i++) -			dma_src[i] = dma_map_page(device->dev, src_list[i], -						  offset, len, DMA_TO_DEVICE); -		tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, +		for (i = 0; i < src_cnt; i++) { +			unmap->addr[i] = dma_map_page(device->dev, src_list[i], +						      offset, len, DMA_TO_DEVICE); +			unmap->to_cnt++; +		} +		unmap->len = len; + +		tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,  						     len, result,  						     dma_prep_flags);  		if (unlikely(!tx)) { @@ -302,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,  			while (!tx) {  				dma_async_issue_pending(chan);  				tx = device->device_prep_dma_xor_val(chan, -					dma_src, src_cnt, len, result, +					unmap->addr, src_cnt, len, result,  					dma_prep_flags);  			}  		} - +		dma_set_unmap(tx, unmap);  		async_tx_submit(chan, tx, submit);  	} else {  		enum async_tx_flags flags_orig = submit->flags; @@ -328,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,  		async_tx_sync_epilog(submit);  		submit->flags = flags_orig;  	} +	dmaengine_unmap_put(unmap);  	return tx;  } diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index c1321935ebc..dad95f45b88 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -21,12 +21,14 @@   */  #include <linux/async_tx.h>  #include <linux/gfp.h> +#include <linux/mm.h>  #include <linux/random.h> +#include <linux/module.h>  #undef pr  #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) -#define NDISKS 16 /* Including P and Q */ +#define NDISKS 64 /* Including P and Q */  static struct page *dataptrs[NDISKS];  static addr_conv_t addr_conv[NDISKS]; @@ -44,15 +46,10 @@ static void callback(void *param)  static void makedata(int disks)  { -	int i, j; +	int i;  	for (i = 0; i < disks; i++) { -		for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { -			u32 *p = page_address(data[i]) + j; - -			*p = random32(); -		} - +		prandom_bytes(page_address(data[i]), PAGE_SIZE);  		dataptrs[i] = data[i];  	}  } @@ -222,6 +219,14 @@ static int raid6_test(void)  		err += test(11, &tests);  		err += test(12, &tests);  	} + +	/* the 24 disk case is special for ioatdma as it is the boudary point +	 * at which it needs to switch from 8-source ops to 16-source +	 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set) +	 */ +	if (NDISKS > 24) +		err += test(24, &tests); +  	err += test(NDISKS, &tests);  	pr("\n");  | 
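
Beyond the unmap conversion, the async_tx.c hunks above replace the old "== DMA_ERROR" test with a stricter check that treats anything other than DMA_COMPLETE as fatal. A minimal sketch of that pattern, assuming a valid dependent descriptor; quiesce_dependency() is a hypothetical wrapper used only for illustration:

#include <linux/dmaengine.h>
#include <linux/kernel.h>

static void quiesce_dependency(struct dma_async_tx_descriptor *depend_tx)
{
	/* wait on the dependency; any status other than DMA_COMPLETE
	 * (including DMA_ERROR) now triggers the panic */
	if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE)
		panic("%s: DMA error waiting for depend_tx\n", __func__);
}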
