aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Bellinger <nab@daterainc.com>2013-08-19 15:20:28 -0700
committerNicholas Bellinger <nab@linux-iscsi.org>2013-09-10 16:41:59 -0700
commit68ff9b9b27525cdaaea81890456f65aed5ce0b70 (patch)
tree80219e09e0c9d42b4ae61987075d9d03624695fc
parent0123a9ec6a4fea20d5afea90c9b47fb73fb1bc34 (diff)
target: Add support for COMPARE_AND_WRITE emulation
This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
-rw-r--r--drivers/target/target_core_device.c1
-rw-r--r--drivers/target/target_core_sbc.c197
-rw-r--r--include/target/target_core_base.h1
3 files changed, 198 insertions, 1 deletions
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 0b5f86806f1..f3dc1f5fc41 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1413,6 +1413,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
spin_lock_init(&dev->se_port_lock);
spin_lock_init(&dev->se_tmr_lock);
spin_lock_init(&dev->qf_cmd_lock);
+ sema_init(&dev->caw_sem, 1);
atomic_set(&dev->dev_ordered_id, 0);
INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list);
spin_lock_init(&dev->t10_wwn.t10_vpd_lock);
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 5569b369710..dfb6603eab4 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -25,6 +25,7 @@
#include <linux/ratelimit.h>
#include <asm/unaligned.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_tcq.h>
#include <target/target_core_base.h>
#include <target/target_core_backend.h>
@@ -344,6 +345,177 @@ sbc_execute_rw(struct se_cmd *cmd)
cmd->data_direction);
}
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
+{
+ struct se_device *dev = cmd->se_dev;
+
+ cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+ /*
+ * Unlock ->caw_sem originally obtained during sbc_compare_and_write()
+ * before the original READ I/O submission.
+ */
+ up(&dev->caw_sem);
+
+ return TCM_NO_SENSE;
+}
+
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
+{
+ struct se_device *dev = cmd->se_dev;
+ struct scatterlist *write_sg = NULL, *sg;
+ unsigned char *buf, *addr;
+ struct sg_mapping_iter m;
+ unsigned int offset = 0, len;
+ unsigned int nlbas = cmd->t_task_nolb;
+ unsigned int block_size = dev->dev_attrib.block_size;
+ unsigned int compare_len = (nlbas * block_size);
+ sense_reason_t ret = TCM_NO_SENSE;
+ int rc, i;
+
+ buf = kzalloc(cmd->data_length, GFP_KERNEL);
+ if (!buf) {
+ pr_err("Unable to allocate compare_and_write buf\n");
+ return TCM_OUT_OF_RESOURCES;
+ }
+
+ write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents,
+ GFP_KERNEL);
+ if (!write_sg) {
+ pr_err("Unable to allocate compare_and_write sg\n");
+ ret = TCM_OUT_OF_RESOURCES;
+ goto out;
+ }
+ /*
+ * Setup verify and write data payloads from total NumberLBAs.
+ */
+ rc = sg_copy_to_buffer(cmd->t_data_sg, cmd->t_data_nents, buf,
+ cmd->data_length);
+ if (!rc) {
+ pr_err("sg_copy_to_buffer() failed for compare_and_write\n");
+ ret = TCM_OUT_OF_RESOURCES;
+ goto out;
+ }
+ /*
+ * Compare against SCSI READ payload against verify payload
+ */
+ for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, i) {
+ addr = (unsigned char *)kmap_atomic(sg_page(sg));
+ if (!addr) {
+ ret = TCM_OUT_OF_RESOURCES;
+ goto out;
+ }
+
+ len = min(sg->length, compare_len);
+
+ if (memcmp(addr, buf + offset, len)) {
+ pr_warn("Detected MISCOMPARE for addr: %p buf: %p\n",
+ addr, buf + offset);
+ kunmap_atomic(addr);
+ goto miscompare;
+ }
+ kunmap_atomic(addr);
+
+ offset += len;
+ compare_len -= len;
+ if (!compare_len)
+ break;
+ }
+
+ i = 0;
+ len = cmd->t_task_nolb * block_size;
+ sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG);
+ /*
+ * Currently assumes NoLB=1 and SGLs are PAGE_SIZE..
+ */
+ while (len) {
+ sg_miter_next(&m);
+
+ if (block_size < PAGE_SIZE) {
+ sg_set_page(&write_sg[i], m.page, block_size,
+ block_size);
+ } else {
+ sg_miter_next(&m);
+ sg_set_page(&write_sg[i], m.page, block_size,
+ 0);
+ }
+ len -= block_size;
+ i++;
+ }
+ sg_miter_stop(&m);
+ /*
+ * Save the original SGL + nents values before updating to new
+ * assignments, to be released in transport_free_pages() ->
+ * transport_reset_sgl_orig()
+ */
+ cmd->t_data_sg_orig = cmd->t_data_sg;
+ cmd->t_data_sg = write_sg;
+ cmd->t_data_nents_orig = cmd->t_data_nents;
+ cmd->t_data_nents = 1;
+
+ cmd->sam_task_attr = MSG_HEAD_TAG;
+ cmd->transport_complete_callback = compare_and_write_post;
+ /*
+ * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler
+ * for submitting the adjusted SGL to write instance user-data.
+ */
+ cmd->execute_cmd = sbc_execute_rw;
+
+ spin_lock_irq(&cmd->t_state_lock);
+ cmd->t_state = TRANSPORT_PROCESSING;
+ cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
+ spin_unlock_irq(&cmd->t_state_lock);
+
+ __target_execute_cmd(cmd);
+
+ kfree(buf);
+ return ret;
+
+miscompare:
+ pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n",
+ dev->transport->name);
+ ret = TCM_MISCOMPARE_VERIFY;
+out:
+ /*
+ * In the MISCOMPARE or failure case, unlock ->caw_sem obtained in
+ * sbc_compare_and_write() before the original READ I/O submission.
+ */
+ up(&dev->caw_sem);
+ kfree(write_sg);
+ kfree(buf);
+ return ret;
+}
+
+static sense_reason_t
+sbc_compare_and_write(struct se_cmd *cmd)
+{
+ struct se_device *dev = cmd->se_dev;
+ sense_reason_t ret;
+ int rc;
+ /*
+ * Submit the READ first for COMPARE_AND_WRITE to perform the
+ * comparision using SGLs at cmd->t_bidi_data_sg..
+ */
+ rc = down_interruptible(&dev->caw_sem);
+ if ((rc != 0) || signal_pending(current)) {
+ cmd->transport_complete_callback = NULL;
+ return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+ }
+
+ ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents,
+ DMA_FROM_DEVICE);
+ if (ret) {
+ cmd->transport_complete_callback = NULL;
+ up(&dev->caw_sem);
+ return ret;
+ }
+ /*
+ * Unlock of dev->caw_sem to occur in compare_and_write_callback()
+ * upon MISCOMPARE, or in compare_and_write_done() upon completion
+ * of WRITE instance user-data.
+ */
+ return TCM_NO_SENSE;
+}
+
sense_reason_t
sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
{
@@ -481,6 +653,28 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
}
break;
}
+ case COMPARE_AND_WRITE:
+ sectors = cdb[13];
+ /*
+ * Currently enforce COMPARE_AND_WRITE for a single sector
+ */
+ if (sectors > 1) {
+ pr_err("COMPARE_AND_WRITE contains NoLB: %u greater"
+ " than 1\n", sectors);
+ return TCM_INVALID_CDB_FIELD;
+ }
+ /*
+ * Double size because we have two buffers, note that
+ * zero is not an error..
+ */
+ size = 2 * sbc_get_size(cmd, sectors);
+ cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
+ cmd->t_task_nolb = sectors;
+ cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE;
+ cmd->execute_rw = ops->execute_rw;
+ cmd->execute_cmd = sbc_compare_and_write;
+ cmd->transport_complete_callback = compare_and_write_callback;
+ break;
case READ_CAPACITY:
size = READ_CAP_LEN;
cmd->execute_cmd = sbc_emulate_readcapacity;
@@ -620,7 +814,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
return TCM_ADDRESS_OUT_OF_RANGE;
}
- size = sbc_get_size(cmd, sectors);
+ if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE))
+ size = sbc_get_size(cmd, sectors);
}
return target_cmd_size_check(cmd, size);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 61a4dc89070..b5d0c2496c7 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -672,6 +672,7 @@ struct se_device {
spinlock_t se_port_lock;
spinlock_t se_tmr_lock;
spinlock_t qf_cmd_lock;
+ struct semaphore caw_sem;
/* Used for legacy SPC-2 reservationsa */
struct se_node_acl *dev_reserved_node_acl;
/* Used for ALUA Logical Unit Group membership */