diff options
author | Akhil Bhansali <abhansali@stec-inc.com> | 2013-10-15 14:19:07 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-11-08 09:10:28 -0700 |
commit | e67f86b31ae5be8a88bec27b5ecb18dc2ffc9c56 (patch) | |
tree | 8e0cf5c5dd6a266edbce015ffc3e23b7c3e8cf37 /drivers/block | |
parent | 0317cd6de852a70e0374e7eb40a013072274386f (diff) |
Add support for sTec's pci-e flash card Kronos
Signed-off-by: Akhil Bhansali <abhansali@stec-inc.com>
Signed-off-by: Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Folded patch, contributions to clean up this driver from:
Jens Axboe
Dan Carpenter
Andrew Morton
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 10 | ||||
-rw-r--r-- | drivers/block/Makefile | 2 | ||||
-rw-r--r-- | drivers/block/skd_main.c | 5817 | ||||
-rw-r--r-- | drivers/block/skd_s1120.h | 354 |
4 files changed, 6183 insertions, 0 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index e07a5fd58ad..555aed0b50d 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -316,6 +316,16 @@ config BLK_DEV_NVME To compile this driver as a module, choose M here: the module will be called nvme. +config BLK_DEV_SKD + tristate "STEC S1120 Block Driver" + depends on PCI + depends on 64BIT + ---help--- + Saying Y or M here will enable support for the + STEC, Inc. S1120 PCIe SSD. + + Use device /dev/skd$N and /dev/skd$Np$M. + config BLK_DEV_OSD tristate "OSD object-as-blkdev support" depends on SCSI_OSD_ULD diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399a8d9..f33b3669428 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o +obj-$(CONFIG_BLK_DEV_SKD) += skd.o obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o @@ -43,4 +44,5 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ nvme-y := nvme-core.o nvme-scsi.o +skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c new file mode 100644 index 00000000000..3110f68eced --- /dev/null +++ b/drivers/block/skd_main.c @@ -0,0 +1,5817 @@ +/* Copyright 2012 STEC, Inc. + * + * This file is licensed under the terms of the 3-clause + * BSD License (http://opensource.org/licenses/BSD-3-Clause) + * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html), + * at your option. Both licenses are also available in the LICENSE file + * distributed with this project. This file may not be copied, modified, + * or distributed except in accordance with those terms. + * Gordoni Waidhofer <gwaidhofer@stec-inc.com> + * Initial Driver Design! + * Thomas Swann <tswann@stec-inc.com> + * Interrupt handling. 
+ * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com> + * biomode implementation. + * Akhil Bhansali <abhansali@stec-inc.com> + * Added support for DISCARD / FLUSH and FUA. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/blkdev.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/compiler.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/time.h> +#include <linux/hdreg.h> +#include <linux/dma-mapping.h> +#include <linux/completion.h> +#include <linux/scatterlist.h> +#include <linux/version.h> +#include <linux/err.h> +#include <linux/scatterlist.h> +#include <linux/aer.h> +#include <linux/ctype.h> +#include <linux/wait.h> +#include <linux/uio.h> +#include <scsi/scsi.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_tcq.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/sg.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <asm-generic/unaligned.h> + +#include "skd_s1120.h" +/* Backing variables for the skd_dbg_level and skd_isr_comp_limit module parameters. */ +static int skd_dbg_level; +static int skd_isr_comp_limit = 4; +/* NOTE(review): presumably PCIe link-speed codes (2.5/5/8 GT/s); confirm against the code that consumes them. */ +enum { + STEC_LINK_2_5GTS = 0, + STEC_LINK_5GTS = 1, + STEC_LINK_8GTS = 2, + STEC_LINK_UNKNOWN = 0xFF +}; +/* Values (0, 1, 2) that the local flush counter in skd_request_fn() is compared against. */ +enum { + SKD_FLUSH_INITIALIZER, + SKD_FLUSH_ZERO_SIZE_FIRST, + SKD_FLUSH_DATA_SECOND, +}; +/* Debug print: emits via pr_err() only when (skdev)->dbg_level > 0. */ +#define DPRINTK(skdev, fmt, args ...) \ + do { \ + if (unlikely((skdev)->dbg_level > 0)) { \ + pr_err("%s:%s:%d " fmt, (skdev)->name, \ + __func__, __LINE__, ## args); \ + } \ + } while (0) +/* Soft assertion: logs the failed expression via pr_err(); execution continues. */ +#define SKD_ASSERT(expr) \ + do { \ + if (unlikely(!(expr))) { \ + pr_err("Assertion failed! %s,%s,%s,line=%d\n", \ + # expr, __FILE__, __func__, __LINE__); \ + } \ + } while (0) +/* Verbose debug print: same output as DPRINTK but requires (skdev)->dbg_level > 1. */ +#define VPRINTK(skdev, fmt, args ...) 
\ + do { \ + if (unlikely((skdev)->dbg_level > 1)) { \ + pr_err("%s:%s:%d " fmt, (skdev)->name, \ + __func__, __LINE__, ## args); \ + } \ + } while (0) + + +#define DRV_NAME "skd" +#define DRV_VERSION "2.2.1" +#define DRV_BUILD_ID "0260" +#define PFX DRV_NAME ": " +#define DRV_BIN_VERSION 0x100 +#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID + +MODULE_AUTHOR("bug-reports: support@stec-inc.com"); +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_DESCRIPTION("STEC s1120 PCIe SSD block/BIO driver (b" DRV_BUILD_ID ")"); +MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID); + +#define PCI_VENDOR_ID_STEC 0x1B39 +#define PCI_DEVICE_ID_S1120 0x0001 + +#define SKD_FUA_NV (1 << 1) /* ORed into CDB byte 1 by skd_request_fn() when REQ_FUA is set */ +#define SKD_MINORS_PER_DEVICE 16 + +#define SKD_MAX_QUEUE_DEPTH 200u + +#define SKD_PAUSE_TIMEOUT (5 * 1000) + +#define SKD_N_FITMSG_BYTES (512u) + +#define SKD_N_SPECIAL_CONTEXT 32u +#define SKD_N_SPECIAL_FITMSG_BYTES (128u) + +/* SG elements are 32 bytes, so we can make this 4096 and still be under the + * 128KB limit. That allows 4096*4K = 16M xfer size + */ +#define SKD_N_SG_PER_REQ_DEFAULT 256u +#define SKD_N_SG_PER_SPECIAL 256u + +#define SKD_N_COMPLETION_ENTRY 256u +#define SKD_N_READ_CAP_BYTES (8u) + +#define SKD_N_INTERNAL_BYTES (512u) + +/* + * Context ids: the low bits (SKD_ID_SLOT_AND_TABLE_MASK, 0x03FF) hold the + * table selector and slot number. The uniquifier lives in the bits above + * that mask; SKD_ID_INCR (0x400) is its lowest bit, so a u16 id carries + * 6 bits of uniquifier (0xFC00). + */ +#define SKD_ID_INCR (0x400) +#define SKD_ID_TABLE_MASK (3u << 8u) +#define SKD_ID_RW_REQUEST (0u << 8u) +#define SKD_ID_INTERNAL (1u << 8u) +#define SKD_ID_SPECIAL_REQUEST (2u << 8u) +#define SKD_ID_FIT_MSG (3u << 8u) +#define SKD_ID_SLOT_MASK 0x00FFu +#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu + +#define SKD_N_TIMEOUT_SLOT 4u +#define SKD_TIMEOUT_SLOT_MASK 3u + +#define SKD_N_MAX_SECTORS 2048u + +#define SKD_MAX_RETRIES 2u + +#define SKD_TIMER_SECONDS(seconds) (seconds) +#define SKD_TIMER_MINUTES(minutes) ((minutes) * (60)) + +#define INQ_STD_NBYTES 36 +#define SKD_DISCARD_CDB_LENGTH 24 /* bytes of UNMAP parameter data written by skd_prep_discard_cdb() */ +/* Driver-level state, stored in skd_device.state. */ +enum skd_drvr_state { + SKD_DRVR_STATE_LOAD, + SKD_DRVR_STATE_IDLE, + SKD_DRVR_STATE_BUSY, + SKD_DRVR_STATE_STARTING, + SKD_DRVR_STATE_ONLINE, + 
SKD_DRVR_STATE_PAUSING, + SKD_DRVR_STATE_PAUSED, + SKD_DRVR_STATE_DRAINING_TIMEOUT, + SKD_DRVR_STATE_RESTARTING, + SKD_DRVR_STATE_RESUMING, + SKD_DRVR_STATE_STOPPING, + SKD_DRVR_STATE_FAULT, + SKD_DRVR_STATE_DISAPPEARED, + SKD_DRVR_STATE_PROTOCOL_MISMATCH, + SKD_DRVR_STATE_BUSY_ERASE, + SKD_DRVR_STATE_BUSY_SANITIZE, + SKD_DRVR_STATE_BUSY_IMMINENT, + SKD_DRVR_STATE_WAIT_BOOT, + SKD_DRVR_STATE_SYNCING, +}; +/* The timeouts below are expressed in seconds (SKD_TIMER_MINUTES() multiplies by 60). */ +#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u) +#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u) +#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u) +#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u) +#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u) +#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u) +#define SKD_START_WAIT_SECONDS 90u +/* State of one skd_request_context (its state member). */ +enum skd_req_state { + SKD_REQ_STATE_IDLE, + SKD_REQ_STATE_SETUP, + SKD_REQ_STATE_BUSY, + SKD_REQ_STATE_COMPLETED, + SKD_REQ_STATE_TIMEOUT, + SKD_REQ_STATE_ABORTED, +}; +/* State of one skd_fitmsg_context (its state member). */ +enum skd_fit_msg_state { + SKD_MSG_STATE_IDLE, + SKD_MSG_STATE_BUSY, +}; + +enum skd_check_status_action { + SKD_CHECK_STATUS_REPORT_GOOD, + SKD_CHECK_STATUS_REPORT_SMART_ALERT, + SKD_CHECK_STATUS_REQUEUE_REQUEST, + SKD_CHECK_STATUS_REPORT_ERROR, + SKD_CHECK_STATUS_BUSY_IMMINENT, +}; +/* One FIT message buffer; idle entries are chained through next on skd_device.skmsg_free_list. */ +struct skd_fitmsg_context { + enum skd_fit_msg_state state; + + struct skd_fitmsg_context *next; + + u32 id; + u16 outstanding; + + u32 length; + u32 offset; + + u8 *msg_buf; + dma_addr_t mb_dma_address; +}; +/* Per-I/O context; idle entries are chained through next on skd_device.skreq_free_list. */ +struct skd_request_context { + enum skd_req_state state; + + struct skd_request_context *next; + + u16 id; + u32 fitmsg_id; + + struct request *req; + struct bio *bio; + unsigned long start_time; + u8 flush_cmd; + u8 discard_page; + + u32 timeout_stamp; + u8 sg_data_dir; + struct scatterlist *sg; + u32 n_sg; + u32 sg_byte_count; + + struct fit_sg_descriptor *sksg_list; + dma_addr_t sksg_dma_address; + + struct fit_completion_entry_v1 completion; + + struct fit_comp_error_info err_info; + +}; +#define SKD_DATA_DIR_HOST_TO_CARD 1 +#define SKD_DATA_DIR_CARD_TO_HOST 2 +#define 
SKD_DATA_DIR_NONE 3 /* especially for DISCARD requests. */ +/* A request context extended with its own data buffer and message buffer. NOTE(review): presumably used for pass-through and internal commands; confirm. */ +struct skd_special_context { + struct skd_request_context req; + + u8 orphaned; + + void *data_buf; + dma_addr_t db_dma_address; + + u8 *msg_buf; + dma_addr_t mb_dma_address; +}; +/* NOTE(review): looks like the working state of one SG_IO pass-through call (sg_io_hdr, CDB copy, iovec); confirm against its users. */ +struct skd_sg_io { + fmode_t mode; + void __user *argp; + + struct sg_io_hdr sg; + + u8 cdb[16]; + + u32 dxfer_len; + u32 iovcnt; + struct sg_iovec *iov; + struct sg_iovec no_iov_iov; + + struct skd_special_context *skspcl; +}; +/* Interrupt mode; the numeric values match the skd_isr_type module parameter (0==legacy, 1==MSI, 2==MSI-X). */ +typedef enum skd_irq_type { + SKD_IRQ_LEGACY, + SKD_IRQ_MSI, + SKD_IRQ_MSIX +} skd_irq_type_t; + +#define SKD_MAX_BARS 2 +/* Per-device driver state; the request path reaches it through q->queuedata. */ +struct skd_device { + volatile void __iomem *mem_map[SKD_MAX_BARS]; + resource_size_t mem_phys[SKD_MAX_BARS]; + u32 mem_size[SKD_MAX_BARS]; + + skd_irq_type_t irq_type; + u32 msix_count; + struct skd_msix_entry *msix_entries; + + struct pci_dev *pdev; + int pcie_error_reporting_is_enabled; + + spinlock_t lock; + struct gendisk *disk; + struct request_queue *queue; + struct device *class_dev; + int gendisk_on; + int sync_done; + + atomic_t device_count; + u32 devno; + u32 major; + char name[32]; + char isr_name[30]; + + enum skd_drvr_state state; + u32 drive_state; + /* in_flight counts requests placed into FIT messages by skd_request_fn(); the limits below gate that loop. */ + u32 in_flight; + u32 cur_max_queue_depth; + u32 queue_low_water_mark; + u32 dev_max_queue_depth; + + u32 num_fitmsg_context; + u32 num_req_context; + + u32 timeout_slot[SKD_N_TIMEOUT_SLOT]; + u32 timeout_stamp; + struct skd_fitmsg_context *skmsg_free_list; + struct skd_fitmsg_context *skmsg_table; + + struct skd_request_context *skreq_free_list; + struct skd_request_context *skreq_table; + + struct skd_special_context *skspcl_free_list; + struct skd_special_context *skspcl_table; + + struct skd_special_context internal_skspcl; + u32 read_cap_blocksize; + u32 read_cap_last_lba; + int read_cap_is_valid; + int inquiry_is_valid; + u8 inq_serial_num[13]; /*12 chars plus null term */ + u8 id_str[80]; /* holds a composite name (pci + sernum) */ + + u8 skcomp_cycle; + u32 skcomp_ix; + struct fit_completion_entry_v1 
*skcomp_table; + struct fit_comp_error_info *skerr_table; + dma_addr_t cq_dma_address; + + wait_queue_head_t waitq; + + struct timer_list timer; + u32 timer_countdown; + u32 timer_substate; + + int n_special; + int sgs_per_request; + u32 last_mtd; + + u32 proto_ver; + + int dbg_level; + u32 connect_time_stamp; + int connect_retries; +#define SKD_MAX_CONNECT_RETRIES 16 + u32 drive_jiffies; + + u32 timo_slot; + + + struct work_struct completion_worker; + /* bio mode only: pending bios and the software stopped flag used instead of the block-layer queue flag. */ + struct bio_list bio_queue; + int queue_stopped; + + struct list_head flush_list; +}; + +#define SKD_FLUSH_JOB "skd-flush-jobs" +struct kmem_cache *skd_flush_slab; /* NOTE(review): not static, unlike the other file-scope variables here */ + +/* + * These commands hold "nonzero size FLUSH bios", + * which are enqueued in skdev->flush_list during + * completion of "zero size FLUSH commands". + * It will be active in biomode. + */ +struct skd_flush_cmd { + void *cmd; + struct list_head flist; +}; +/* Register access wrappers around the skd_reg_*() helpers that follow. */ +#define SKD_WRITEL(DEV, VAL, OFF) skd_reg_write32(DEV, VAL, OFF) +#define SKD_READL(DEV, OFF) skd_reg_read32(DEV, OFF) +#define SKD_WRITEQ(DEV, VAL, OFF) skd_reg_write64(DEV, VAL, OFF) +/* Read the 32-bit register at @offset within mem_map[1]; when dbg_level >= 2 the value is also logged via VPRINTK. */ +static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset) +{ + u32 val; + + if (likely(skdev->dbg_level < 2)) + return readl(skdev->mem_map[1] + offset); + else { + barrier(); + val = readl(skdev->mem_map[1] + offset); + barrier(); + VPRINTK(skdev, "offset %x = %x\n", offset, val); + return val; + } + +} +/* Write @val to the 32-bit register at @offset within mem_map[1], then read the same register back (result discarded; presumably to flush the posted write). */ +static inline void skd_reg_write32(struct skd_device *skdev, u32 val, + u32 offset) +{ + if (likely(skdev->dbg_level < 2)) { + writel(val, skdev->mem_map[1] + offset); + barrier(); + readl(skdev->mem_map[1] + offset); + barrier(); + } else { + barrier(); + writel(val, skdev->mem_map[1] + offset); + barrier(); + readl(skdev->mem_map[1] + offset); + barrier(); + VPRINTK(skdev, "offset %x = %x\n", offset, val); + } +} +/* 64-bit counterpart of skd_reg_write32(): writeq() followed by a readq() of the same register. */ +static inline void skd_reg_write64(struct skd_device *skdev, u64 val, + u32 offset) +{ + if (likely(skdev->dbg_level < 2)) { + writeq(val, skdev->mem_map[1] + offset); + barrier(); + 
readq(skdev->mem_map[1] + offset); + barrier(); + } else { + barrier(); + writeq(val, skdev->mem_map[1] + offset); + barrier(); + readq(skdev->mem_map[1] + offset); + barrier(); + VPRINTK(skdev, "offset %x = %016llx\n", offset, val); + } +} + +/* Module parameters. Every one below is registered with permission 0444. */ +#define SKD_IRQ_DEFAULT SKD_IRQ_MSI +static int skd_isr_type = SKD_IRQ_DEFAULT; + +module_param(skd_isr_type, int, 0444); +MODULE_PARM_DESC(skd_isr_type, "Interrupt type capability." + " (0==legacy, 1==MSI, 2==MSI-X, default==1)"); +/* skd_request_fn() sends a FIT msg once it holds this many requests (or the buffer is full). */ +#define SKD_MAX_REQ_PER_MSG_DEFAULT 1 +static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT; + +module_param(skd_max_req_per_msg, int, 0444); +MODULE_PARM_DESC(skd_max_req_per_msg, + "Maximum SCSI requests packed in a single message." + " (1-14, default==1)"); +/* The numeric default and its _STR twin (used in the description string) must be kept in sync by hand. */ +#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64 +#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64" +static int skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT; + +module_param(skd_max_queue_depth, int, 0444); +MODULE_PARM_DESC(skd_max_queue_depth, + "Maximum SCSI requests issued to s1120." + " (1-200, default==" SKD_MAX_QUEUE_DEPTH_DEFAULT_STR ")"); + +static int skd_sgs_per_request = SKD_N_SG_PER_REQ_DEFAULT; +module_param(skd_sgs_per_request, int, 0444); +MODULE_PARM_DESC(skd_sgs_per_request, + "Maximum SG elements per block request." + " (1-4096, default==256)"); + +static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT; +module_param(skd_max_pass_thru, int, 0444); +MODULE_PARM_DESC(skd_max_pass_thru, + "Maximum SCSI pass-thru at a time." " (1-50, default==32)"); + +module_param(skd_dbg_level, int, 0444); +MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)"); + +module_param(skd_isr_comp_limit, int, 0444); +MODULE_PARM_DESC(skd_isr_comp_limit, "s1120 isr comp limit (0=none) default=4"); +/* Non-zero selects the bio-based code paths (the else side of every "if (!skd_bio)" test). */ +static int skd_bio; +module_param(skd_bio, int, 0444); +MODULE_PARM_DESC(skd_bio, + "Register as a bio device instead of block (0, 1) default=0"); + +/* Major device number dynamically assigned. 
*/ +static u32 skd_major; +/* Forward declarations. */ +static struct skd_device *skd_construct(struct pci_dev *pdev); +static void skd_destruct(struct skd_device *skdev); +static const struct block_device_operations skd_blockdev_ops; +static void skd_send_fitmsg(struct skd_device *skdev, + struct skd_fitmsg_context *skmsg); +static void skd_send_special_fitmsg(struct skd_device *skdev, + struct skd_special_context *skspcl); +static void skd_request_fn(struct request_queue *rq); +static void skd_end_request(struct skd_device *skdev, + struct skd_request_context *skreq, int error); +static int skd_preop_sg_list(struct skd_device *skdev, + struct skd_request_context *skreq); +static void skd_postop_sg_list(struct skd_device *skdev, + struct skd_request_context *skreq); + +static void skd_restart_device(struct skd_device *skdev); +static int skd_quiesce_dev(struct skd_device *skdev); +static int skd_unquiesce_dev(struct skd_device *skdev); +static void skd_release_special(struct skd_device *skdev, + struct skd_special_context *skspcl); +static void skd_disable_interrupts(struct skd_device *skdev); +static void skd_isr_fwstate(struct skd_device *skdev); +static void skd_recover_requests(struct skd_device *skdev, int requeue); +static void skd_soft_reset(struct skd_device *skdev); +/* NOTE(review): the two *_to_str() prototypes below are not static, unlike their neighbours. */ +static const char *skd_name(struct skd_device *skdev); +const char *skd_drive_state_to_str(int state); +const char *skd_skdev_state_to_str(enum skd_drvr_state state); +static void skd_log_skdev(struct skd_device *skdev, const char *event); +static void skd_log_skmsg(struct skd_device *skdev, + struct skd_fitmsg_context *skmsg, const char *event); +static void skd_log_skreq(struct skd_device *skdev, + struct skd_request_context *skreq, const char *event); + +/* FLUSH FUA flag handling. 
*/ +static int skd_flush_cmd_enqueue(struct skd_device *, void *); +static void *skd_flush_cmd_dequeue(struct skd_device *); + + +/* + ***************************************************************************** + * READ/WRITE REQUESTS + * + * The queue helpers that follow each have two implementations selected + * by skd_bio: the block-layer request queue in request mode, or the + * skdev->queue_stopped flag and skdev->bio_queue list in bio mode. + * skd_stop_queue() marks the queue stopped. + ***************************************************************************** + */ +static void skd_stop_queue(struct skd_device *skdev) +{ + if (!skd_bio) + blk_stop_queue(skdev->queue); + else + skdev->queue_stopped = 1; +} +/* Clear the stopped indication only; unlike skd_start_queue() this does not kick request processing. */ +static void skd_unstop_queue(struct skd_device *skdev) +{ + if (!skd_bio) + queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue); + else + skdev->queue_stopped = 0; +} +/* Restart the queue; in bio mode this logs, clears queue_stopped and calls skd_request_fn() directly. */ +static void skd_start_queue(struct skd_device *skdev) +{ + if (!skd_bio) { + blk_start_queue(skdev->queue); + } else { + pr_err("(%s): Starting queue\n", skd_name(skdev)); + skdev->queue_stopped = 0; + skd_request_fn(skdev->queue); + } +} +/* Return non-zero when the queue is currently marked stopped. */ +static int skd_queue_stopped(struct skd_device *skdev) +{ + if (!skd_bio) + return blk_queue_stopped(skdev->queue); + else + return skdev->queue_stopped; +} +/* Request mode: take every request off the queue and end it with -EIO. */ +static void skd_fail_all_pending_blk(struct skd_device *skdev) +{ + struct request_queue *q = skdev->queue; + struct request *req; + + for (;; ) { + req = blk_peek_request(q); + if (req == NULL) + break; + blk_start_request(req); + __blk_end_request_all(req, -EIO); + } +} +/* Bio mode: pop every bio off skdev->bio_queue and end it with -EIO. */ +static void skd_fail_all_pending_bio(struct skd_device *skdev) +{ + struct bio *bio; + int error = -EIO; + + for (;; ) { + bio = bio_list_pop(&skdev->bio_queue); + + if (bio == NULL) + break; + + bio_endio(bio, error); + } +} +/* Fail all pending I/O using the variant that matches skd_bio. */ +static void skd_fail_all_pending(struct skd_device *skdev) +{ + if (!skd_bio) + skd_fail_all_pending_blk(skdev); + else + skd_fail_all_pending_bio(skdev); +} +/* Append @bio to skdev->bio_queue and run skd_request_fn(), both under skdev->lock with interrupts saved. */ +static void skd_make_request(struct request_queue *q, struct bio *bio) +{ + struct skd_device *skdev = q->queuedata; + unsigned long flags; + + spin_lock_irqsave(&skdev->lock, flags); + + bio_list_add(&skdev->bio_queue, bio); + skd_request_fn(skdev->queue); + + 
spin_unlock_irqrestore(&skdev->lock, flags); +} +/* Fill in a 10-byte read/write CDB: opcode by direction, bytes 2-5 the 32-bit LBA and bytes 7-8 the 16-bit block count, most-significant byte first. */ +static void +skd_prep_rw_cdb(struct skd_scsi_request *scsi_req, + int data_dir, unsigned lba, + unsigned count) +{ + if (data_dir == READ) + scsi_req->cdb[0] = 0x28; /* READ(10) */ + else + scsi_req->cdb[0] = 0x2a; /* WRITE(10) */ + + scsi_req->cdb[1] = 0; + scsi_req->cdb[2] = (lba & 0xff000000) >> 24; + scsi_req->cdb[3] = (lba & 0xff0000) >> 16; + scsi_req->cdb[4] = (lba & 0xff00) >> 8; + scsi_req->cdb[5] = (lba & 0xff); + scsi_req->cdb[6] = 0; + scsi_req->cdb[7] = (count & 0xff00) >> 8; + scsi_req->cdb[8] = count & 0xff; + scsi_req->cdb[9] = 0; +} +/* Mark @skreq as a flush command and build a CDB with opcode 0x35 and all other bytes zero. */ +static void +skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req, + struct skd_request_context *skreq) +{ + skreq->flush_cmd = 1; + + scsi_req->cdb[0] = 0x35; /* SYNCHRONIZE CACHE(10) */ + scsi_req->cdb[1] = 0; + scsi_req->cdb[2] = 0; + scsi_req->cdb[3] = 0; + scsi_req->cdb[4] = 0; + scsi_req->cdb[5] = 0; + scsi_req->cdb[6] = 0; + scsi_req->cdb[7] = 0; + scsi_req->cdb[8] = 0; + scsi_req->cdb[9] = 0; +} +/* Build an UNMAP command: the CDB goes into @scsi_req and a SKD_DISCARD_CDB_LENGTH-byte parameter list is written into @page, which is then attached to the request (request mode) or to the first bio_vec of the bio (bio mode). */ +static void +skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, + struct skd_request_context *skreq, + struct page *page, + u32 lba, u32 count) +{ + char *buf; + unsigned long len; + struct request *req; + + buf = page_address(page); + len = SKD_DISCARD_CDB_LENGTH; + + scsi_req->cdb[0] = UNMAP; + scsi_req->cdb[8] = len; /* parameter list length, low byte */ + /* Parameter list: 8-byte header followed by one 16-byte block descriptor. */ + put_unaligned_be16(6 + 16, &buf[0]); /* UNMAP data length */ + put_unaligned_be16(16, &buf[2]); /* block descriptor data length */ + put_unaligned_be64(lba, &buf[8]); /* descriptor: starting LBA */ + put_unaligned_be32(count, &buf[16]); /* descriptor: number of blocks */ + + if (!skd_bio) { + req = skreq->req; + blk_add_request_payload(req, page, len); + req->buffer = buf; + } else { + skreq->bio->bi_io_vec->bv_page = page; + skreq->bio->bi_io_vec->bv_offset = 0; + skreq->bio->bi_io_vec->bv_len = len; + + skreq->bio->bi_vcnt = 1; + skreq->bio->bi_phys_segments = 1; + } +} + +static void skd_request_fn_not_online(struct request_queue *q); +/* Dispatch loop: takes pending requests (or bios when skd_bio is set), encodes each as a SCSI request inside a FIT message and sends the message once it is full. skd_make_request() calls this with skdev->lock held. */ +static void skd_request_fn(struct request_queue *q) +{ + struct skd_device *skdev = q->queuedata; + struct skd_fitmsg_context *skmsg = NULL; + struct fit_msg_hdr *fmh = NULL; 
+ struct skd_request_context *skreq; + struct request *req = NULL; + struct bio *bio = NULL; + struct skd_scsi_request *scsi_req; + struct page *page; + unsigned long io_flags; + int error; + u32 lba; + u32 count; + int data_dir; + u32 be_lba; + u32 be_count; + u64 be_dmaa; + u64 cmdctxt; + u32 timo_slot; + void *cmd_ptr; + int flush, fua; + /* Any state other than ONLINE is handed to skd_request_fn_not_online(). */ + if (skdev->state != SKD_DRVR_STATE_ONLINE) { + skd_request_fn_not_online(q); + return; + } + /* A stopped queue is released only once a FIT msg and a request context are free and in_flight is below the low-water mark. */ + if (skd_queue_stopped(skdev)) { + if (skdev->skmsg_free_list == NULL || + skdev->skreq_free_list == NULL || + skdev->in_flight >= skdev->queue_low_water_mark) + /* There is still some kind of shortage */ + return; + + skd_unstop_queue(skdev); + } + + /* + * Stop conditions: + * - There are no more native requests + * - There are already the maximum number of requests in progress + * - There are no more skd_request_context entries + * - There are no more FIT msg buffers + */ + for (;; ) { + /* flush is a counter compared against the SKD_FLUSH_* values further down; fua is a plain flag. */ + flush = fua = 0; + + if (!skd_bio) { + req = blk_peek_request(q); + + /* Are there any native requests to start? */ + if (req == NULL) + break; + /* The start sector is truncated to 32 bits by the cast below. */ + lba = (u32)blk_rq_pos(req); + count = blk_rq_sectors(req); + data_dir = rq_data_dir(req); + io_flags = req->cmd_flags; + + if (io_flags & REQ_FLUSH) + flush++; + + if (io_flags & REQ_FUA) + fua++; + + VPRINTK(skdev, + "new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", + req, lba, lba, count, count, data_dir); + } else { + if (!list_empty(&skdev->flush_list)) { + /* + * Process data part of FLUSH request. flush is bumped + * once here and once more below if the bio still has + * REQ_FLUSH set, which yields SKD_FLUSH_DATA_SECOND. + */ + bio = (struct bio *)skd_flush_cmd_dequeue(skdev); + flush++; + VPRINTK(skdev, "processing FLUSH request with data.\n"); + } else { + /* peek at our bio queue */ + bio = bio_list_peek(&skdev->bio_queue); + } + + /* Are there any native requests to start? 
*/ + if (bio == NULL) + break; + + lba = (u32)bio->bi_sector; + count = bio_sectors(bio); + data_dir = bio_data_dir(bio); + io_flags = bio->bi_rw; + + VPRINTK(skdev, + "new bio=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", + bio, lba, lba, count, count, data_dir); + + if (io_flags & REQ_FLUSH) + flush++; + + if (io_flags & REQ_FUA) + fua++; + } + + /* At this point we know there is a request + * (from our bio queue or the request queue, depending on + * skd_bio); now do the checks for resources. + */ + /* NOTE(review): each break below leaves the loop without starting the item. A bio already returned by skd_flush_cmd_dequeue() above is not put back here; confirm it cannot be lost. */ + /* Are too many requests already in progress? */ + if (skdev->in_flight >= skdev->cur_max_queue_depth) { + VPRINTK(skdev, "qdepth %d, limit %d\n", + skdev->in_flight, skdev->cur_max_queue_depth); + break; + } + + /* Is a skd_request_context available? */ + skreq = skdev->skreq_free_list; + if (skreq == NULL) { + VPRINTK(skdev, "Out of req=%p\n", q); + break; + } + SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE); + SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0); + + /* Now we check to see if we can get a fit msg */ + if (skmsg == NULL) { + if (skdev->skmsg_free_list == NULL) { + VPRINTK(skdev, "Out of msg\n"); + break; + } + } + /* Reset the per-use fields of the context. */ + skreq->flush_cmd = 0; + skreq->n_sg = 0; + skreq->sg_byte_count = 0; + skreq->discard_page = 0; + + /* + * OK to now dequeue request from either bio or q. + * + * At this point we are committed to either start or reject + * the native request. Note that skd_request_context is + * available but is still at the head of the free list. + */ + if (!skd_bio) { + blk_start_request(req); + skreq->req = req; + skreq->fitmsg_id = 0; + } else { + if (unlikely(flush == SKD_FLUSH_DATA_SECOND)) { + skreq->bio = bio; + } else { + skreq->bio = bio_list_pop(&skdev->bio_queue); + SKD_ASSERT(skreq->bio == bio); + skreq->start_time = jiffies; + part_inc_in_flight(&skdev->disk->part0, + bio_data_dir(bio)); + } + + skreq->fitmsg_id = 0; + } + + /* Either a FIT msg is in progress or we have to start one. */ + if (skmsg == NULL) { + /* Are there any FIT msg buffers available? 
*/ + skmsg = skdev->skmsg_free_list; + if (skmsg == NULL) { + VPRINTK(skdev, "Out of msg skdev=%p\n", skdev); + break; + } + SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE); + SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0); + /* Unlink the FIT msg from the free list and mark it busy. */ + skdev->skmsg_free_list = skmsg->next; + + skmsg->state = SKD_MSG_STATE_BUSY; + skmsg->id += SKD_ID_INCR; + + /* Initialize the FIT msg header */ + fmh = (struct fit_msg_hdr *)skmsg->msg_buf; + memset(fmh, 0, sizeof(*fmh)); + fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT; + skmsg->length = sizeof(*fmh); + } + + skreq->fitmsg_id = skmsg->id; + + /* + * Note that a FIT msg may have just been started + * but contains no SoFIT requests yet. + */ + + /* + * Transcode the request, checking as we go. The outcome of + * the transcoding is represented by the error variable. + */ + cmd_ptr = &skmsg->msg_buf[skmsg->length]; + memset(cmd_ptr, 0, 32); + /* NOTE(review): the literal 32 above is presumably sizeof(struct skd_scsi_request), which is what skmsg->length is later advanced by; confirm. be_lba and be_count are assigned here but never read in this function. */ + be_lba = cpu_to_be32(lba); + be_count = cpu_to_be32(count); + be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address); + cmdctxt = skreq->id + SKD_ID_INCR; + /* The tag is the id the context will carry once it is claimed (id += SKD_ID_INCR after skip_sg). */ + scsi_req = cmd_ptr; + scsi_req->hdr.tag = cmdctxt; + scsi_req->hdr.sg_list_dma_address = be_dmaa; + + if (data_dir == READ) + skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST; + else + skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD; + /* Choose the CDB: UNMAP for a discard, the zero-size flush, or a plain read/write. */ + if (io_flags & REQ_DISCARD) { + page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!page) { + pr_err("request_fn:Page allocation failed.\n"); + skd_end_request(skdev, skreq, -ENOMEM); + break; + } + skreq->discard_page = 1; + skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); + + } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { + skd_prep_zerosize_flush_cdb(scsi_req, skreq); + SKD_ASSERT(skreq->flush_cmd == 1); + + } else { + skd_prep_rw_cdb(scsi_req, data_dir, lba, count); + } + + if (fua) + scsi_req->cdb[1] |= SKD_FUA_NV; + /* No scatter/gather list is built for a request that has no bio or, in bio mode, for the zero-size flush. */ + if ((!skd_bio && !req->bio) || + (skd_bio && flush == SKD_FLUSH_ZERO_SIZE_FIRST)) + goto skip_sg; + + error = skd_preop_sg_list(skdev, skreq); + + if (error != 0) { + /* + * Complete the native request 
with error. + * Note that the request context is still at the + * head of the free list, and that the SoFIT request + * was encoded into the FIT msg buffer but the FIT + * msg length has not been updated. In short, the + * only resource that has been allocated but might + * not be used is that the FIT msg could be empty. + */ + DPRINTK(skdev, "error Out\n"); + skd_end_request(skdev, skreq, error); + continue; + } +/* When the SG setup was skipped, sg_byte_count still holds the 0 it was reset to at the top of this iteration. */ +skip_sg: + scsi_req->hdr.sg_list_len_bytes = + cpu_to_be32(skreq->sg_byte_count); + + /* Complete resource allocations. */ + skdev->skreq_free_list = skreq->next; + skreq->state = SKD_REQ_STATE_BUSY; + skreq->id += SKD_ID_INCR; + /* Account for the request just encoded into the FIT msg. */ + skmsg->length += sizeof(struct skd_scsi_request); + fmh->num_protocol_cmds_coalesced++; + + /* + * Update the active request counts. + * Capture the timeout timestamp. + */ + skreq->timeout_stamp = skdev->timeout_stamp; + timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK; + skdev->timeout_slot[timo_slot]++; + skdev->in_flight++; + VPRINTK(skdev, "req=0x%x busy=%d\n", + skreq->id, skdev->in_flight); + + /* + * If the FIT msg buffer is full, or it already holds + * skd_max_req_per_msg requests, send it. + */ + if (skmsg->length >= SKD_N_FITMSG_BYTES || + fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) { + skd_send_fitmsg(skdev, skmsg); + skmsg = NULL; + fmh = NULL; + } + } + + /* + * Is a FIT msg in progress? If it is empty put the buffer back + * on the free list. If it is non-empty send what we got. + * This minimizes latency when there are fewer requests than + * what fits in a FIT msg. + */ + if (skmsg != NULL) { + /* Bigger than just a FIT msg header? */ + if (skmsg->length > sizeof(struct fit_msg_hdr)) { + VPRINTK(skdev, "sending msg=%p, len %d\n", + skmsg, skmsg->length); + skd_send_fitmsg(skdev, skmsg); + } else { + /* + * The FIT msg is empty. It means we got started + * on the msg, but the requests were rejected. 
+ */ + skmsg->state = SKD_MSG_STATE_IDLE; + skmsg->id += SKD_ID_INCR; + skmsg->next = skdev->skmsg_free_list; + skdev->skmsg_free_list = skmsg; + } + skmsg = NULL; + fmh = NULL; + } + + /* + * If req is non-NULL it means there is something to do but + * we are out of a resource. + * NOTE(review): req is also still non-NULL after the alloc_page() + * failure path in the loop, where skd_end_request() has already + * been called for it; confirm stopping the queue is intended there. + */ + if (((!skd_bio) && req) || + ((skd_bio) && bio_list_peek(&skdev->bio_queue))) + skd_stop_queue(skdev); +} +/* Request-mode completion: frees the discard payload page if this context attached one, logs a failure, then ends the whole request with @error. */ +static void skd_end_request_blk(struct skd_device *skdev, + struct skd_request_context *skreq, int error) +{ + struct request *req = skreq->req; + unsigned int io_flags = req->cmd_flags; + /* NOTE(review): the VPRINTK format string below has no trailing newline. */ + if ((io_flags & REQ_DISCARD) && + (skreq->discard_page == 1)) { + VPRINTK(skdev, "skd_end_request_blk, free the page!"); + free_page((unsigned long)req->buffer); + req->buffer = NULL; + } + /* NOTE(review): the req declared inside the block below shadows the identical outer variable. */ + if (unlikely(error)) { + struct request *req = skreq->req; + char *cmd = (rq_data_dir(req) == READ) ? "read" : "write"; + u32 lba = (u32)blk_rq_pos(req); + u32 count = blk_rq_sectors(req); + + pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n", + skd_name(skdev), cmd, lba, count, skreq->id); + } else + VPRINTK(skdev, "id=0x%x error=%d\n", skreq->id, error); + + __blk_end_request_all(skreq->req, error); +} + +static int skd_preop_sg_list_blk(struct skd_device *skdev, + struct skd_request_context *skreq) +{ + struct request *req = skreq->req; + int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD; + int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE; + struct scatterlist *sg = &skreq->sg[0]; + int n_sg; + int i; + + skreq->sg_byte_count = 0; + + /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD || + skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */ + + n_sg = blk_rq_map_sg(skdev->queue, req, sg); + if (n_sg <= 0) + return -EINVAL; + + /* + * Map scatterlist to PCI bus addresses. + * Note PCI might change the number of entries. 
+ */ + n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir); + if (n_sg <= 0) + return -EINVAL; + + SKD_ASSERT(n_sg <= skdev->sgs_per_request); + + skreq->n_sg = n_sg; + + for (i = 0; i < n_sg; i++) { + struct fit_sg_descriptor *sgd = &skreq->sksg_list[i]; + u32 cnt = sg_dma_len(&sg[i]); + uint64_t dma_addr = sg_dma_address(&sg[i]); + + sgd->control = FIT_SGD_CONTROL_NOT_LAST; + sgd->byte_count = cnt; + skreq->sg_byte_count += cnt; + sgd->host_side_addr = dma_addr; + sgd->dev_side_addr = 0; + } + + skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL; + skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST; + + if (unlikely(skdev->dbg_level > 1)) { + VPRINTK(skdev, "skreq=%x sksg_list=%p sksg_dma=%llx\n", + skreq->id, skreq->sksg_list, skreq->sksg_dma_address); + for (i = 0; i < n_sg; i++) { + struct fit_sg_descriptor *sgd = &skreq->sksg_list[i]; + VPRINTK(skdev, " sg[%d] count=%u ctrl=0x%x " + "addr=0x%llx next=0x%llx\n", + i, sgd->byte_count, sgd->control, |