From 859361a228258edf4821d9f5635825033eca78e8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 2 Aug 2012 14:05:59 -0600 Subject: NVMe: Free cmdid on nvme_submit_bio error nvme_map_bio() is called after the cmdid is allocated, so we have to free the cmdid before returning from nvme_submit_bio() if nvme_map_bio() returned an error. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 931769e133e..954a61018dc 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -237,7 +237,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, *fn = special_completion; return CMD_CTX_INVALID; } - *fn = info[cmdid].fn; + if (fn) + *fn = info[cmdid].fn; ctx = info[cmdid].ctx; info[cmdid].fn = special_completion; info[cmdid].ctx = CMD_CTX_COMPLETED; @@ -589,7 +590,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs); if (result < 0) - goto free_iod; + goto free_cmdid; length = result; cmnd->rw.command_id = cmdid; @@ -609,6 +610,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; + free_cmdid: + free_cmdid(nvmeq, cmdid, NULL); free_iod: nvme_free_iod(nvmeq->dev, iod); nomem: -- cgit v1.2.3-70-g09d2 From 3295874b6074d749516d6decd43afad7bf6e38ff Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Aug 2012 14:57:49 -0600 Subject: NVMe: End queued bio requests when freeing queue If the queue has bios queued on it when it is freed, bio_endio() must be called for them first. 
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 954a61018dc..af88635e44e 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -909,6 +909,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) spin_lock_irq(&nvmeq->q_lock); nvme_cancel_ios(nvmeq, false); + while (bio_list_peek(&nvmeq->sq_cong)) { + struct bio *bio = bio_list_pop(&nvmeq->sq_cong); + bio_endio(bio, -EIO); + } spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); -- cgit v1.2.3-70-g09d2 From f4f117f64baf8840d22266d518227b2a186d294b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Sep 2012 10:49:05 -0600 Subject: NVMe: Set result from user admin command The ioctl data structure includes space for the 'result' of the admin command to be returned; it just wasn't filled in. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index af88635e44e..47c86045428 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1237,12 +1237,17 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_admin_cmd(dev, &c, NULL); + status = nvme_submit_admin_cmd(dev, &c, &cmd.result); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); nvme_free_iod(dev, iod); } + + if (!status && copy_to_user(&ucmd->result, &cmd.result, + sizeof(cmd.result))) + status = -EFAULT; + return status; } -- cgit v1.2.3-70-g09d2 From 08df1e05657fc6712e520e7c09cc6c86160ceb35 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Sep 2012 10:52:13 -0600 Subject: NVMe: Add result to nvme_get_features nvme_get_features() was not returning the result. 
Add a parameter to return the result in (similar to nvme_set_features()) and change all callers. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 47c86045428..c1d5444f0cb 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -838,8 +838,8 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, return nvme_submit_admin_cmd(dev, &c, NULL); } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned nsid, dma_addr_t dma_addr) +static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -849,7 +849,7 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_admin_cmd(dev, &c, result); } static int nvme_set_features(struct nvme_dev *dev, unsigned fid, @@ -1535,7 +1535,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) continue; res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, - dma_addr + 4096); + dma_addr + 4096, NULL); if (res) continue; -- cgit v1.2.3-70-g09d2 From 6ecec74520d8a357726e6c12f99080dbe7b347dd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 26 Sep 2012 12:49:27 -0600 Subject: NVMe: Define SMART log This data structure is defined in the NVMe specification. It's not used by the kernel, but is available for use by userspace software. 
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 1 + include/linux/nvme.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index c1d5444f0cb..270805cf8d4 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -135,6 +135,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); + BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); } typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c25cccaa555..4fa3b0b9b07 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -137,6 +137,34 @@ enum { NVME_LBAF_RP_DEGRADED = 3, }; +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __u8 rsvd192[320]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; -- cgit v1.2.3-70-g09d2 From 2b1960341576bf51c01b12fefeb1cc53820923e7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Nov 2012 11:59:23 -0700 Subject: NVMe: Initialize iod nents to 0 For commands that do not map a scatter list, we need to initialize the iod's number of sg entries (nents) to 0 and not unmap in this case.
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 270805cf8d4..993c014d195 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -337,6 +337,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) iod->offset = offsetof(struct nvme_iod, sg[nseg]); iod->npages = -1; iod->length = nbytes; + iod->nents = 0; } return iod; @@ -377,7 +378,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + if (iod->nents) + dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); nvme_free_iod(dev, iod); if (status) { -- cgit v1.2.3-70-g09d2 From a72d9002f80bffd7e4c7d60e5a9caa0cddffe894 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 28 Feb 2013 10:34:23 +0800 Subject: xen/xen-blkback: preq.dev is used without initialized If the call to xen_vbd_translate fails, preq.dev will not be initialized. Use blkif->vbd.pdevice instead (it is still better to print related info). Note that before commit 01c681d4c70d64cb72142a2823f27c4146a02e63 (xen/blkback: Don't trust the handle from the frontend.) the value was bogus, as it was the guest-provided value from req->u.rw.handle rather than the actual device.
Signed-off-by: Chen Gang Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de1f319f7bd..6d1cc3df2ac 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -904,7 +904,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + preq.sector_number + preq.nr_sects, + blkif->vbd.pdevice); goto fail_response; } -- cgit v1.2.3-70-g09d2 From 0e367ae46503cfe7791460c8ba8434a5d60b2bd5 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 7 Mar 2013 17:32:01 +0000 Subject: xen/blkback: correctly respond to unknown, non-native requests If the frontend is using a non-native protocol (e.g., a 64-bit frontend with a 32-bit backend) and it sent an unrecognized request, the request was not translated and the response would have the incorrect ID. This may cause the frontend driver to behave incorrectly or crash. Since the ID field in the request is always in the same place, regardless of the request type we can get the correct ID and make a valid response (which will report BLKIF_RSP_EOPNOTSUPP). This bug affected 64-bit SLES 11 guests when using a 32-bit backend. This guest does a BLKIF_OP_RESERVED_1 (BLKIF_OP_PACKET in the SLES source) and would crash in blkif_int() as the ID in the response would be invalid. 
Signed-off-by: David Vrabel Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 31 +++++++++++++++++++++++++++---- drivers/block/xen-blkback/common.h | 25 +++++++++++++++++++++++++ include/xen/interface/io/blkif.h | 10 ++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6d1cc3df2ac..1a0faf6370c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -679,6 +679,16 @@ static int dispatch_discard_io(struct xen_blkif *blkif, return err; } +static int dispatch_other_io(struct xen_blkif *blkif, + struct blkif_request *req, + struct pending_req *pending_req) +{ + free_req(pending_req); + make_response(blkif, req->u.other.id, req->operation, + BLKIF_RSP_EOPNOTSUPP); + return -EIO; +} + static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); @@ -800,17 +810,30 @@ __do_block_io_op(struct xen_blkif *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + + switch (req.operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; + break; + case BLKIF_OP_DISCARD: free_req(pending_req); if (dispatch_discard_io(blkif, &req)) - break; - } else if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; + break; + default: + if (dispatch_other_io(blkif, &req, pending_req)) + goto done; break; + } /* Yield point for this unbounded loop. 
*/ cond_resched(); } - +done: return more_to_do; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6072390c7f5..195278ae993 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -77,11 +77,18 @@ struct blkif_x86_32_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_32_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; + struct blkif_x86_32_request_other other; } u; } __attribute__((__packed__)); @@ -113,11 +120,19 @@ struct blkif_x86_64_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_64_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */ + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; + struct blkif_x86_64_request_other other; } u; } __attribute__((__packed__)); @@ -278,6 +293,11 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; break; } } @@ -309,6 +329,11 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. 
+ */ + dst->u.other.id = src->u.other.id; break; } } diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 01c3d62436e..ffd4652de91 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -138,11 +138,21 @@ struct blkif_request_discard { uint8_t _pad3; } __attribute__((__packed__)); +struct blkif_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad3; /* offsetof(blkif_req..,u.other.id)==8*/ +#endif + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; + struct blkif_request_other other; } u; } __attribute__((__packed__)); -- cgit v1.2.3-70-g09d2 From 986cacbd26abe5d498be922cd6632f1ec376c271 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Mon, 11 Mar 2013 16:15:50 +0000 Subject: xen/blkback: Change statistics counter types to unsigned These values shouldn't be negative, but if they are signed, an overflow can turn them negative. xentop can show bogus values in this case.
Signed-off-by: Zoltan Kiss Reported-by: Ichiro Ogino Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 4 ++-- drivers/block/xen-blkback/common.h | 14 +++++++------- drivers/block/xen-blkback/xenbus.c | 14 +++++++------- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 1a0faf6370c..eaccc222a1d 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -381,8 +381,8 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" - " | ds %4d\n", + pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" + " | ds %4llu\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 195278ae993..da78346487a 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -223,13 +223,13 @@ struct xen_blkif { /* statistics */ unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_f_req; - int st_ds_req; - int st_rd_sect; - int st_wr_sect; + unsigned long long st_rd_req; + unsigned long long st_wr_req; + unsigned long long st_oo_req; + unsigned long long st_f_req; + unsigned long long st_ds_req; + unsigned long long st_rd_sect; + unsigned long long st_wr_sect; wait_queue_head_t waiting_to_free; }; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5e237f630c4..8bfd1bcf95e 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -230,13 +230,13 @@ int __init xen_blkif_interface_init(void) } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) -VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); -VBD_SHOW(rd_req, 
"%d\n", be->blkif->st_rd_req); -VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); -VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); -VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); +VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req); +VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req); +VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req); +VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect); static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_oo_req.attr, -- cgit v1.2.3-70-g09d2 From f37912039eb04979f269de0a7dc1a601702df51a Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 12:27:46 -0600 Subject: block: IBM RamSan 70/80 trivial changes. This patch includes trivial changes that were recommended by different members of the Linux Community. Changes include: o Removing the redundant wmb(). o Formatting o Various other little things. 
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/config.c | 6 ++---- drivers/block/rsxx/core.c | 4 ++-- drivers/block/rsxx/cregs.c | 13 +++---------- drivers/block/rsxx/dma.c | 12 ------------ drivers/block/rsxx/rsxx.h | 6 ++++-- 5 files changed, 11 insertions(+), 30 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index a295e7e9ee4..0d8cb18284e 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -29,10 +29,8 @@ #include "rsxx_priv.h" #include "rsxx_cfg.h" -static void initialize_config(void *config) +static void initialize_config(struct rsxx_card_cfg *cfg) { - struct rsxx_card_cfg *cfg = config; - cfg->hdr.version = RSXX_CFG_VERSION; cfg->data.block_size = RSXX_HW_BLK_SIZE; @@ -181,7 +179,7 @@ int rsxx_load_config(struct rsxx_cardinfo *card) } else { dev_info(CARD_TO_DEV(card), "Initializing card configuration.\n"); - initialize_config(card); + initialize_config(&card->config); st = rsxx_save_config(card); if (st) return st; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index e5162487686..edbae10e7f6 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -161,9 +161,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) } /*----------------- Card Event Handler -------------------*/ -static char *rsxx_card_state_to_str(unsigned int state) +static const char * const rsxx_card_state_to_str(unsigned int state) { - static char *state_strings[] = { + static const char * const state_strings[] = { "Unknown", "Shutdown", "Starting", "Formatting", "Uninitialized", "Good", "Shutting Down", "Fault", "Read Only Fault", "dStroying" diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 80bbe639fcc..22415643526 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -126,13 +126,6 @@ static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) cmd->buf, 
cmd->stream); } - /* - * Data copy must complete before initiating the command. This is - * needed for weakly ordered processors (i.e. PowerPC), so that all - * neccessary registers are written before we kick the hardware. - */ - wmb(); - /* Setting the valid bit will kick off the command. */ iowrite32(cmd->op, card->regmap + CREG_CMD); } @@ -399,12 +392,12 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card, return st; /* - * This timeout is neccessary for unresponsive hardware. The additional + * This timeout is necessary for unresponsive hardware. The additional * 20 seconds to used to guarantee that each cregs requests has time to * complete. */ - timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * - card->creg_ctrl.q_depth) + 20000); + timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth + 20000); /* * The creg interface is guaranteed to complete. It has a timeout diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 63176e67662..7c3a57bed2c 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -432,16 +432,6 @@ static void rsxx_issue_dmas(struct work_struct *work) /* Let HW know we've queued commands. */ if (cmds_pending) { - /* - * We must guarantee that the CPU writes to 'ctrl->cmd.buf' - * (which is in PCI-consistent system-memory) from the loop - * above make it into the coherency domain before the - * following PIO "trigger" updating the cmd.idx. A WMB is - * sufficient. We need not explicitly CPU cache-flush since - * the memory is a PCI-consistent (ie; coherent) mapping. 
- */ - wmb(); - atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); mod_timer(&ctrl->activity_timer, jiffies + DMA_ACTIVITY_TIMEOUT); @@ -798,8 +788,6 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - wmb(); - return 0; } diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h index 2e50b65902b..24ba3642bd8 100644 --- a/drivers/block/rsxx/rsxx.h +++ b/drivers/block/rsxx/rsxx.h @@ -27,15 +27,17 @@ /*----------------- IOCTL Definitions -------------------*/ +#define RSXX_MAX_DATA 8 + struct rsxx_reg_access { __u32 addr; __u32 cnt; __u32 stat; __u32 stream; - __u32 data[8]; + __u32 data[RSXX_MAX_DATA]; }; -#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) +#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32))) #define RSXX_IOC_MAGIC 'r' -- cgit v1.2.3-70-g09d2 From 03ac03a8971bd7e9f8c8b20a309b61beaf154d60 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 12:31:31 -0600 Subject: block: IBM RamSan 70/80 fixes inconsistent locking. This patch includes changes to the cregs locking scheme. Before, inconsistent locking would occur because of misuse of spin_lock, spin_lock_bh, and counterparts. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/cregs.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 22415643526..0539a25877e 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -99,22 +99,6 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, } } -static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd; - - /* - * Spin lock is needed because this can be called in atomic/interrupt - * context.
- */ - spin_lock_bh(&card->creg_ctrl.lock); - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - spin_unlock_bh(&card->creg_ctrl.lock); - - return cmd; -} - static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) { iowrite32(cmd->addr, card->regmap + CREG_ADD); @@ -189,11 +173,11 @@ static int creg_queue_cmd(struct rsxx_cardinfo *card, cmd->cb_private = cb_private; cmd->status = 0; - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_add_tail(&cmd->list, &card->creg_ctrl.queue); card->creg_ctrl.q_depth++; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); return 0; } @@ -203,7 +187,11 @@ static void creg_cmd_timed_out(unsigned long data) struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; struct creg_cmd *cmd; - cmd = pop_active_cmd(card); + spin_lock(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock(&card->creg_ctrl.lock); + if (cmd == NULL) { card->creg_ctrl.creg_stats.creg_timeout++; dev_warn(CARD_TO_DEV(card), @@ -240,7 +228,11 @@ static void creg_cmd_done(struct work_struct *work) if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) card->creg_ctrl.creg_stats.failed_cancel_timer++; - cmd = pop_active_cmd(card); + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + if (cmd == NULL) { dev_err(CARD_TO_DEV(card), "Spurious creg interrupt!\n"); @@ -289,10 +281,10 @@ creg_done: kmem_cache_free(creg_cmd_pool, cmd); - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); card->creg_ctrl.active = 0; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); } static void creg_reset(struct rsxx_cardinfo *card) @@ -317,7 +309,7 @@ static void creg_reset(struct rsxx_cardinfo *card) "Resetting creg interface for recovery\n"); 
/* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); card->creg_ctrl.q_depth--; @@ -338,7 +330,7 @@ static void creg_reset(struct rsxx_cardinfo *card) card->creg_ctrl.active = 0; } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); card->creg_ctrl.reset = 0; spin_lock_irqsave(&card->irq_lock, flags); @@ -705,7 +697,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) int cnt = 0; /* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); if (cmd->cb) @@ -730,7 +722,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) "Canceled active creg command\n"); kmem_cache_free(creg_cmd_pool, cmd); } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); cancel_work_sync(&card->creg_ctrl.done_work); } -- cgit v1.2.3-70-g09d2 From 9bb3c4469e317919b0fde8c0e0a3ebe7bd2cf167 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Wed, 27 Feb 2013 09:24:59 -0600 Subject: block: IBM RamSan 70/80 branding changes. This patch includes changing the hardware branding name from IBM RamSan to IBM FlashSystem. v2 Changes include: o Removing the unnecessary IBM Vendor ID #define v1 Changes include: o Changed all references of RamSan to FlashSystem. o Changed the vendor/device IDs for the product. o Changed driver version number. o Updated the MAINTAINERS file. o Various other little things. 
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- MAINTAINERS | 12 ++++++------ drivers/block/Kconfig | 4 ++-- drivers/block/rsxx/Makefile | 2 +- drivers/block/rsxx/config.c | 2 +- drivers/block/rsxx/core.c | 10 ++++------ drivers/block/rsxx/dma.c | 2 +- drivers/block/rsxx/rsxx_cfg.h | 2 +- drivers/block/rsxx/rsxx_priv.h | 9 +++------ 8 files changed, 19 insertions(+), 24 deletions(-) (limited to 'drivers/block') diff --git a/MAINTAINERS b/MAINTAINERS index 95616582c72..a00f0eaf0ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3242,6 +3242,12 @@ F: Documentation/firmware_class/ F: drivers/base/firmware*.c F: include/linux/firmware.h +FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card) +M: Joshua Morris +M: Philip Kelleher +S: Maintained +F: drivers/block/rsxx/ + FLOPPY DRIVER M: Jiri Kosina T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/floppy.git @@ -6516,12 +6522,6 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c -RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card) -M: Joshua Morris -M: Philip Kelleher -S: Maintained -F: drivers/block/rsxx/ - RANDOM NUMBER DRIVER M: Theodore Ts'o" S: Maintained diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 5dc0daed8fa..b81ddfea1da 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD If unsure, say N. config BLK_DEV_RSXX - tristate "RamSam PCIe Flash SSD Device Driver" + tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver" depends on PCI help Device driver for IBM's high speed PCIe SSD - storage devices: RamSan-70 and RamSan-80. + storage devices: FlashSystem-70 and FlashSystem-80. To compile this driver as a module, choose M here: the module will be called rsxx. 
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile index f35cd0b71f7..b1c53c0aa45 100644 --- a/drivers/block/rsxx/Makefile +++ b/drivers/block/rsxx/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o -rsxx-y := config.o core.o cregs.o dev.o dma.o +rsxx-objs := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index 0d8cb18284e..10cd530d3e1 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -35,7 +35,7 @@ static void initialize_config(struct rsxx_card_cfg *cfg) cfg->data.block_size = RSXX_HW_BLK_SIZE; cfg->data.stripe_size = RSXX_HW_BLK_SIZE; - cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.vendor_id = RSXX_VENDOR_ID_IBM; cfg->data.cache_order = (-1); cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; cfg->data.intr_coal.count = 0; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index edbae10e7f6..b82ee7baf0e 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -39,8 +39,8 @@ #define NO_LEGACY 0 -MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); -MODULE_AUTHOR("IBM "); +MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); @@ -593,10 +593,8 @@ static void rsxx_pci_shutdown(struct pci_dev *dev) } static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, {0,}, }; diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 7c3a57bed2c..efd75b55a67 100644 --- a/drivers/block/rsxx/dma.c +++ 
b/drivers/block/rsxx/dma.c @@ -28,7 +28,7 @@ struct rsxx_dma { struct list_head list; u8 cmd; - unsigned int laddr; /* Logical address on the ramsan */ + unsigned int laddr; /* Logical address */ struct { u32 off; u32 cnt; diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h index c025fe5fdb7..f384c943846 100644 --- a/drivers/block/rsxx/rsxx_cfg.h +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -58,7 +58,7 @@ struct rsxx_card_cfg { }; /* Vendor ID Values */ -#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_IBM 0 #define RSXX_VENDOR_ID_DSI 1 #define RSXX_VENDOR_COUNT 2 diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index a1ac907d8f4..f5a95f75bd5 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -45,16 +45,13 @@ struct proc_cmd; -#define PCI_VENDOR_ID_TMS_IBM 0x15B6 -#define PCI_DEVICE_ID_RS70_FLASH 0x0019 -#define PCI_DEVICE_ID_RS70D_FLASH 0x001A -#define PCI_DEVICE_ID_RS80_FLASH 0x001C -#define PCI_DEVICE_ID_RS81_FLASH 0x001E +#define PCI_DEVICE_ID_FS70_FLASH 0x04A9 +#define PCI_DEVICE_ID_FS80_FLASH 0x04AA #define RS70_PCI_REV_SUPPORTED 4 #define DRIVER_NAME "rsxx" -#define DRIVER_VERSION "3.7" +#define DRIVER_VERSION "4.0" /* Block size is 4096 */ #define RSXX_HW_BLK_SHIFT 12 -- cgit v1.2.3-70-g09d2 From 1ebfd109822ea35b71aee4efe9ddc2e1b9ac0ed7 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 13:09:40 -0600 Subject: block: IBM RamSan 70/80 error message bug fix. This patch includes a simple change to the rsxx_pci_remove function that caused error messages because traffic was halted too early. 
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index b82ee7baf0e..cbbdff113f4 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -538,9 +538,6 @@ static void rsxx_pci_remove(struct pci_dev *dev) rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); spin_unlock_irqrestore(&card->irq_lock, flags); - /* Prevent work_structs from re-queuing themselves. */ - card->halt = 1; - cancel_work_sync(&card->event_work); rsxx_destroy_dev(card); @@ -549,6 +546,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + free_irq(dev->irq, card); if (!force_legacy) -- cgit v1.2.3-70-g09d2 From c95246c3a2ac796cfa43e76200ede59cb4a1644f Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Sat, 16 Mar 2013 08:22:25 +0100 Subject: Adding in EEH support to the IBM FlashSystem 70/80 device driver Changes in v2 include: o Fixed spelling of guarantee. o Fixed potential memory leak if slot reset fails out. o Changed list_for_each_entry_safe with list_for_each_entry. 
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 203 +++++++++++++++++++++++++++++++++++++- drivers/block/rsxx/cregs.c | 59 +++++++++-- drivers/block/rsxx/dma.c | 216 ++++++++++++++++++++++++++++++----------- drivers/block/rsxx/rsxx_priv.h | 25 ++++- 4 files changed, 436 insertions(+), 67 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index cbbdff113f4..93f28191a0f 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); /*----------------- Interrupt Control & Handling -------------------*/ + +static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) +{ + card->isr_mask = 0; + card->ier_mask = 0; +} + static void __enable_intr(unsigned int *mask, unsigned int intr) { *mask |= intr; @@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr) */ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->ier_mask, intr); @@ -80,6 +89,9 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); } @@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->isr_mask, intr); @@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, unsigned 
int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->isr_mask, intr); __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); @@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) do { reread_isr = 0; + if (unlikely(card->eeh_state)) + break; + isr = ioread32(card->regmap + ISR); if (isr == 0xffffffff) { /* @@ -304,6 +323,179 @@ static int card_shutdown(struct rsxx_cardinfo *card) return 0; } +static void rsxx_eeh_frozen(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + + dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + + card->eeh_state = 1; + rsxx_mask_interrupts(card); + + /* + * We need to guarantee that the write for eeh_state and masking + * interrupts does not become reordered. This will prevent a possible + * race condition with the EEH code. + */ + wmb(); + + pci_disable_device(dev); + + rsxx_eeh_save_issued_dmas(card); + + rsxx_eeh_save_issued_creg(card); + + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } +} + +static void rsxx_eeh_failure(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + + dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + + card->eeh_state = 1; + + for (i = 0; i < card->n_targets; i++) + del_timer_sync(&card->ctrl[i].activity_timer); + + rsxx_eeh_cancel_dmas(card); +} + +static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) +{ + unsigned int status; + int iter = 0; + + /* We need to wait for the hardware to reset */ + while (iter++ < 10) { + status = ioread32(card->regmap + PCI_RECONFIG); + + if (status & RSXX_FLUSH_BUSY) { + ssleep(1); + continue; + } + + 
if (status & RSXX_FLUSH_TIMEOUT) + dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n"); + return 0; + } + + /* Hardware failed resetting itself. */ + return -1; +} + +static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, + enum pci_channel_state error) +{ + if (dev->revision < RSXX_EEH_SUPPORT) + return PCI_ERS_RESULT_NONE; + + if (error == pci_channel_io_perm_failure) { + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + + rsxx_eeh_frozen(dev); + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + int st; + + dev_warn(&dev->dev, + "IBM FlashSystem PCI: recovering from slot reset.\n"); + + st = pci_enable_device(dev); + if (st) + goto failed_hw_setup; + + pci_set_master(dev); + + st = rsxx_eeh_fifo_flush_poll(card); + if (st) + goto failed_hw_setup; + + rsxx_dma_queue_reset(card); + + for (i = 0; i < card->n_targets; i++) { + st = rsxx_hw_buffers_init(dev, &card->ctrl[i]); + if (st) + goto failed_hw_buffers_init; + } + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Clears the ISR register from spurious interrupts */ + st = ioread32(card->regmap + ISR); + + card->eeh_state = 0; + + st = rsxx_eeh_remap_dmas(card); + if (st) + goto failed_remap_dmas; + + spin_lock_irqsave(&card->irq_lock, flags); + if (card->n_targets & RSXX_MAX_TARGETS) + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); + else + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C); + spin_unlock_irqrestore(&card->irq_lock, flags); + + rsxx_kick_creg_queue(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + if (list_empty(&card->ctrl[i].queue)) { + spin_unlock(&card->ctrl[i].queue_lock); + continue; + } + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + + dev_info(&dev->dev, "IBM FlashSystem PCI: recovery 
complete.\n"); + + return PCI_ERS_RESULT_RECOVERED; + +failed_hw_buffers_init: +failed_remap_dmas: + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, + STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, + COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } +failed_hw_setup: + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + +} + /*----------------- Driver Initialization & Setup -------------------*/ /* Returns: 0 if the driver is compatible with the device -1 if the driver is NOT compatible with the device */ @@ -383,6 +575,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, spin_lock_init(&card->irq_lock); card->halt = 0; + card->eeh_state = 0; spin_lock_irq(&card->irq_lock); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); @@ -593,6 +786,11 @@ static void rsxx_pci_shutdown(struct pci_dev *dev) card_shutdown(card); } +static const struct pci_error_handlers rsxx_err_handler = { + .error_detected = rsxx_error_detected, + .slot_reset = rsxx_slot_reset, +}; + static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, @@ -608,6 +806,7 @@ static struct pci_driver rsxx_pci_driver = { .remove = rsxx_pci_remove, .suspend = rsxx_pci_suspend, .shutdown = rsxx_pci_shutdown, + .err_handler = &rsxx_err_handler, }; static int __init rsxx_core_init(void) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 0539a25877e..4b5c020a0a6 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool; #error Unknown endianess!!! Aborting... 
#endif -static void copy_to_creg_data(struct rsxx_cardinfo *card, +static int copy_to_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, else iowrite32(data[i], card->regmap + CREG_DATA(i)); } + + return 0; } -static void copy_from_creg_data(struct rsxx_cardinfo *card, +static int copy_from_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -97,19 +105,32 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, else data[i] = ioread32(card->regmap + CREG_DATA(i)); } + + return 0; } static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) { + int st; + + if (unlikely(card->eeh_state)) + return; + iowrite32(cmd->addr, card->regmap + CREG_ADD); iowrite32(cmd->cnt8, card->regmap + CREG_CNT); if (cmd->op == CREG_OP_WRITE) { - if (cmd->buf) - copy_to_creg_data(card, cmd->cnt8, - cmd->buf, cmd->stream); + if (cmd->buf) { + st = copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + if (st) + return; + } } + if (unlikely(card->eeh_state)) + return; + /* Setting the valid bit will kick off the command. 
*/ iowrite32(cmd->op, card->regmap + CREG_CMD); } @@ -272,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work) goto creg_done; } - copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); } creg_done: @@ -675,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card, return 0; } +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + + if (cmd) { + del_timer_sync(&card->creg_ctrl.cmd_timer); + + spin_lock_bh(&card->creg_ctrl.lock); + list_add(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + card->creg_ctrl.active = 0; + spin_unlock_bh(&card->creg_ctrl.lock); + } +} + +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card) +{ + spin_lock_bh(&card->creg_ctrl.lock); + if (!list_empty(&card->creg_ctrl.queue)) + creg_kick_queue(card); + spin_unlock_bh(&card->creg_ctrl.lock); +} + /*------------ Initialization & Setup --------------*/ int rsxx_creg_setup(struct rsxx_cardinfo *card) { diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index efd75b55a67..60d344d002e 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -81,9 +81,6 @@ enum rsxx_hw_status { HW_STATUS_FAULT = 0x08, }; -#define STATUS_BUFFER_SIZE8 4096 -#define COMMAND_BUFFER_SIZE8 4096 - static struct kmem_cache *rsxx_dma_pool; struct dma_tracker { @@ -122,7 +119,7 @@ static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) return tgt; } -static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) { /* Reset all DMA Command/Status Queues */ iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); @@ -210,7 +207,8 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) u32 q_depth = 0; u32 intr_coal; - if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + if 
(card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE || + unlikely(card->eeh_state)) return; for (i = 0; i < card->n_targets; i++) @@ -223,31 +221,26 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) } /*----------------- RSXX DMA Handling -------------------*/ -static void rsxx_complete_dma(struct rsxx_cardinfo *card, +static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma, unsigned int status) { if (status & DMA_SW_ERR) - printk_ratelimited(KERN_ERR - "SW Error in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_sw_err++; if (status & DMA_HW_FAULT) - printk_ratelimited(KERN_ERR - "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_hw_fault++; if (status & DMA_CANCELLED) - printk_ratelimited(KERN_ERR - "DMA Cancelled(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_cancelled++; if (dma->dma_addr) - pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + pci_unmap_page(ctrl->card->dev, dma->dma_addr, + get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (dma->cb) - dma->cb(card, dma->cb_data, status ? 1 : 0); + dma->cb(ctrl->card, dma->cb_data, status ? 
1 : 0); kmem_cache_free(rsxx_dma_pool, dma); } @@ -330,14 +323,15 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, if (requeue_cmd) rsxx_requeue_dma(ctrl, dma); else - rsxx_complete_dma(ctrl->card, dma, status); + rsxx_complete_dma(ctrl, dma, status); } static void dma_engine_stalled(unsigned long data) { struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; - if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || + unlikely(ctrl->card->eeh_state)) return; if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { @@ -369,7 +363,8 @@ static void rsxx_issue_dmas(struct work_struct *work) ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); hw_cmd_buf = ctrl->cmd.buf; - if (unlikely(ctrl->card->halt)) + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->eeh_state)) return; while (1) { @@ -397,7 +392,7 @@ static void rsxx_issue_dmas(struct work_struct *work) */ if (unlikely(ctrl->card->dma_fault)) { push_tracker(ctrl->trackers, tag); - rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); continue; } @@ -435,6 +430,12 @@ static void rsxx_issue_dmas(struct work_struct *work) atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); mod_timer(&ctrl->activity_timer, jiffies + DMA_ACTIVITY_TIMEOUT); + + if (unlikely(ctrl->card->eeh_state)) { + del_timer_sync(&ctrl->activity_timer); + return; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); } } @@ -453,7 +454,8 @@ static void rsxx_dma_done(struct work_struct *work) hw_st_buf = ctrl->status.buf; if (unlikely(ctrl->card->halt) || - unlikely(ctrl->card->dma_fault)) + unlikely(ctrl->card->dma_fault) || + unlikely(ctrl->card->eeh_state)) return; count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); @@ -498,7 +500,7 @@ static void rsxx_dma_done(struct work_struct *work) if (status) rsxx_handle_dma_error(ctrl, dma, status); else - rsxx_complete_dma(ctrl->card, dma, 0); + rsxx_complete_dma(ctrl, dma, 
0); push_tracker(ctrl->trackers, tag); @@ -717,20 +719,54 @@ bvec_err: /*----------------- DMA Engine Initialization & Setup -------------------*/ +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) +{ + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + return 0; +} + static int rsxx_dma_ctrl_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) { int i; + int st; memset(&ctrl->stats, 0, sizeof(ctrl->stats)); - ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr); - ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr); - if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) - return -ENOMEM; - ctrl->trackers = 
vmalloc(DMA_TRACKER_LIST_SIZE8); if (!ctrl->trackers) return -ENOMEM; @@ -760,33 +796,9 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); - memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_LO); - iowrite32(upper_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_HI); - - memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); - iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); - - ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); - if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading status cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); - iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); - - ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); - if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + st = rsxx_hw_buffers_init(dev, ctrl); + if (st) + return st; return 0; } @@ -822,7 +834,7 @@ static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, return 0; } -static int rsxx_dma_configure(struct rsxx_cardinfo *card) +int rsxx_dma_configure(struct rsxx_cardinfo *card) { u32 intr_coal; @@ -968,6 +980,94 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) } } +void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +{ + int i; + int j; + int cnt; + struct rsxx_dma *dma; + struct list_head issued_dmas[card->n_targets]; + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&issued_dmas[i]); + cnt = 0; + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = 
get_tracker_dma(card->ctrl[i].trackers, j); + if (dma == NULL) + continue; + + if (dma->cmd == HW_CMD_BLK_WRITE) + card->ctrl[i].stats.writes_issued--; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + card->ctrl[i].stats.discards_issued--; + else + card->ctrl[i].stats.reads_issued--; + + list_add_tail(&dma->list, &issued_dmas[i]); + push_tracker(card->ctrl[i].trackers, j); + cnt++; + } + + spin_lock(&card->ctrl[i].queue_lock); + list_splice(&issued_dmas[i], &card->ctrl[i].queue); + + atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); + card->ctrl[i].stats.sw_q_depth += cnt; + card->ctrl[i].e_cnt = 0; + + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + } + spin_unlock(&card->ctrl[i].queue_lock); + } +} + +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { + list_del(&dma->list); + + rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); + } + spin_unlock(&card->ctrl[i].queue_lock); + } +} + +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + dma->dma_addr = pci_map_page(card->dev, dma->page, + dma->pg_off, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? 
+ PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + } + spin_unlock(&card->ctrl[i].queue_lock); + } + + return 0; +} int rsxx_dma_init(void) { diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index f5a95f75bd5..8a7ac87f1dc 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -64,6 +64,9 @@ struct proc_cmd; #define RSXX_MAX_OUTSTANDING_CMDS 255 #define RSXX_CS_IDX_MASK 0xff +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + #define RSXX_MAX_TARGETS 8 struct dma_tracker_list; @@ -88,6 +91,9 @@ struct rsxx_dma_stats { u32 discards_failed; u32 done_rescheduled; u32 issue_rescheduled; + u32 dma_sw_err; + u32 dma_hw_fault; + u32 dma_cancelled; u32 sw_q_depth; /* Number of DMAs on the SW queue. */ atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ }; @@ -113,6 +119,7 @@ struct rsxx_dma_ctrl { struct rsxx_cardinfo { struct pci_dev *dev; unsigned int halt; + unsigned int eeh_state; void __iomem *regmap; spinlock_t irq_lock; @@ -221,6 +228,7 @@ enum rsxx_pci_regmap { PERF_RD512_HI = 0xac, PERF_WR512_LO = 0xb0, PERF_WR512_HI = 0xb4, + PCI_RECONFIG = 0xb8, }; enum rsxx_intr { @@ -234,6 +242,8 @@ enum rsxx_intr { CR_INTR_DMA5 = 0x00000080, CR_INTR_DMA6 = 0x00000100, CR_INTR_DMA7 = 0x00000200, + CR_INTR_ALL_C = 0x0000003f, + CR_INTR_ALL_G = 0x000003ff, CR_INTR_DMA_ALL = 0x000003f5, CR_INTR_ALL = 0xffffffff, }; @@ -250,8 +260,14 @@ enum rsxx_pci_reset { DMA_QUEUE_RESET = 0x00000001, }; +enum rsxx_hw_fifo_flush { + RSXX_FLUSH_BUSY = 0x00000002, + RSXX_FLUSH_TIMEOUT = 0x00000004, +}; + enum rsxx_pci_revision { RSXX_DISCARD_SUPPORT = 2, + RSXX_EEH_SUPPORT = 3, }; enum rsxx_creg_cmd { @@ -357,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card); void rsxx_dma_destroy(struct rsxx_cardinfo *card); int rsxx_dma_init(void); void rsxx_dma_cleanup(void); +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); +int 
rsxx_dma_configure(struct rsxx_cardinfo *card); int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, struct bio *bio, atomic_t *n_dmas, rsxx_dma_cb cb, void *cb_data); +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); +void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); /***** cregs.c *****/ int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, @@ -386,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card); void rsxx_creg_destroy(struct rsxx_cardinfo *card); int rsxx_creg_init(void); void rsxx_creg_cleanup(void); - int rsxx_reg_access(struct rsxx_cardinfo *card, struct rsxx_reg_access __user *ucmd, int read); +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card); +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card); -- cgit v1.2.3-70-g09d2 From 351a2c6e7d265f97799ec7f6b1dde7fc7cb4b92d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Mar 2013 10:10:48 +0100 Subject: rsxx: fix missing unlock on error return in rsxx_eeh_remap_dmas() Spotted by Fenguan Wu's super build robot. 
Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 60d344d002e..d523e9c5657 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -1059,6 +1059,7 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (!dma->dma_addr) { + spin_unlock(&card->ctrl[i].queue_lock); kmem_cache_free(rsxx_dma_pool, dma); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 0e5e098ac22dae38f957e951b70d3cf73beff0f7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 11 Mar 2013 09:39:55 +0000 Subject: xen-blkback: fix dispatch_rw_block_io() error path Commit 7708992 ("xen/blkback: Seperate the bio allocation and the bio submission") consolidated the pendcnt updates to just a single write, neglecting the fact that the error path relied on it getting set to 1 up front (such that the decrement in __end_block_io_op() would actually drop the count to zero, triggering the necessary cleanup actions). Also remove a misleading and a stale (after said commit) comment. CC: stable@vger.kernel.org Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index eaccc222a1d..477a17c2082 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1001,13 +1001,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, bio->bi_end_io = end_block_io_op; } - /* - * We set it one so that the last submit_bio does not have to call - * atomic_inc. 
- */ atomic_set(&pending_req->pendcnt, nbio); - - /* Get a reference count for the disk queue and start sending I/O */ blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -1035,6 +1029,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, fail_put_bio: for (i = 0; i < nbio; i++) bio_put(biolist[i]); + atomic_set(&pending_req->pendcnt, 1); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ return -EIO; -- cgit v1.2.3-70-g09d2 From 29d0b218c87ace1078e08bb32c2e72fc96fa3db3 Mon Sep 17 00:00:00 2001 From: Mihnea Dobrescu-Balaur Date: Mon, 11 Mar 2013 13:23:36 +0200 Subject: xen-blkfront: replace kmalloc and then memcpy with kmemdup The benefits are: * code is cleaner * kmemdup adds additional debugging info useful for tracking the real place where memory was allocated (CONFIG_DEBUG_SLAB). Signed-off-by: Mihnea Dobrescu-Balaur Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c3dae2e0f29..962064487ef 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1203,11 +1203,10 @@ static int blkif_recover(struct blkfront_info *info) int j; /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmalloc(sizeof(info->shadow), + copy = kmemdup(info->shadow, sizeof(info->shadow), GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); if (!copy) return -ENOMEM; - memcpy(copy, info->shadow, sizeof(info->shadow)); /* Stage 2: Set up free list. 
*/ memset(&info->shadow, 0, sizeof(info->shadow)); -- cgit v1.2.3-70-g09d2 From 217fd5e709f029c125a9d39de5f13387407f131a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:33 +0100 Subject: xen-blkback: fix foreach_grant_safe to handle empty lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We may use foreach_grant_safe in the future with empty lists, so make sure we can handle them. Signed-off-by: Roger Pau Monné Cc: xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 477a17c2082..2cf8381a1c6 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, #define foreach_grant_safe(pos, n, rbtree, node) \ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ - (n) = rb_next(&(pos)->node); \ + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \ &(pos)->node != NULL; \ (pos) = container_of(n, typeof(*(pos)), node), \ (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) -- cgit v1.2.3-70-g09d2 From 155b7edb51430a280f86c1e21b7be308b0d219d4 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:34 +0100 Subject: xen-blkfront: switch from llist to list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The git commit f84adf4921ae3115502f44ff467b04bf2f88cf04 (xen-blkfront: drop the use of llist_for_each_entry_safe) was a stop-gap to fix a GCC4.1 bug. The appropriate way is to actually use a list instead of using an llist. As such this patch replaces the usage of llist with a list.
Since we always manipulate the list while holding the io_lock, there's no need for additional locking (llist used previously is safe to use concurrently without additional locking). Signed-off-by: Roger Pau Monné CC: stable@vger.kernel.org [v1: Redid the git commit description] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 962064487ef..97324cd18f4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include @@ -68,7 +68,7 @@ enum blkif_state { struct grant { grant_ref_t gref; unsigned long pfn; - struct llist_node node; + struct list_head node; }; struct blk_shadow { @@ -105,7 +105,7 @@ struct blkfront_info struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; - struct llist_head persistent_gnts; + struct list_head persistent_gnts; unsigned int persistent_gnts_c; unsigned long shadow_free; unsigned int feature_flush; @@ -371,10 +371,11 @@ static int blkif_queue_request(struct request *req) lsect = fsect + (sg->length >> 9) - 1; if (info->persistent_gnts_c) { - BUG_ON(llist_empty(&info->persistent_gnts)); - gnt_list_entry = llist_entry( - llist_del_first(&info->persistent_gnts), - struct grant, node); + BUG_ON(list_empty(&info->persistent_gnts)); + gnt_list_entry = list_first_entry( + &info->persistent_gnts, + struct grant, node); + list_del(&gnt_list_entry->node); ref = gnt_list_entry->gref; buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); @@ -790,9 +791,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { - struct llist_node *all_gnts; - struct grant *persistent_gnt, *tmp; - struct llist_node *n; + struct grant 
*persistent_gnt; + struct grant *n; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -804,20 +804,15 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { - all_gnts = llist_del_all(&info->persistent_gnts); - persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); - while (persistent_gnt) { + list_for_each_entry_safe(persistent_gnt, n, + &info->persistent_gnts, node) { + list_del(&persistent_gnt->node); gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - tmp = persistent_gnt; - n = persistent_gnt->node.next; - if (n) - persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); - else - persistent_gnt = NULL; - kfree(tmp); + kfree(persistent_gnt); + info->persistent_gnts_c--; } - info->persistent_gnts_c = 0; + BUG_ON(info->persistent_gnts_c != 0); } /* No more gnttab callback work. */ @@ -875,7 +870,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < s->req.u.rw.nr_segments; i++) { - llist_add(&s->grants_used[i]->node, &info->persistent_gnts); + list_add(&s->grants_used[i]->node, &info->persistent_gnts); info->persistent_gnts_c++; } } @@ -1171,7 +1166,7 @@ static int blkfront_probe(struct xenbus_device *dev, spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; - init_llist_head(&info->persistent_gnts); + INIT_LIST_HEAD(&info->persistent_gnts); info->persistent_gnts_c = 0; info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); -- cgit v1.2.3-70-g09d2 From ffb1dabd1eb10c76a1e7af62f75a1aaa8d590b5a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:32 +0100 Subject: xen-blkback: don't store dev_bus_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit dev_bus_addr returned in the grant ref map operation is the mfn of the passed page, there's no need to store it in the persistent grant entry, since we can always get it provided that we have the page. This reduces the memory overhead of persistent grants in blkback. While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as it makes much more sense - as we use that value in bio_add_page which as the fourth argument expects the offset. We hadn't used the physical address as part of this at all. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org [v1: s/buf/offset/] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 21 ++++++--------------- drivers/block/xen-blkback/common.h | 1 - 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 2cf8381a1c6..dd5b2fed97e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg) } struct seg_buf { - unsigned long buf; + unsigned int offset; unsigned int nsec; }; /* @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req, * If this is a new persistent grant * save the handler */ - persistent_gnts[i]->handle = map[j].handle; - persistent_gnts[i]->dev_bus_addr = - map[j++].dev_bus_addr; + persistent_gnts[i]->handle = map[j++].handle; } pending_handle(pending_req, i) = persistent_gnts[i]->handle; if (ret) continue; - - seg[i].buf = persistent_gnts[i]->dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } else { - pending_handle(pending_req, i) = map[j].handle; + pending_handle(pending_req, i) = map[j++].handle; bitmap_set(pending_req->unmap_seg, i, 1); - if (ret) { - j++; + if (ret) continue; - } - - seg[i].buf = map[j++].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } + seg[i].offset = 
(req->u.rw.seg[i].first_sect << 9); } return ret; } @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, (bio_add_page(bio, pages[i], seg[i].nsec << 9, - seg[i].buf & ~PAGE_MASK) == 0)) { + seg[i].offset) == 0)) { bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index da78346487a..60103e2517b 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -187,7 +187,6 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; - uint64_t dev_bus_addr; struct rb_node node; }; -- cgit v1.2.3-70-g09d2 From 9c1e050caeb4d1250f8ceef1180a8b3d0db6c624 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:35 +0100 Subject: xen-blkfront: pre-allocate pages for requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents us from having to call alloc_page while we are preparing the request. Since blkfront was calling alloc_page with a spinlock held we used GFP_ATOMIC, which can fail if we are requesting a lot of pages since it is using the emergency memory pools. Allocating all the pages at init prevents us from having to call alloc_page, thus preventing possible failures. 
Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 120 ++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 41 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 97324cd18f4..c6404332339 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -165,6 +165,69 @@ static int add_id_to_freelist(struct blkfront_info *info, return 0; } +static int fill_grant_buffer(struct blkfront_info *info, int num) +{ + struct page *granted_page; + struct grant *gnt_list_entry, *n; + int i = 0; + + while(i < num) { + gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); + if (!gnt_list_entry) + goto out_of_memory; + + granted_page = alloc_page(GFP_NOIO); + if (!granted_page) { + kfree(gnt_list_entry); + goto out_of_memory; + } + + gnt_list_entry->pfn = page_to_pfn(granted_page); + gnt_list_entry->gref = GRANT_INVALID_REF; + list_add(&gnt_list_entry->node, &info->persistent_gnts); + i++; + } + + return 0; + +out_of_memory: + list_for_each_entry_safe(gnt_list_entry, n, + &info->persistent_gnts, node) { + list_del(&gnt_list_entry->node); + __free_page(pfn_to_page(gnt_list_entry->pfn)); + kfree(gnt_list_entry); + i--; + } + BUG_ON(i != 0); + return -ENOMEM; +} + +static struct grant *get_grant(grant_ref_t *gref_head, + struct blkfront_info *info) +{ + struct grant *gnt_list_entry; + unsigned long buffer_mfn; + + BUG_ON(list_empty(&info->persistent_gnts)); + gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant, + node); + list_del(&gnt_list_entry->node); + + if (gnt_list_entry->gref != GRANT_INVALID_REF) { + info->persistent_gnts_c--; + return gnt_list_entry; + } + + /* Assign a gref to this page */ + gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); + BUG_ON(gnt_list_entry->gref == -ENOSPC); + buffer_mfn = 
pfn_to_mfn(gnt_list_entry->pfn); + gnttab_grant_foreign_access_ref(gnt_list_entry->gref, + info->xbdev->otherend_id, + buffer_mfn, 0); + return gnt_list_entry; +} + static const char *op_name(int op) { static const char *const names[] = { @@ -306,7 +369,6 @@ static int blkif_queue_request(struct request *req) */ bool new_persistent_gnts; grant_ref_t gref_head; - struct page *granted_page; struct grant *gnt_list_entry = NULL; struct scatterlist *sg; @@ -370,42 +432,9 @@ static int blkif_queue_request(struct request *req) fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; - if (info->persistent_gnts_c) { - BUG_ON(list_empty(&info->persistent_gnts)); - gnt_list_entry = list_first_entry( - &info->persistent_gnts, - struct grant, node); - list_del(&gnt_list_entry->node); - - ref = gnt_list_entry->gref; - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); - info->persistent_gnts_c--; - } else { - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); - - gnt_list_entry = - kmalloc(sizeof(struct grant), - GFP_ATOMIC); - if (!gnt_list_entry) - return -ENOMEM; - - granted_page = alloc_page(GFP_ATOMIC); - if (!granted_page) { - kfree(gnt_list_entry); - return -ENOMEM; - } - - gnt_list_entry->pfn = - page_to_pfn(granted_page); - gnt_list_entry->gref = ref; - - buffer_mfn = pfn_to_mfn(page_to_pfn( - granted_page)); - gnttab_grant_foreign_access_ref(ref, - info->xbdev->otherend_id, - buffer_mfn, 0); - } + gnt_list_entry = get_grant(&gref_head, info); + ref = gnt_list_entry->gref; + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); info->shadow[id].grants_used[i] = gnt_list_entry; @@ -803,17 +832,20 @@ static void blkif_free(struct blkfront_info *info, int suspend) blk_stop_queue(info->rq); /* Remove all persistent grants */ - if (info->persistent_gnts_c) { + if (!list_empty(&info->persistent_gnts)) { list_for_each_entry_safe(persistent_gnt, n, &info->persistent_gnts, node) { list_del(&persistent_gnt->node); - 
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + if (persistent_gnt->gref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(persistent_gnt->gref, + 0, 0UL); + info->persistent_gnts_c--; + } __free_page(pfn_to_page(persistent_gnt->pfn)); kfree(persistent_gnt); - info->persistent_gnts_c--; } - BUG_ON(info->persistent_gnts_c != 0); } + BUG_ON(info->persistent_gnts_c != 0); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); @@ -1008,6 +1040,12 @@ static int setup_blkring(struct xenbus_device *dev, sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + /* Allocate memory for grants */ + err = fill_grant_buffer(info, BLK_RING_SIZE * + BLKIF_MAX_SEGMENTS_PER_REQUEST); + if (err) + goto fail; + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); -- cgit v1.2.3-70-g09d2 From b1173e316bf2ff3c11f46247417f0f5789a4ea0c Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:36 +0100 Subject: xen-blkfront: remove frame list from blk_shadow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have the frame (pfn of the grant page) stored inside struct grant, so there's no need to keep an additional list of mapped frames for a specific request. This reduces memory usage in blkfront.
Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c6404332339..a894f88762d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -74,7 +74,6 @@ struct grant { struct blk_shadow { struct blkif_request req; struct request *request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -356,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, static int blkif_queue_request(struct request *req) { struct blkfront_info *info = req->rq_disk->private_data; - unsigned long buffer_mfn; struct blkif_request *ring_req; unsigned long id; unsigned int fsect, lsect; @@ -434,7 +432,6 @@ static int blkif_queue_request(struct request *req) gnt_list_entry = get_grant(&gref_head, info); ref = gnt_list_entry->gref; - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); info->shadow[id].grants_used[i] = gnt_list_entry; @@ -465,7 +462,6 @@ static int blkif_queue_request(struct request *req) kunmap_atomic(shared_data); } - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); ring_req->u.rw.seg[i] = (struct blkif_request_segment) { .gref = ref, @@ -1268,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info) gnttab_grant_foreign_access_ref( req->u.rw.seg[j].gref, info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + pfn_to_mfn(copy[i].grants_used[j]->pfn), 0); } info->shadow[req->u.rw.id].req = *req; -- cgit v1.2.3-70-g09d2 From d137c8306c748d89260400176613b5a85574b255 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 08:58:23 -0600 Subject: mtip32xx: fix error return code in mtip_pci_probe() Fix to return a negative error code from the error handling case instead of 
0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 11cc9522cdd..92250af84e7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4224,6 +4224,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->isr_workq = create_workqueue(dd->workq_name); if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + rv = -ENOMEM; goto block_initialize_err; } @@ -4282,7 +4283,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); pci_set_master(pdev); - if (pci_enable_msi(pdev)) { + rv = pci_enable_msi(pdev); + if (rv) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; -- cgit v1.2.3-70-g09d2 From 183cfb5720dfc393641b87710ce78561af3db6cd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 08:59:19 -0600 Subject: loop: fix error return code in loop_add() Fix to return a negative error code from the error handling case, as returned elsewhere in this function. 
Signed-off-by: Wei Yongjun Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 747bb2af69d..ee13a82f3f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1623,6 +1623,7 @@ static int loop_add(struct loop_device **l, int i) goto out_free_dev; i = err; + err = -ENOMEM; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) goto out_free_dev; -- cgit v1.2.3-70-g09d2 From 8761a3dc1f07b163414e2215a2cadbb4cfe2a107 Mon Sep 17 00:00:00 2001 From: Phillip Susi Date: Fri, 22 Mar 2013 12:21:53 -0600 Subject: loop: cleanup partitions when detaching loop device Any partitions added by user space to the loop device were being left in place after detaching the loop device. This was because the detach path issued a BLKRRPART to clean up partitions if LO_FLAGS_PARTSCAN was set, meaning that the partitions were auto scanned on attach. Replace this BLKRRPART with code that unconditionally cleans up partitions on detach instead. Signed-off-by: Phillip Susi Modified by Jens to export delete_partition(). 
Signed-off-by: Jens Axboe --- block/partition-generic.c | 1 + drivers/block/loop.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/block/partition-generic.c b/block/partition-generic.c index 789cdea0589..ae95ee6a58a 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -257,6 +257,7 @@ void delete_partition(struct gendisk *disk, int partno) hd_struct_put(part); } +EXPORT_SYMBOL(delete_partition); static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ee13a82f3f5..fe5f6403417 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1044,12 +1044,29 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; mutex_unlock(&lo->lo_ctl_mutex); + + /* + * Remove all partitions, since BLKRRPART won't remove user + * added partitions when max_part=0 + */ + if (bdev) { + struct disk_part_iter piter; + struct hd_struct *part; + + mutex_lock_nested(&bdev->bd_mutex, 1); + invalidate_partition(bdev->bd_disk, 0); + disk_part_iter_init(&piter, bdev->bd_disk, + DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(bdev->bd_disk, part->partno); + disk_part_iter_exit(&piter); + mutex_unlock(&bdev->bd_mutex); + } + /* * Need not hold lo_ctl_mutex to fput backing file. 
* Calling fput holding lo_ctl_mutex triggers a circular -- cgit v1.2.3-70-g09d2 From d2b805d89510737ea80c1469f854a16480d19778 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 09:11:00 -0600 Subject: cciss: fix invalid use of sizeof in cciss_find_cfgtables() sizeof() when applied to a pointer typed expression gives the size of the pointer, not that of the pointed data. Signed-off-by: Wei Yongjun Acked-by: scameron@beardog.cce.hp.com Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ade58bc8f3c..1c1b8e544aa 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h) if (rc) return rc; h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev, - cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); + cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable)); if (!h->cfgtable) return -ENOMEM; rc = write_driver_ver_to_cfgtable(h->cfgtable); -- cgit v1.2.3-70-g09d2 From 122090366d1d5c6ec1bfb6dfdb3a6d121ff074aa Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 31 Jan 2013 14:40:38 -0700 Subject: NVMe: Add namespaces with no LBA range feature The LBA Range Type feature is optional in the NVMe specification, so we should continue with adding namespaces for controllers that do not implement this feature. 
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 993c014d195..e209ec5930c 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1540,7 +1540,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, dma_addr + 4096, NULL); if (res) - continue; + memset(mem + 4096, 0, 4096); ns = nvme_alloc_ns(dev, i, mem, mem + 4096); if (ns) -- cgit v1.2.3-70-g09d2 From d8d595dfce7925627de78b9eecc8598a6ffda610 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Mar 2013 19:22:31 -0600 Subject: block: removes dynamic allocation on stack This patch removes dynamic allocation on the stack error. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index d523e9c5657..95047e111a3 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -986,7 +986,10 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) int j; int cnt; struct rsxx_dma *dma; - struct list_head issued_dmas[card->n_targets]; + struct list_head *issued_dmas; + + issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, + GFP_KERNEL); for (i = 0; i < card->n_targets; i++) { INIT_LIST_HEAD(&issued_dmas[i]); @@ -1025,6 +1028,8 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) } spin_unlock(&card->ctrl[i].queue_lock); } + + kfree(issued_dmas); } void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) -- cgit v1.2.3-70-g09d2 From 4dcaf47258d59010802bd0eda933f69ee7d98cc7 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 26 Mar 2013 11:03:07 -0500 Subject: rsxx: enable error return of rsxx_eeh_save_issued_dmas() Commit d8d595df introduced a bug 
where we did not check for a NULL return from kmalloc(). Make rsxx_eeh_save_issued_dmas() return an error for that case, and make the callers handle that. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 19 ++++++++++++++++--- drivers/block/rsxx/dma.c | 6 +++++- drivers/block/rsxx/rsxx_priv.h | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 93f28191a0f..5af21f2db29 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -323,10 +323,11 @@ static int card_shutdown(struct rsxx_cardinfo *card) return 0; } -static void rsxx_eeh_frozen(struct pci_dev *dev) +static int rsxx_eeh_frozen(struct pci_dev *dev) { struct rsxx_cardinfo *card = pci_get_drvdata(dev); int i; + int st; dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); @@ -342,7 +343,9 @@ static void rsxx_eeh_frozen(struct pci_dev *dev) pci_disable_device(dev); - rsxx_eeh_save_issued_dmas(card); + st = rsxx_eeh_save_issued_dmas(card); + if (st) + return st; rsxx_eeh_save_issued_creg(card); @@ -356,6 +359,8 @@ static void rsxx_eeh_frozen(struct pci_dev *dev) card->ctrl[i].cmd.buf, card->ctrl[i].cmd.dma_addr); } + + return 0; } static void rsxx_eeh_failure(struct pci_dev *dev) @@ -399,6 +404,8 @@ static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, enum pci_channel_state error) { + int st; + if (dev->revision < RSXX_EEH_SUPPORT) return PCI_ERS_RESULT_NONE; @@ -407,7 +414,13 @@ static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, return PCI_ERS_RESULT_DISCONNECT; } - rsxx_eeh_frozen(dev); + st = rsxx_eeh_frozen(dev); + if (st) { + dev_err(&dev->dev, "Slot reset setup failed\n"); + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c 
index 95047e111a3..7594c6ddc18 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -980,7 +980,7 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) } } -void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) { int i; int j; @@ -990,6 +990,8 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, GFP_KERNEL); + if (!issued_dmas) + return -ENOMEM; for (i = 0; i < card->n_targets; i++) { INIT_LIST_HEAD(&issued_dmas[i]); @@ -1030,6 +1032,8 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) } kfree(issued_dmas); + + return 0; } void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 8a7ac87f1dc..382e8bf5c03 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -381,7 +381,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, rsxx_dma_cb cb, void *cb_data); int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); -void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); -- cgit v1.2.3-70-g09d2 From 80b00df291684850b5659ec95fb1fd2acbd2c0ec Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 26 Mar 2013 11:06:35 -0500 Subject: rsxx: remove unused variable Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 7594c6ddc18..0607513cfb4 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -1056,7 +1056,6 @@ void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) { struct 
rsxx_dma *dma; - struct rsxx_dma *tmp; int i; for (i = 0; i < card->n_targets; i++) { -- cgit v1.2.3-70-g09d2 From c613c5f686b5493290aeb6a3c4b3b2371a8582cf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 28 Mar 2013 09:43:43 -0600 Subject: mg_disk: fix error return code in mg_probe() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Reviewed-by: Jingoo Han Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1788f491e0f..076ae7f1b78 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -890,8 +890,10 @@ static int mg_probe(struct platform_device *plat_dev) gpio_direction_output(host->rst, 1); /* reset out pin */ - if (!(prv_data->dev_attr & MG_DEV_MASK)) + if (!(prv_data->dev_attr & MG_DEV_MASK)) { + err = -EINVAL; goto probe_err_3a; + } if (prv_data->dev_attr != MG_BOOT_DEV) { rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, -- cgit v1.2.3-70-g09d2 From 91c5746425aed8f7188a351f1224a26aa232e4b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Mar 2013 18:28:41 +0000 Subject: aoe: reserve enough headroom on skbs Some network drivers use a non default hard_header_len Transmitted skb should take into account dev->hard_header_len, or risk crashes or expensive reallocations. In the case of aoe, lets reserve MAX_HEADER bytes. David reported a crash in defxx driver, solved by this patch. Reported-by: David Oostdyk Tested-by: David Oostdyk Signed-off-by: Eric Dumazet Cc: Ed Cashin Signed-off-by: David S. 
Miller --- drivers/block/aoe/aoecmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 25ef5c014fc..92b6d7c51e3 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -51,8 +51,9 @@ new_skb(ulong len) { struct sk_buff *skb; - skb = alloc_skb(len, GFP_ATOMIC); + skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC); if (skb) { + skb_reserve(skb, MAX_HEADER); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); -- cgit v1.2.3-70-g09d2 From 6e2a4505dba0cae8faa701426185dfb7b49f537c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 27 Mar 2013 09:16:30 -0500 Subject: rbd: don't zero-fill non-image object requests A result of ENOENT from a read request for an object that's part of an rbd image indicates that there is a hole in that portion of the image. Similarly, a short read for such an object indicates that the remainder of the read should be interpreted as a full read with zeros filling out the end of the request. This behavior is not correct for objects that are not backing rbd image data. Currently rbd_img_obj_request_callback() assumes it should be done for all objects. Change rbd_img_obj_request_callback() so it only does this zeroing for image objects. Encapsulate that special handling in its own function. Add an assertion that the image object request is a bio request, since we assume that (and we currently don't support any other types). This resolves a problem identified here: http://tracker.ceph.com/issues/4559 The regression was introduced by bf0d5f503dc11d6314c0503591d258d60ee9c944.
Reported-by: Dan van der Ster Signed-off-by: Alex Elder Reviewed-off-by: Sage Weil --- drivers/block/rbd.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b..f556f8a8b3f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1264,6 +1264,32 @@ static bool obj_request_done_test(struct rbd_obj_request *obj_request) return atomic_read(&obj_request->done) != 0; } +static void +rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, + obj_request, obj_request->img_request, obj_request->result, + obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the image. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + BUG_ON(obj_request->type != OBJ_REQUEST_BIO); + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; + } + obj_request_done_set(obj_request); +} + static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) { dout("%s: obj %p cb %p\n", __func__, obj_request, @@ -1284,23 +1310,10 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) { dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, obj_request->result, obj_request->xferred, obj_request->length); - /* - * ENOENT means a hole in the object. We zero-fill the - * entire length of the request. A short read also implies - * zero-fill to the end of the request. 
Either way we - * update the xferred count to indicate the whole request - * was satisfied. - */ - if (obj_request->result == -ENOENT) { - zero_bio_chain(obj_request->bio_list, 0); - obj_request->result = 0; - obj_request->xferred = obj_request->length; - } else if (obj_request->xferred < obj_request->length && - !obj_request->result) { - zero_bio_chain(obj_request->bio_list, obj_request->xferred); - obj_request->xferred = obj_request->length; - } - obj_request_done_set(obj_request); + if (obj_request->img_request) + rbd_img_obj_request_read_callback(obj_request); + else + obj_request_done_set(obj_request); } static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) -- cgit v1.2.3-70-g09d2 From c1681bf8a7b1b98edee8b862a42c19c4e53205fd Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Mon, 1 Apr 2013 09:47:56 -0700 Subject: loop: prevent bdev freeing while device in use struct block_device lifecycle is defined by its inode (see fs/block_dev.c) - block_device allocated first time we access /dev/loopXX and deallocated on bdev_destroy_inode. When we create the device "losetup /dev/loopXX afile" we want that block_device stay alive until we destroy the loop device with "losetup -d". But because we do not hold /dev/loopXX inode its counter goes 0, and inode/bdev can be destroyed at any moment. Usually it happens at memory pressure or when user drops inode cache (like in the test below). When later in loop_clr_fd() we want to use bdev we have use-after-free error with following stack: BUG: unable to handle kernel NULL pointer dereference at 0000000000000280 bd_set_size+0x10/0xa0 loop_clr_fd+0x1f8/0x420 [loop] lo_ioctl+0x200/0x7e0 [loop] lo_compat_ioctl+0x47/0xe0 [loop] compat_blkdev_ioctl+0x341/0x1290 do_filp_open+0x42/0xa0 compat_sys_ioctl+0xc1/0xf20 do_sys_open+0x16e/0x1d0 sysenter_dispatch+0x7/0x1a To prevent use-after-free we need to grab the device in loop_set_fd() and put it later in loop_clr_fd(). 
The issue is reproducible on current Linus head and v3.3. Here is the test: dd if=/dev/zero of=loop.file bs=1M count=1 while [ true ]; do losetup /dev/loop0 loop.file echo 2 > /proc/sys/vm/drop_caches losetup -d /dev/loop0 done [ Doing bdgrab/bdput in loop_set_fd/loop_clr_fd is safe, because every time we call loop_set_fd() we check that loop_device->lo_state is Lo_unbound and set it to Lo_bound. If somebody tries to set_fd again, they will get EBUSY. And if we try to loop_clr_fd() on unbound loop device we'll get ENXIO. loop_set_fd/loop_clr_fd (and any other loop ioctl) is called under loop_device->lo_ctl_mutex. ] Signed-off-by: Anatol Pomozov Cc: Al Viro Signed-off-by: Linus Torvalds --- drivers/block/loop.c | 9 ++++++++- fs/block_dev.c | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fe5f6403417..2c127f9c3f3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -922,6 +922,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_flags |= LO_FLAGS_PARTSCAN; if (lo->lo_flags & LO_FLAGS_PARTSCAN) ioctl_by_bdev(bdev, BLKRRPART, 0); + + /* Grab the block_device to prevent its destruction after we + * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+ */ + bdgrab(bdev); return 0; out_clr: @@ -1031,8 +1036,10 @@ static int loop_clr_fd(struct loop_device *lo) memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); - if (bdev) + if (bdev) { + bdput(bdev); invalidate_bdev(bdev); + } set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { diff --git a/fs/block_dev.c b/fs/block_dev.c index aea605c98ba..aae187a7f94 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -551,6 +551,7 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } +EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { -- cgit v1.2.3-70-g09d2 From d0d096b1d8b97172cee953f1a00b7d4379d398da Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:53:07 +0530 Subject: mtip32xx: recovery from command timeout To recover from command timeouts, reset the device. In addition to that improved timeout handling of PIO commands. Signed-off-by: Sam Bradshaw Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 177 +++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 90 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 92250af84e7..30f23363465 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -243,40 +243,31 @@ static inline void release_slot(struct mtip_port *port, int tag) /* * Reset the HBA (without sleeping) * - * Just like hba_reset, except does not call sleep, so can be - * run from interrupt/tasklet context. - * * @dd Pointer to the driver data structure. * * return value * 0 The reset was successful. * -1 The HBA Reset bit did not clear. 
*/ -static int hba_reset_nosleep(struct driver_data *dd) +static int mtip_hba_reset(struct driver_data *dd) { unsigned long timeout; - /* Chip quirk: quiesce any chip function */ - mdelay(10); - /* Set the reset bit */ writel(HOST_RESET, dd->mmio + HOST_CTL); /* Flush */ readl(dd->mmio + HOST_CTL); - /* - * Wait 10ms then spin for up to 1 second - * waiting for reset acknowledgement - */ - timeout = jiffies + msecs_to_jiffies(1000); - mdelay(10); - while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) - && time_before(jiffies, timeout)) - mdelay(1); + /* Spin for up to 2 seconds, waiting for reset acknowledgement */ + timeout = jiffies + msecs_to_jiffies(2000); + do { + mdelay(10); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return -1; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) - return -1; + } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) + && time_before(jiffies, timeout)); if (readl(dd->mmio + HOST_CTL) & HOST_RESET) return -1; @@ -481,7 +472,7 @@ static void mtip_restart_port(struct mtip_port *port) dev_warn(&port->dd->pdev->dev, "PxCMD.CR not clear, escalating reset\n"); - if (hba_reset_nosleep(port->dd)) + if (mtip_hba_reset(port->dd)) dev_err(&port->dd->pdev->dev, "HBA reset escalation failed.\n"); @@ -527,6 +518,26 @@ static void mtip_restart_port(struct mtip_port *port) } +static int mtip_device_reset(struct driver_data *dd) +{ + int rv = 0; + + if (mtip_check_surprise_removal(dd->pdev)) + return 0; + + if (mtip_hba_reset(dd) < 0) + rv = -EFAULT; + + mdelay(1); + mtip_init_port(dd->port); + mtip_start_port(dd->port); + + /* Enable interrupts on the HBA. 
*/ + writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, + dd->mmio + HOST_CTL); + return rv; +} + /* * Helper function for tag logging */ @@ -632,7 +643,7 @@ static void mtip_timeout_function(unsigned long int data) if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { - mtip_restart_port(port); + mtip_device_reset(port->dd); wake_up_interruptible(&port->svc_wait); } clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); @@ -1283,11 +1294,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, int rv = 0, ready2go = 1; struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; unsigned long to; + struct driver_data *dd = port->dd; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { - dev_err(&port->dd->pdev->dev, - "SG buffer is not 8 byte aligned\n"); + dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n"); return -EFAULT; } @@ -1300,23 +1311,21 @@ static int mtip_exec_internal_command(struct mtip_port *port, mdelay(100); } while (time_before(jiffies, to)); if (!ready2go) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Internal cmd active. 
new cmd [%02X]\n", fis->command); return -EBUSY; } set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); port->ic_pause_timer = 0; - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - else if (fis->command == ATA_CMD_DOWNLOAD_MICRO) - clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, 5000) < 0) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); release_slot(port, MTIP_TAG_INTERNAL); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1361,58 +1370,84 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - /* Poll if atomic, wait_for_completion otherwise */ if (atomic == GFP_KERNEL) { /* Wait for the command to complete or timeout. 
*/ - if (wait_for_completion_timeout( + if (wait_for_completion_interruptible_timeout( &wait, - msecs_to_jiffies(timeout)) == 0) { - dev_err(&port->dd->pdev->dev, - "Internal command did not complete [%d] " - "within timeout of %lu ms\n", - atomic, timeout); - if (mtip_check_surprise_removal(port->dd->pdev) || + msecs_to_jiffies(timeout)) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %lu ms\n", + fis->command, timeout); + rv = -EINTR; + goto exec_ic_exit; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); + + if (mtip_check_surprise_removal(dd->pdev) || test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); rv = -ENXIO; goto exec_ic_exit; } + mtip_device_reset(dd); /* recover from timeout issue */ rv = -EAGAIN; + goto exec_ic_exit; } } else { + u32 hba_stat, port_stat; + /* Spin for checking if command still outstanding */ timeout = jiffies + msecs_to_jiffies(timeout); while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(port->dd->pdev)) { + if (mtip_check_surprise_removal(dd->pdev)) { rv = -ENXIO; goto exec_ic_exit; } if ((fis->command != ATA_CMD_STANDBYNOW1) && test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { rv = -ENXIO; goto exec_ic_exit; } - if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) { - atomic_inc(&int_cmd->active); /* error */ - break; + port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (!port_stat) + continue; + + if (port_stat & PORT_IRQ_ERR) { + 
dev_err(&dd->pdev->dev, + "Internal command [%02X] failed\n", + fis->command); + mtip_device_reset(dd); + rv = -EIO; + goto exec_ic_exit; + } else { + writel(port_stat, port->mmio + PORT_IRQ_STAT); + hba_stat = readl(dd->mmio + HOST_IRQ_STAT); + if (hba_stat) + writel(hba_stat, + dd->mmio + HOST_IRQ_STAT); } + break; } } - if (atomic_read(&int_cmd->active) > 1) { - dev_err(&port->dd->pdev->dev, - "Internal command [%02X] failed\n", fis->command); - rv = -EIO; - } if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; - if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { - mtip_restart_port(port); + if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { + mtip_device_reset(dd); rv = -EAGAIN; } } @@ -1724,7 +1759,8 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, * -EINVAL Invalid parameters passed in, trim not supported * -EIO Error submitting trim request to hw */ -static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, + unsigned int len) { int i, rv = 0; u64 tlba, tlen, sect_left; @@ -1810,45 +1846,6 @@ static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors) return (bool) !!port->identify_valid; } -/* - * Reset the HBA. - * - * Resets the HBA by setting the HBA Reset bit in the Global - * HBA Control register. After setting the HBA Reset bit the - * function waits for 1 second before reading the HBA Reset - * bit to make sure it has cleared. If HBA Reset is not clear - * an error is returned. Cannot be used in non-blockable - * context. - * - * @dd Pointer to the driver data structure. - * - * return value - * 0 The reset was successful. - * -1 The HBA Reset bit did not clear. 
- */ -static int mtip_hba_reset(struct driver_data *dd) -{ - mtip_deinit_port(dd->port); - - /* Set the reset bit */ - writel(HOST_RESET, dd->mmio + HOST_CTL); - - /* Flush */ - readl(dd->mmio + HOST_CTL); - - /* Wait for reset to clear */ - ssleep(1); - - /* Check the bit has cleared */ - if (readl(dd->mmio + HOST_CTL) & HOST_RESET) { - dev_err(&dd->pdev->dev, - "Reset bit did not clear.\n"); - return -1; - } - - return 0; -} - /* * Display the identify command data. * -- cgit v1.2.3-70-g09d2 From 6b06d35f3f63dbfde71c792b8584225f3d1fc7f2 Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:54:35 +0530 Subject: mtip32xx: return 0 from pci probe in case of rebuild Fix to return 0 from pci probe in case of rebuild. If not, pci consider probe has failed, and crash during rmmod. Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 30f23363465..ce4e47dcc15 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4302,6 +4302,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, instance++; if (rv != MTIP_FTL_REBUILD_MAGIC) set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); + else + rv = 0; /* device in rebuild state, return 0 from probe */ goto done; block_initialize_err: -- cgit v1.2.3-70-g09d2 From 0caff00390db41c98976e65f2ebc3eeaa4861358 Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:56:21 +0530 Subject: mtip32xx: Add debugfs entry device_status This patch adds a new debugfs entry 'device_status' in /sys/kernel/debug/rssd. The value of this entry shows devices online and those in the process of removing. 
Signed-off-by: Sam Bradshaw Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 148 ++++++++++++++++++++++++++++++++++++-- drivers/block/mtip32xx/mtip32xx.h | 18 +++-- 2 files changed, 154 insertions(+), 12 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ce4e47dcc15..8517af8c0fc 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -81,12 +81,17 @@ /* Device instance number, incremented each time a device is probed. */ static int instance; +struct list_head online_list; +struct list_head removing_list; +spinlock_t dev_lock; + /* * Global variable used to hold the major block device number * allocated in mtip_init(). */ static int mtip_major; static struct dentry *dfs_parent; +static struct dentry *dfs_device_status; static u32 cpu_use[NR_CPUS]; @@ -2707,6 +2712,100 @@ static ssize_t mtip_hw_show_status(struct device *dev, static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); +/* debugsfs entries */ + +static ssize_t show_device_status(struct device_driver *drv, char *buf) +{ + int size = 0; + struct driver_data *dd, *tmp; + unsigned long flags; + char id_buf[42]; + u16 status = 0; + + spin_lock_irqsave(&dev_lock, flags); + size += sprintf(&buf[size], "Devices Present:\n"); + list_for_each_entry_safe(dd, tmp, &online_list, online_list) { + if (dd && dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify + 10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + + size += 
sprintf(&buf[size], "Devices Being Removed:\n"); + list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { + if (dd && dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify+10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + spin_unlock_irqrestore(&dev_lock, flags); + + return size; +} + +static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) +{ + int size = *offset; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + + if (!len || *offset) + return 0; + + size += show_device_status(NULL, buf); + + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; +} + static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { @@ -2801,6 +2900,13 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, return *offset; } +static const struct file_operations mtip_device_status_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_device_status, + .llseek = no_llseek, +}; + static const struct file_operations mtip_regs_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -4158,6 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, const struct cpumask *node_mask; int cpu, i = 0, j = 0; int my_node = NUMA_NO_NODE; + unsigned long flags; /* Allocate memory for this devices private data. 
*/ my_node = pcibus_to_node(pdev->bus); @@ -4215,6 +4322,9 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->pdev = pdev; dd->numa_node = my_node; + INIT_LIST_HEAD(&dd->online_list); + INIT_LIST_HEAD(&dd->remove_list); + memset(dd->workq_name, 0, 32); snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); @@ -4304,6 +4414,12 @@ static int mtip_pci_probe(struct pci_dev *pdev, set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); else rv = 0; /* device in rebuild state, return 0 from probe */ + + /* Add to online list even if in ftl rebuild */ + spin_lock_irqsave(&dev_lock, flags); + list_add(&dd->online_list, &online_list); + spin_unlock_irqrestore(&dev_lock, flags); + goto done; block_initialize_err: @@ -4337,9 +4453,15 @@ static void mtip_pci_remove(struct pci_dev *pdev) { struct driver_data *dd = pci_get_drvdata(pdev); int counter = 0; + unsigned long flags; set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->online_list); + list_add(&dd->remove_list, &removing_list); + spin_unlock_irqrestore(&dev_lock, flags); + if (mtip_check_surprise_removal(pdev)) { while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { counter++; @@ -4365,6 +4487,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->remove_list); + spin_unlock_irqrestore(&dev_lock, flags); + kfree(dd); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); } @@ -4512,6 +4638,11 @@ static int __init mtip_init(void) pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); + spin_lock_init(&dev_lock); + + INIT_LIST_HEAD(&online_list); + INIT_LIST_HEAD(&removing_list); + /* Allocate a major block device number to use with this driver. 
*/ error = register_blkdev(0, MTIP_DRV_NAME); if (error <= 0) { @@ -4521,11 +4652,18 @@ static int __init mtip_init(void) } mtip_major = error; - if (!dfs_parent) { - dfs_parent = debugfs_create_dir("rssd", NULL); - if (IS_ERR_OR_NULL(dfs_parent)) { - pr_warn("Error creating debugfs parent\n"); - dfs_parent = NULL; + dfs_parent = debugfs_create_dir("rssd", NULL); + if (IS_ERR_OR_NULL(dfs_parent)) { + pr_warn("Error creating debugfs parent\n"); + dfs_parent = NULL; + } + if (dfs_parent) { + dfs_device_status = debugfs_create_file("device_status", + S_IRUGO, dfs_parent, NULL, + &mtip_device_status_fops); + if (IS_ERR_OR_NULL(dfs_device_status)) { + pr_err("Error creating device_status node\n"); + dfs_device_status = NULL; } } diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 3bffff5f670..8e8334c9dd0 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -129,9 +129,9 @@ enum { MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ - MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ - (1 << MTIP_PF_EH_ACTIVE_BIT) | \ - (1 << MTIP_PF_SE_ACTIVE_BIT) | \ + MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | + (1 << MTIP_PF_EH_ACTIVE_BIT) | + (1 << MTIP_PF_SE_ACTIVE_BIT) | (1 << MTIP_PF_DM_ACTIVE_BIT)), MTIP_PF_SVC_THD_ACTIVE_BIT = 4, @@ -144,9 +144,9 @@ enum { MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, - MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ - (1 << MTIP_DDF_SEC_LOCK_BIT) | \ - (1 << MTIP_DDF_OVER_TEMP_BIT) | \ + MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | + (1 << MTIP_DDF_SEC_LOCK_BIT) | + (1 << MTIP_DDF_OVER_TEMP_BIT) | (1 << MTIP_DDF_WRITE_PROTECT_BIT)), MTIP_DDF_CLEANUP_BIT = 5, @@ -180,7 +180,7 @@ struct mtip_work { #define MTIP_TRIM_TIMEOUT_MS 240000 #define MTIP_MAX_TRIM_ENTRIES 8 -#define MTIP_MAX_TRIM_ENTRY_LEN 
0xfff8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 struct mtip_trim_entry { u32 lba; /* starting lba of region */ @@ -501,6 +501,10 @@ struct driver_data { atomic_t irq_workers_active; int isr_binding; + + struct list_head online_list; /* linkage for online list */ + + struct list_head remove_list; /* linkage for removing list */ }; #endif -- cgit v1.2.3-70-g09d2 From c66bb3f075cfe2d17b2427e96e043622db02759c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Apr 2013 09:03:41 +0200 Subject: mtip32xx: fix two smatch warnings Dan reports: New smatch warnings: drivers/block/mtip32xx/mtip32xx.c:2728 show_device_status() warn: variable dereferenced before check 'dd' (see line 2727) drivers/block/mtip32xx/mtip32xx.c:2758 show_device_status() warn: variable dereferenced before check 'dd' (see line 2757) which are checking if dd == NULL, in a list_for_each_entry() type loop. Get rid of the check, dd can never be NULL here. Reported-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8517af8c0fc..32c678028e5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2725,7 +2725,7 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) spin_lock_irqsave(&dev_lock, flags); size += sprintf(&buf[size], "Devices Present:\n"); list_for_each_entry_safe(dd, tmp, &online_list, online_list) { - if (dd && dd->pdev) { + if (dd->pdev) { if (dd->port && dd->port->identify && dd->port->identify_valid) { @@ -2755,7 +2755,7 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) size += sprintf(&buf[size], "Devices Being Removed:\n"); list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { - if (dd && dd->pdev) { + if (dd->pdev) { if (dd->port && dd->port->identify && dd->port->identify_valid) { -- cgit 
v1.2.3-70-g09d2 From c2fccc1c9f7c81700cbac2120a4ad5441dd37004 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2013 10:12:11 +0200 Subject: Revert "loop: cleanup partitions when detaching loop device" This reverts commit 8761a3dc1f07b163414e2215a2cadbb4cfe2a107. There are situations where the destruction path is called with the bdev->bd_mutex already held, which then deadlocks in loop_clr_fd(). The normal partition cleanup does a trylock() on the mutex, but it'd be nice to have a more bullet proof method in loop. So punt this more involved fix to the next merge window, and just back out this buggy fix for now. Signed-off-by: Jens Axboe --- block/partition-generic.c | 1 - drivers/block/loop.c | 21 ++------------------- 2 files changed, 2 insertions(+), 20 deletions(-) (limited to 'drivers/block') diff --git a/block/partition-generic.c b/block/partition-generic.c index ae95ee6a58a..789cdea0589 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -257,7 +257,6 @@ void delete_partition(struct gendisk *disk, int partno) hd_struct_put(part); } -EXPORT_SYMBOL(delete_partition); static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fe5f6403417..ee13a82f3f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1044,29 +1044,12 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. 
*/ module_put(THIS_MODULE); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) + ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; mutex_unlock(&lo->lo_ctl_mutex); - - /* - * Remove all partitions, since BLKRRPART won't remove user - * added partitions when max_part=0 - */ - if (bdev) { - struct disk_part_iter piter; - struct hd_struct *part; - - mutex_lock_nested(&bdev->bd_mutex, 1); - invalidate_partition(bdev->bd_disk, 0); - disk_part_iter_init(&piter, bdev->bd_disk, - DISK_PITER_INCL_EMPTY); - while ((part = disk_part_iter_next(&piter))) - delete_partition(bdev->bd_disk, part->partno); - disk_part_iter_exit(&piter); - mutex_unlock(&bdev->bd_mutex); - } - /* * Need not hold lo_ctl_mutex to fput backing file. * Calling fput holding lo_ctl_mutex triggers a circular -- cgit v1.2.3-70-g09d2 From 46faeed4a61e220b99591e9773057160eb437cc8 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Apr 2013 17:47:46 -0500 Subject: rbd: do a safe list traversal in rbd_img_request_submit() It's possible that the reference to the object request dropped inside the loop in rbd_img_request_submit() will be the last one, in which case the content of the object pointer can't be trusted. Use a safe form of the object request list traversal to avoid problems. 
This resolves: http://tracker.ceph.com/issues/4705 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f556f8a8b3f..b7b7a88d9f6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1742,9 +1742,10 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) struct rbd_device *rbd_dev = img_request->rbd_dev; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; dout("%s: img %p\n", __func__, img_request); - for_each_obj_request(img_request, obj_request) { + for_each_obj_request_safe(img_request, obj_request, next_obj_request) { int ret; obj_request->callback = rbd_img_obj_callback; -- cgit v1.2.3-70-g09d2