diff options
Diffstat (limited to 'drivers/dma/ioat')
| -rw-r--r-- | drivers/dma/ioat/dca.c | 11 | ||||
| -rw-r--r-- | drivers/dma/ioat/dma.c | 130 | ||||
| -rw-r--r-- | drivers/dma/ioat/dma.h | 65 | ||||
| -rw-r--r-- | drivers/dma/ioat/dma_v2.c | 126 | ||||
| -rw-r--r-- | drivers/dma/ioat/dma_v2.h | 2 | ||||
| -rw-r--r-- | drivers/dma/ioat/dma_v3.c | 1128 | ||||
| -rw-r--r-- | drivers/dma/ioat/hw.h | 116 | ||||
| -rw-r--r-- | drivers/dma/ioat/pci.c | 45 | ||||
| -rw-r--r-- | drivers/dma/ioat/registers.h | 4 |
9 files changed, 1077 insertions, 550 deletions
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 9b041858d10..9e84d5bc930 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -470,8 +470,10 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) } if (!dca2_tag_map_valid(ioatdca->tag_map)) { - dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, " - "disabling DCA\n"); + WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, + "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); free_dca_provider(dca); return NULL; } @@ -689,7 +691,10 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) } if (dca3_tag_map_invalid(ioatdca->tag_map)) { - dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n"); + WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, + "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); free_dca_provider(dca); return NULL; } diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 1a68a8ba87e..4e3549a1613 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -77,7 +77,8 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { chan = ioat_chan_by_index(instance, bit); - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &chan->state)) + tasklet_schedule(&chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -93,7 +94,8 @@ static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { struct ioat_chan_common *chan = data; - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &chan->state)) + tasklet_schedule(&chan->cleanup_task); return IRQ_HANDLED; } @@ -116,7 +118,6 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *c chan->timer.function = device->timer_fn; chan->timer.data = data; tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); - tasklet_disable(&chan->cleanup_task); } /** @@ -354,13 +355,49 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - tasklet_enable(&chan->cleanup_task); + set_bit(IOAT_RUN, &chan->state); ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", __func__, ioat->desccount); return ioat->desccount; } +void ioat_stop(struct ioat_chan_common *chan) +{ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + int chan_id = chan_num(chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &chan->state); + + /* flush inflight interrupts */ + switch (device->irq_mode) { + case IOAT_MSIX: + msix = &device->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + del_timer_sync(&chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + device->cleanup_fn((unsigned long) &chan->common); +} + /** * ioat1_dma_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned @@ -379,9 +416,7 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) if (ioat->desccount == 0) return; - tasklet_disable(&chan->cleanup_task); - del_timer_sync(&chan->timer); - ioat1_cleanup(ioat); + ioat_stop(chan); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -526,26 +561,14 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, static void ioat1_cleanup_event(unsigned long data) { struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat1_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } -void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, - size_t len, struct ioat_dma_descriptor *hw) -{ - struct pci_dev *pdev = chan->device->pdev; - size_t offset = len - hw->size; - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, hw->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) - ioat_unmap(pdev, hw->src_addr - offset, len, - PCI_DMA_TODEVICE, flags, 0); -} - dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) { dma_addr_t phys_complete; @@ -602,7 +625,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) dump_desc_dbg(ioat, desc); if (tx->cookie) { dma_cookie_complete(tx); - ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + dma_descriptor_unmap(tx); ioat->active -= desc->hw->tx_cnt; if (tx->callback) { tx->callback(tx->callback_param); @@ -733,7 +756,7 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, enum dma_status ret; ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_SUCCESS) + if (ret == DMA_COMPLETE) return ret; device->cleanup_fn((unsigned long) c); @@ -832,15 +855,22 @@ int ioat_dma_self_test(struct ioatdma_device *device) } dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | - DMA_PREP_INTERRUPT; + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, IOAT_TEST_SIZE, flags); if (!tx) { dev_err(dev, "Self-test prep failed, disabling\n"); err = -ENODEV; - goto free_resources; + goto unmap_dma; } async_tx_ack(tx); @@ -851,7 +881,7 @@ int ioat_dma_self_test(struct ioatdma_device *device) if (cookie < 0) { dev_err(dev, "Self-test setup failed, disabling\n"); err = -ENODEV; - goto free_resources; + goto unmap_dma; } dma->device_issue_pending(dma_chan); @@ -859,10 +889,10 @@ int ioat_dma_self_test(struct ioatdma_device *device) if (tmo == 0 || dma->device_tx_status(dma_chan, cookie, NULL) - != DMA_SUCCESS) { + != DMA_COMPLETE) { dev_err(dev, "Self-test copy timed out, disabling\n"); err = -ENODEV; - goto free_resources; + goto unmap_dma; } if (memcmp(src, dest, IOAT_TEST_SIZE)) { dev_err(dev, "Self-test copy failed compare, disabling\n"); @@ -870,6 +900,10 @@ int ioat_dma_self_test(struct ioatdma_device *device) goto free_resources; } +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); free_resources: dma->device_free_chan_resources(dma_chan); out: @@ -882,14 +916,13 @@ static char ioat_interrupt_style[32] = "msix"; module_param_string(ioat_interrupt_style, ioat_interrupt_style, sizeof(ioat_interrupt_style), 0644); MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), " - "msix-single-vector, msi, intx)"); + "set ioat interrupt style: msix (default), msi, intx"); /** * ioat_dma_setup_interrupts - setup interrupt handler * @device: ioat device */ -static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +int ioat_dma_setup_interrupts(struct ioatdma_device *device) { struct ioat_chan_common *chan; struct pci_dev *pdev = device->pdev; @@ -901,8 +934,6 @@ static int ioat_dma_setup_interrupts(struct ioatdma_device *device) if (!strcmp(ioat_interrupt_style, "msix")) goto msix; - if (!strcmp(ioat_interrupt_style, "msix-single-vector")) - goto msix_single_vector; if (!strcmp(ioat_interrupt_style, "msi")) goto msi; if (!strcmp(ioat_interrupt_style, "intx")) @@ -917,10 +948,8 @@ msix: device->msix_entries[i].entry = i; err = pci_enable_msix(pdev, device->msix_entries, msixcnt); - if (err < 0) + if (err) goto msi; - if (err > 0) - goto msix_single_vector; for (i = 0; i < msixcnt; i++) { msix = &device->msix_entries[i]; @@ -934,25 +963,11 @@ msix: chan = ioat_chan_by_index(device, j); devm_free_irq(dev, msix->vector, chan); } - goto msix_single_vector; + goto msi; } } intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - goto done; - -msix_single_vector: - msix = &device->msix_entries[0]; - msix->entry = 0; - err = pci_enable_msix(pdev, device->msix_entries, 1); - if (err) - goto msi; - - err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, - "ioat-msix", device); - if (err) { - pci_disable_msix(pdev); - goto msi; - } + device->irq_mode = IOAT_MSIX; goto done; msi: @@ -966,6 +981,7 @@ msi: pci_disable_msi(pdev); goto intx; } + device->irq_mode = IOAT_MSI; goto done; intx: @@ -974,6 +990,7 @@ intx: if (err) goto err_no_irq; + device->irq_mode = IOAT_INTX; done: if (device->intr_quirk) device->intr_quirk(device); @@ -984,9 +1001,11 @@ done: err_no_irq: /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + device->irq_mode = IOAT_NOIRQ; dev_err(dev, "no usable interrupts\n"); return err; } +EXPORT_SYMBOL(ioat_dma_setup_interrupts); static void ioat_disable_interrupts(struct ioatdma_device *device) { @@ -1096,12 +1115,11 @@ static ssize_t cap_show(struct dma_chan *c, char *page) { struct dma_device *dma = c->device; - return sprintf(page, "copy%s%s%s%s%s%s\n", + return sprintf(page, "copy%s%s%s%s%s\n", dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 087935f1565..e982f00a984 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -39,6 +39,7 @@ #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) #define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) @@ -48,6 +49,13 @@ */ #define NULL_DESC_BUFFER_SIZE 1 +enum ioat_irq_mode { + IOAT_NOIRQ = 0, + IOAT_MSIX, + IOAT_MSI, + IOAT_INTX +}; + /** * struct ioatdma_device - internal representation of a IOAT device * @pdev: PCI-Express device @@ -72,11 +80,15 @@ struct ioatdma_device { void __iomem *reg_base; struct pci_pool *dma_pool; struct pci_pool *completion_pool; +#define MAX_SED_POOLS 5 + struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; struct dma_device common; u8 version; struct msix_entry msix_entries[4]; struct ioat_chan_common *idx[4]; struct dca_provider *dca; + enum ioat_irq_mode irq_mode; + u32 cap; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); int (*reset_hw)(struct ioat_chan_common *chan); @@ -97,6 +109,7 @@ struct ioat_chan_common { #define IOAT_KOBJ_INIT_FAIL 3 #define IOAT_RESHAPE_PENDING 4 #define IOAT_RUN 5 + #define IOAT_CHAN_ACTIVE 6 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) @@ -130,6 +143,20 @@ struct ioat_dma_chan { u16 active; }; +/** + * struct ioat_sed_ent - wrapper around super extended hardware descriptor + * @hw: hardware SED + * @sed_dma: dma address for the SED + * @list: list member + * @parent: point to the dma descriptor that's the parent + */ +struct ioat_sed_ent { + struct ioat_sed_raw_descriptor *hw; + dma_addr_t dma; + struct ioat_ring_ent *parent; + unsigned int hw_pool; +}; + static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) { return container_of(c, struct ioat_chan_common, common); @@ -178,7 +205,7 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, struct device *dev = to_dev(chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" - " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, (unsigned long long) tx->phys, (unsigned long long) hw->next, tx->cookie, tx->flags, hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); @@ -200,7 +227,7 @@ ioat_chan_by_index(struct ioatdma_device *device, int index) return device->idx[index]; } -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) { u8 ver = chan->device->version; u64 status; @@ -217,6 +244,26 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) return status; } +#if BITS_PER_LONG == 64 + +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + + /* With IOAT v3.3 the status register is 64bit. */ + if (ver >= IOAT_VER_3_3) + status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + else + status = ioat_chansts_32(chan); + + return status; +} + +#else +#define ioat_chansts ioat_chansts_32 +#endif + static inline void ioat_start(struct ioat_chan_common *chan) { u8 ver = chan->device->version; @@ -293,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, - int direction, enum dma_ctrl_flags flags, bool dst) -{ - if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || - (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) - pci_unmap_single(pdev, addr, len, direction); - else - pci_unmap_page(pdev, addr, len, direction); -} - int ioat_probe(struct ioatdma_device *device); int ioat_register(struct ioatdma_device *device); int ioat1_dma_probe(struct ioatdma_device *dev, int dca); @@ -314,12 +351,12 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx); enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, struct dma_tx_state *txstate); -void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, - size_t len, struct ioat_dma_descriptor *hw); bool ioat_cleanup_preamble(struct ioat_chan_common *chan, dma_addr_t *phys_complete); void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); void ioat_kobject_del(struct ioatdma_device *device); +int ioat_dma_setup_interrupts(struct ioatdma_device *device); +void ioat_stop(struct ioat_chan_common *chan); extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 82d4e306c32..8d1058085ee 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) tx = &desc->txd; dump_desc_dbg(ioat, desc); if (tx->cookie) { - ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + dma_descriptor_unmap(tx); dma_cookie_complete(tx); if (tx->callback) { tx->callback(tx->callback_param); @@ -190,8 +190,11 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) void ioat2_cleanup_event(unsigned long data) { struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat2_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } @@ -269,61 +272,22 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) __ioat2_restart_chan(ioat); } -void ioat2_timer_event(unsigned long data) +static void check_active(struct ioat2_dma_chan *ioat) { - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); struct ioat_chan_common *chan = &ioat->base; - if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) { - __cleanup(ioat, phys_complete); - } else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat2_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - spin_unlock_bh(&chan->cleanup_lock); - } else { - u16 active; + if (ioat2_ring_active(ioat)) { + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + return; + } + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat->alloc_order > ioat_get_alloc_order()) { /* if the ring is idle, empty, and oversized try to step * down the size */ - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - active = ioat2_ring_active(ioat); - if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) - reshape_ring(ioat, ioat->alloc_order-1); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); + reshape_ring(ioat, ioat->alloc_order - 1); /* keep shrinking until we get back to our minimum * default size @@ -331,6 +295,60 @@ void ioat2_timer_event(unsigned long data) if (ioat->alloc_order > ioat_get_alloc_order()) mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } + +} + +void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + u64 status; + + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); + ioat2_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + + + if (ioat2_ring_active(ioat)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat->prep_lock); + check_active(ioat); + spin_unlock_bh(&ioat->prep_lock); + } + spin_unlock_bh(&chan->cleanup_lock); } static int ioat2_reset_hw(struct ioat_chan_common *chan) @@ -404,7 +422,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); - if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &chan->state)) mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); /* make descriptor updates visible before advancing ioat->head, @@ -538,10 +556,10 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) ioat->issued = 0; ioat->tail = 0; ioat->alloc_order = order; + set_bit(IOAT_RUN, &chan->state); spin_unlock_bh(&ioat->prep_lock); spin_unlock_bh(&chan->cleanup_lock); - tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); /* check that we got off the ground */ @@ -551,7 +569,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); if (is_ioat_active(status) || is_ioat_idle(status)) { - set_bit(IOAT_RUN, &chan->state); return 1 << ioat->alloc_order; } else { u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); @@ -794,11 +811,8 @@ void ioat2_free_chan_resources(struct dma_chan *c) if (!ioat->ring) return; - tasklet_disable(&chan->cleanup_task); - del_timer_sync(&chan->timer); - device->cleanup_fn((unsigned long) c); + ioat_stop(chan); device->reset_hw(chan); - clear_bit(IOAT_RUN, &chan->state); spin_lock_bh(&chan->cleanup_lock); spin_lock_bh(&ioat->prep_lock); diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index e100f644e34..470292767e6 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -123,7 +123,6 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len struct ioat_ring_ent { union { struct ioat_dma_descriptor *hw; - struct ioat_fill_descriptor *fill; struct ioat_xor_descriptor *xor; struct ioat_xor_ext_descriptor *xor_ex; struct ioat_pq_descriptor *pq; @@ -137,6 +136,7 @@ struct ioat_ring_ent { #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; static inline struct ioat_ring_ent * diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 3e9d66920eb..b9b38a1cf92 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -55,7 +55,7 @@ /* * Support routines for v3+ hardware */ - +#include <linux/module.h> #include <linux/pci.h> #include <linux/gfp.h> #include <linux/dmaengine.h> @@ -67,9 +67,15 @@ #include "dma.h" #include "dma_v2.h" +extern struct kmem_cache *ioat3_sed_cache; + /* ioat hardware assumes at least two sources for raid operations */ #define src_cnt_to_sw(x) ((x) + 2) #define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor @@ -77,14 +83,13 @@ static const u8 xor_idx_to_desc = 0xe0; static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; -static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - return raw->field[xor_idx_to_field[idx]]; -} +static void ioat3_eh(struct ioat2_dma_chan *ioat); static void xor_set_src(struct ioat_raw_descriptor *descs[2], dma_addr_t addr, u32 offset, int idx) @@ -101,6 +106,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) return raw->field[pq_idx_to_field[idx]]; } +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + static void pq_set_src(struct ioat_raw_descriptor *descs[2], dma_addr_t addr, u32 offset, u8 coef, int idx) { @@ -111,122 +123,159 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], pq->coef[idx] = coef; } -static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc, int idx) +static bool is_jf_ioat(struct pci_dev *pdev) { - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = chan->device->pdev; - size_t len = desc->len; - size_t offset = len - desc->hw->size; - struct dma_async_tx_descriptor *tx = &desc->txd; - enum dma_ctrl_flags flags = tx->flags; - - switch (desc->hw->ctl_f.op) { - case IOAT_OP_COPY: - if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ - ioat_dma_unmap(chan, flags, len, desc->hw); - break; - case IOAT_OP_FILL: { - struct ioat_fill_descriptor *hw = desc->fill; + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, hw->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - break; +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; } - case IOAT_OP_XOR_VAL: - case IOAT_OP_XOR: { - struct ioat_xor_descriptor *xor = desc->xor; - struct ioat_ring_ent *ext; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); - struct ioat_raw_descriptor *descs[2]; - int i; +} - if (src_cnt > 5) { - ext = ioat2_get_ring_ent(ioat, idx + 1); - xor_ex = ext->xor_ex; - } +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (i = 0; i < src_cnt; i++) { - dma_addr_t src = xor_get_src(descs, i); +} - ioat_unmap(pdev, src - offset, len, - PCI_DMA_TODEVICE, flags, 0); - } +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } - /* dest is a source in xor validate operations */ - if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { - ioat_unmap(pdev, xor->dst_addr - offset, len, - PCI_DMA_TODEVICE, flags, 1); - break; - } - } +} - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, xor->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - break; +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev); +} + +static bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + return true; + default: + return false; } - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ: { - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_ring_ent *ext; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - struct ioat_raw_descriptor *descs[2]; - int i; +} - if (src_cnt > 3) { - ext = ioat2_get_ring_ent(ioat, idx + 1); - pq_ex = ext->pq_ex; - } +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + return true; + default: + return false; + } - /* in the 'continue' case don't unmap the dests as sources */ - if (dmaf_p_disabled_continue(flags)) - src_cnt--; - else if (dmaf_continue(flags)) - src_cnt -= 3; - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - for (i = 0; i < src_cnt; i++) { - dma_addr_t src = pq_get_src(descs, i); - - ioat_unmap(pdev, src - offset, len, - PCI_DMA_TODEVICE, flags, 0); - } +} - /* the dests are sources in pq validate operations */ - if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, - len, PCI_DMA_TODEVICE, flags, 0); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, - len, PCI_DMA_TODEVICE, flags, 0); - break; - } - } +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - } - break; - } - default: - dev_err(&pdev->dev, "%s: unknown op type: %#x\n", - __func__, desc->hw->ctl_f.op); + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat3_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat3_sed_cache, sed); + return NULL; } + + return sed; +} + +static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) +{ + if (!sed) + return; + + dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat3_sed_cache, sed); } static bool desc_has_ext(struct ioat_ring_ent *desc) @@ -250,6 +299,63 @@ static bool desc_has_ext(struct ioat_ring_ent *desc) return false; } +static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) +{ + u64 phys_complete; + u64 completion; + + completion = *chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + return phys_complete; +} + +static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, + u64 *phys_complete) +{ + *phys_complete = ioat3_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + +static void +desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; + + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; + + /* need to set a chanerr var for checking to clear later */ + + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; + + return; + } + default: + return; + } +} + /** * __cleanup - reclaim used descriptors * @ioat: channel (ring) to clean @@ -260,6 +366,7 @@ static bool desc_has_ext(struct ioat_ring_ent *desc) static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) { struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; bool seen_current = false; int idx = ioat->tail, i; @@ -268,6 +375,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. + */ + if (!phys_complete) + return; + active = ioat2_ring_active(ioat); for (i = 0; i < active && !seen_current; i++) { struct dma_async_tx_descriptor *tx; @@ -276,10 +393,15 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); desc = ioat2_get_ring_ent(ioat, idx + i); dump_desc_dbg(ioat, desc); + + /* set err stat if we are using dwbes */ + if (device->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat, desc); + tx = &desc->txd; if (tx->cookie) { dma_cookie_complete(tx); - ioat3_dma_unmap(ioat, desc, idx + i); + dma_descriptor_unmap(tx); if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -294,6 +416,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) BUG_ON(i + 1 >= active); i++; } + + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat3_free_sed(device, desc->sed); + desc->sed = NULL; + } } smp_mb(); /* finish all descriptor reads before incrementing tail */ ioat->tail = idx + i; @@ -314,89 +442,127 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) static void ioat3_cleanup(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; + u64 phys_complete; spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) + + if (ioat3_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); + + if (is_ioat_halted(*chan->completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + ioat3_eh(ioat); + } + } + spin_unlock_bh(&chan->cleanup_lock); } static void ioat3_cleanup_event(unsigned long data) { struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat3_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; + u64 phys_complete; ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) + if (ioat3_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); __ioat2_restart_chan(ioat); } -static void ioat3_timer_event(unsigned long data) +static void ioat3_eh(struct ioat2_dma_chan *ioat) { - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = to_pdev(chan); + struct ioat_dma_descriptor *hw; + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; - if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; - u64 status; + /* cleanup so tail points to descriptor that caused the error */ + if (ioat3_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); - status = ioat_chansts(chan); + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } + dev_dbg(to_dev(chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + desc = ioat2_get_ring_ent(ioat, ioat->tail); + hw = desc->hw; + dump_desc_dbg(ioat, desc); + + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; } - spin_unlock_bh(&chan->cleanup_lock); - } else { - u16 active; + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; + } + + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } + + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); + + /* mark faulting descriptor as complete */ + *chan->completion = desc->txd.phys; + spin_lock_bh(&ioat->prep_lock); + ioat3_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); +} + +static void check_active(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + if (ioat2_ring_active(ioat)) { + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + return; + } + + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat->alloc_order > ioat_get_alloc_order()) { /* if the ring is idle, empty, and oversized try to step * down the size */ - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - active = ioat2_ring_active(ioat); - if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) - reshape_ring(ioat, ioat->alloc_order-1); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); + reshape_ring(ioat, ioat->alloc_order - 1); /* keep shrinking until we get back to our minimum * default size @@ -404,6 +570,60 @@ static void ioat3_timer_event(unsigned long data) if (ioat->alloc_order > ioat_get_alloc_order()) mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } + +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + u64 status; + + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); + ioat3_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + + + if (ioat2_ring_active(ioat)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat->prep_lock); + check_active(ioat); + spin_unlock_bh(&ioat->prep_lock); + } + spin_unlock_bh(&chan->cleanup_lock); } static enum dma_status @@ -414,7 +634,7 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, enum dma_status ret; ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_SUCCESS) + if (ret == DMA_COMPLETE) return ret; ioat3_cleanup(ioat); @@ -423,51 +643,6 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, } static struct dma_async_tx_descriptor * -ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_fill_descriptor *fill; - u64 src_data = (0x0101010101010101ULL) * (value & 0xff); - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - fill = desc->fill; - - fill->size = xfer_size; - fill->src_data = src_data; - fill->dst_addr = dest; - fill->ctl = 0; - fill->ctl_f.op = IOAT_OP_FILL; - - len -= xfer_size; - dest += xfer_size; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - fill->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static struct dma_async_tx_descriptor * __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) @@ -590,7 +765,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct int i; dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", desc_id(desc), (unsigned long long) desc->txd.phys, (unsigned long long) (pq_ex ? pq_ex->next : pq->next), desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, @@ -602,6 +778,42 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct (unsigned long long) pq_get_src(descs, i), pq->coef[i]); dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); } static struct dma_async_tx_descriptor * @@ -612,6 +824,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *compl_desc; struct ioat_ring_ent *desc; struct ioat_ring_ent *ext; @@ -622,6 +835,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, u32 offset = 0; u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; int i, s, idx, with_ext, num_descs; + int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0; dev_dbg(to_dev(chan), "%s\n", __func__); /* the engine requires at least two sources (we provide @@ -647,7 +861,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, * order. */ if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs+1) == 0) + ioat2_check_space_lock(ioat, num_descs + cb32) == 0) idx = ioat->head; else return NULL; @@ -685,6 +899,9 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, pq->q_addr = dst[1] + offset; pq->ctl = 0; pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (device->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; pq->ctl_f.src_cnt = src_cnt_to_hw(s); pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); @@ -701,22 +918,140 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); dump_pq_desc_dbg(ioat, desc, ext); - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + } + /* we leave the channel locked to ensure in order submission */ return &compl_desc->txd; } static struct dma_async_tx_descriptor * +__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(chan), "%s\n", __func__); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (device->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +static struct dma_async_tx_descriptor * ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, unsigned long flags) @@ -740,11 +1075,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, single_source_coef[0] = scf[0]; single_source_coef[1] = 0; - return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - } else - return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, - len, flags); + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } } struct dma_async_tx_descriptor * @@ -763,8 +1107,11 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, */ *pqres = 0; - return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); } static struct dma_async_tx_descriptor * @@ -779,8 +1126,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = dst; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); } struct dma_async_tx_descriptor * @@ -801,8 +1151,11 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = pq[0]; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, - len, flags); + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); } static struct dma_async_tx_descriptor * @@ -851,7 +1204,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) struct page *xor_srcs[IOAT_NUM_SRC_TEST]; struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_addr, dest_dma; + dma_addr_t dest_dma; struct dma_async_tx_descriptor *tx; struct dma_chan *dma_chan; dma_cookie_t cookie; @@ -863,6 +1216,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) unsigned long tmo; struct device *dev = &device->pdev->dev; struct dma_device *dma = &device->common; + u8 op = 0; dev_dbg(dev, "%s\n", __func__); @@ -908,6 +1262,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) } /* test xor */ + op = IOAT_OP_XOR; + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); for (i = 0; i < IOAT_NUM_SRC_TEST; i++) dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, @@ -919,7 +1275,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (!tx) { dev_err(dev, "Self-test xor prep failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } async_tx_ack(tx); @@ -930,18 +1286,22 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (cookie < 0) { dev_err(dev, "Self-test xor setup failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } dma->device_issue_pending(dma_chan); tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { dev_err(dev, "Self-test xor timed out\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { u32 *ptr = page_address(dest); @@ -957,6 +1317,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) goto free_resources; + op = IOAT_OP_XOR_VAL; + /* validate the sources with the destintation page */ for (i = 0; i < IOAT_NUM_SRC_TEST; i++) xor_val_srcs[i] = xor_srcs[i]; @@ -973,7 +1335,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (!tx) { dev_err(dev, "Self-test zero prep failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } async_tx_ack(tx); @@ -984,69 +1346,32 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (cookie < 0) { dev_err(dev, "Self-test zero setup failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } dma->device_issue_pending(dma_chan); tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { dev_err(dev, "Self-test validate timed out\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + if (xor_val_result != 0) { dev_err(dev, "Self-test validate failed compare\n"); err = -ENODEV; goto free_resources; } - /* skip memset if the capability is not present */ - if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) - goto free_resources; - - /* test memset */ - dma_addr = dma_map_page(dev, dest, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, - DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test memset prep failed\n"); - err = -ENODEV; - goto free_resources; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test memset setup failed\n"); - err = -ENODEV; - goto free_resources; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { - dev_err(dev, "Self-test memset timed out\n"); - err = -ENODEV; - goto free_resources; - } - - for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { - u32 *ptr = page_address(dest); - if (ptr[i]) { - dev_err(dev, "Self-test memset failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } + memset(page_address(dest), 0, PAGE_SIZE); /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + xor_val_result = 0; for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, @@ -1057,7 +1382,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (!tx) { dev_err(dev, "Self-test 2nd zero prep failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } async_tx_ack(tx); @@ -1068,24 +1393,39 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) if (cookie < 0) { dev_err(dev, "Self-test 2nd zero setup failed\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } dma->device_issue_pending(dma_chan); tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { dev_err(dev, "Self-test 2nd validate timed out\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } if (xor_val_result != SUM_CHECK_P_RESULT) { dev_err(dev, "Self-test validate failed compare\n"); err = -ENODEV; - goto free_resources; + goto dma_unmap; } + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } free_resources: dma->device_free_chan_resources(dma_chan); out: @@ -1110,6 +1450,40 @@ static int ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_irq_reinit(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + int irq = pdev->irq, i; + + if (!is_bwd_ioat(pdev)) + return 0; + + switch (device->irq_mode) { + case IOAT_MSIX: + for (i = 0; i < device->common.chancnt; i++) { + struct msix_entry *msix = &device->msix_entries[i]; + struct ioat_chan_common *chan; + + chan = ioat_chan_by_index(device, i); + devm_free_irq(&pdev->dev, msix->vector, chan); + } + + pci_disable_msix(pdev); + break; + case IOAT_MSI: + pci_disable_msi(pdev); + /* fall through */ + case IOAT_INTX: + devm_free_irq(&pdev->dev, irq, device); + break; + default: + return 0; + } + device->irq_mode = IOAT_NOIRQ; + + return ioat_dma_setup_interrupts(device); +} + static int ioat3_reset_hw(struct ioat_chan_common *chan) { /* throw away whatever the channel was doing and get it @@ -1126,64 +1500,62 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan) chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3, and clear any - * pending errors - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, "channel error register unreachable\n"); - return err; + if (device->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); + } } - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); + if (!err) + err = ioat3_irq_reinit(device); - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} + if (err) + dev_err(&pdev->dev, "Failed to reset: %d\n", err); -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } + return err; } -static bool is_snb_ioat(struct pci_dev *pdev) +static void ioat3_intr_quirk(struct ioatdma_device *device) { - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + u32 errmask; + + dma = &device->common; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (device->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + errmask = readl(chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } } } @@ -1196,33 +1568,32 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u32 cap; device->enumerate_channels = ioat2_enumerate_channels; device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; + device->intr_quirk = ioat3_intr_quirk; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; dma->device_free_chan_resources = ioat2_free_chan_resources; - if (is_jf_ioat(pdev) || is_snb_ioat(pdev)) - dma->copy_align = 6; - dma_cap_set(DMA_INTERRUPT, dma->cap_mask); dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); /* dca is incompatible with raid operations */ - if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - if (cap & IOAT_CAP_XOR) { + if (device->cap & IOAT_CAP_XOR) { is_raid_device = true; dma->max_xor = 8; - dma->xor_align = 6; dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_xor; @@ -1230,53 +1601,56 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma_cap_set(DMA_XOR_VAL, dma->cap_mask); dma->device_prep_dma_xor_val = ioat3_prep_xor_val; } - if (cap & IOAT_CAP_PQ) { + + if (device->cap & IOAT_CAP_PQ) { is_raid_device = true; - dma_set_maxpq(dma, 8, 0); - dma->pq_align = 6; - dma_cap_set(DMA_PQ, dma->cap_mask); dma->device_prep_dma_pq = ioat3_prep_pq; - - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - if (!(cap & IOAT_CAP_XOR)) { - dma->max_xor = 8; - dma->xor_align = 6; + if (device->cap & IOAT_CAP_RAID16SS) { + dma_set_maxpq(dma, 16, 0); + } else { + dma_set_maxpq(dma, 8, 0); + } - dma_cap_set(DMA_XOR, dma->cap_mask); + if (!(device->cap & IOAT_CAP_XOR)) { dma->device_prep_dma_xor = ioat3_prep_pqxor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (device->cap & IOAT_CAP_RAID16SS) { + dma->max_xor = 16; + } else { + dma->max_xor = 8; + } } } - if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { - dma_cap_set(DMA_MEMSET, dma->cap_mask); - dma->device_prep_dma_memset = ioat3_prep_memset_lock; - } + dma->device_tx_status = ioat3_tx_status; + device->cleanup_fn = ioat3_cleanup_event; + device->timer_fn = ioat3_timer_event; - if (is_raid_device) { - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - } else { - dma->device_tx_status = ioat_dma_tx_status; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - } + /* starting with CB3.3 super extended descriptors are supported */ + if (device->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; - #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA - dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); - dma->device_prep_dma_pq_val = NULL; - #endif + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); - #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA - dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = NULL; - #endif + /* allocate SED DMA pool */ + device->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!device->sed_hw_pool[i]) + return -ENOMEM; + + } + } err = ioat_probe(device); if (err) diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index d2ff3fda0b1..62f83e983d8 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -30,10 +30,39 @@ #define IOAT_PCI_DID_SCNB 0x65FF #define IOAT_PCI_DID_SNB 0x402F +#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e +#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f + +#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27 +#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e +#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f + +#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52 +#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53 + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ #define IOAT_VER_3_2 0x32 /* Version 3.2 */ +#define IOAT_VER_3_3 0x33 /* Version 3.3 */ + int system_has_dca_enabled(struct pci_dev *pdev); @@ -71,33 +100,6 @@ struct ioat_dma_descriptor { uint64_t user2; }; -struct ioat_fill_descriptor { - uint32_t size; - union { - uint32_t ctl; - struct { - unsigned int int_en:1; - unsigned int rsvd:1; - unsigned int dest_snoop_dis:1; - unsigned int compl_write:1; - unsigned int fence:1; - unsigned int rsvd2:2; - unsigned int dest_brk:1; - unsigned int bundle:1; - unsigned int rsvd4:15; - #define IOAT_OP_FILL 0x01 - unsigned int op:8; - } ctl_f; - }; - uint64_t src_data; - uint64_t dst_addr; - uint64_t next; - uint64_t rsv1; - uint64_t next_dst_addr; - uint64_t user1; - uint64_t user2; -}; - struct ioat_xor_descriptor { uint32_t size; union { @@ -136,7 +138,17 @@ struct ioat_xor_ext_descriptor { }; struct ioat_pq_descriptor { - uint32_t size; + union { + uint32_t size; + uint32_t dwbes; + struct { + unsigned int rsvd:25; + unsigned int p_val_err:1; + unsigned int q_val_err:1; + unsigned int rsvd1:4; + unsigned int wbes:1; + } dwbes_f; + }; union { uint32_t ctl; struct { @@ -151,9 +163,14 @@ struct ioat_pq_descriptor { unsigned int hint:1; unsigned int p_disable:1; unsigned int q_disable:1; - unsigned int rsvd:11; + unsigned int rsvd2:2; + unsigned int wb_en:1; + unsigned int prl_en:1; + unsigned int rsvd3:7; #define IOAT_OP_PQ 0x89 #define IOAT_OP_PQ_VAL 0x8a + #define IOAT_OP_PQ_16S 0xa0 + #define IOAT_OP_PQ_VAL_16S 0xa1 unsigned int op:8; } ctl_f; }; @@ -161,7 +178,10 @@ struct ioat_pq_descriptor { uint64_t p_addr; uint64_t next; uint64_t src_addr2; - uint64_t src_addr3; + union { + uint64_t src_addr3; + uint64_t sed_addr; + }; uint8_t coef[8]; uint64_t q_addr; }; @@ -210,4 +230,40 @@ struct ioat_pq_update_descriptor { struct ioat_raw_descriptor { uint64_t field[8]; }; + +struct ioat_pq16a_descriptor { + uint8_t coef[8]; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t src_addr9; +}; + +struct ioat_pq16b_descriptor { + uint64_t src_addr10; + uint64_t src_addr11; + uint64_t src_addr12; + uint64_t src_addr13; + uint64_t src_addr14; + uint64_t src_addr15; + uint64_t src_addr16; + uint64_t rsvd; +}; + +union ioat_sed_pq_descriptor { + struct ioat_pq16a_descriptor a; + struct ioat_pq16b_descriptor b; +}; + +#define SED_SIZE 64 + +struct ioat_sed_raw_descriptor { + uint64_t a[8]; + uint64_t b[8]; + uint64_t c[8]; +}; + #endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 4f686c527ab..1d051cd045d 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -40,17 +40,6 @@ MODULE_VERSION(IOAT_DMA_VERSION); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); -#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 -#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e -#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f - static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v1 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, @@ -105,6 +94,23 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); @@ -117,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); struct kmem_cache *ioat2_cache; +struct kmem_cache *ioat3_sed_cache; #define DRV_NAME "ioatdma" @@ -212,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { - int err; + int err = -ENOMEM; pr_info("%s: Intel(R) QuickData Technology Driver %s\n", DRV_NAME, IOAT_DMA_VERSION); @@ -222,9 +229,21 @@ static int __init ioat_init_module(void) if (!ioat2_cache) return -ENOMEM; + ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat3_sed_cache) + goto err_ioat2_cache; + err = pci_register_driver(&ioat_pci_driver); if (err) - kmem_cache_destroy(ioat2_cache); + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat3_sed_cache); + + err_ioat2_cache: + kmem_cache_destroy(ioat2_cache); return err; } diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 1391798542b..2f1cfa0f1f4 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -79,6 +79,8 @@ #define IOAT_CAP_APIC 0x00000080 #define IOAT_CAP_XOR 0x00000100 #define IOAT_CAP_PQ 0x00000200 +#define IOAT_CAP_DWBES 0x00002000 +#define IOAT_CAP_RAID16SS 0x00020000 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ @@ -93,6 +95,8 @@ #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 #define IOAT_CHANCTRL_INT_REARM 0x0001 #define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_INT_EN |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ IOAT_CHANCTRL_ANY_ERR_ABORT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ |
