diff options
Diffstat (limited to 'drivers/infiniband/hw/qib')
31 files changed, 2582 insertions, 718 deletions
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 8349f9c5064..495be09781b 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,7 +1,15 @@ config INFINIBAND_QIB - tristate "QLogic PCIe HCA support" + tristate "Intel PCIe HCA support" depends on 64BIT ---help--- - This is a low-level driver for QLogic PCIe QLE InfiniBand host - channel adapters. This driver does not support the QLogic + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index f12d7bb8b39..57f8103e51f 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 4d11575c201..c00ae093b6f 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -51,6 +51,7 @@ #include <linux/completion.h> #include <linux/kref.h> #include <linux/sched.h> +#include <linux/kthread.h> #include "qib_common.h" #include "qib_verbs.h" @@ -88,7 +89,6 @@ struct qlogic_ib_stats { extern struct qlogic_ib_stats qib_stats; extern const struct pci_error_handlers qib_pci_err_handler; -extern struct pci_driver qib_driver; #define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ /* @@ -114,6 +114,11 @@ struct qib_eep_log_mask { /* * Below contains all data related to a single context (formerly called port). */ + +#ifdef CONFIG_DEBUG_FS +struct qib_opcode_stats_perctx; +#endif + struct qib_ctxtdata { void **rcvegrbuf; dma_addr_t *rcvegrbuf_phys; @@ -154,6 +159,8 @@ struct qib_ctxtdata { */ /* instead of calculating it */ unsigned ctxt; + /* local node of context */ + int node_id; /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. */ @@ -222,12 +229,15 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* lookaside fields */ struct qib_qp *lookaside_qp; u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif }; struct qib_sge_state; @@ -428,9 +438,19 @@ struct qib_verbs_txreq { #define ACTIVITY_TIMER 5 #define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + struct qib_msix_entry { struct msix_entry msix; void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif char name[MAX_NAME_SIZE]; cpumask_var_t mask; }; @@ -555,11 +575,13 @@ struct qib_pportdata { /* read/write using lock */ spinlock_t sdma_lock ____cacheline_aligned_in_smp; struct list_head sdma_activelist; + struct list_head sdma_userpending; u64 sdma_descq_added; u64 sdma_descq_removed; u16 sdma_descq_tail; u16 sdma_descq_head; u8 sdma_generation; + u8 sdma_intrequest; struct tasklet_struct sdma_sw_clean_up_task ____cacheline_aligned_in_smp; @@ -828,6 +850,9 @@ struct qib_devdata { struct qib_ctxtdata *); void (*f_writescratch)(struct qib_devdata *, u32); int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif char *boardname; /* human readable board info */ @@ -843,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -1075,6 +1102,10 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; + /* per device cq worker */ + struct kthread_worker *worker; + + int assigned_node_id; /* NUMA node closest to HCA */ }; /* hol_state values */ @@ -1154,8 +1185,8 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); @@ -1298,6 +1329,8 @@ int qib_setup_sdma(struct qib_pportdata *); void qib_teardown_sdma(struct qib_pportdata *); void __qib_sdma_intr(struct qib_pportdata *); void qib_sdma_intr(struct qib_pportdata *); +void qib_user_sdma_send_desc(struct qib_pportdata *dd, + struct list_head *pktlist); int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, u32, struct qib_verbs_txreq *); /* ppd->sdma_lock should be locked before calling this. */ @@ -1320,7 +1353,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd) return ppd->sdma_state.current_state == qib_sdma_state_s99_running; } int qib_sdma_running(struct qib_pportdata *); - +void dump_sdma_state(struct qib_pportdata *ppd); void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); @@ -1418,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw @@ -1445,6 +1482,7 @@ extern unsigned qib_n_krcv_queues; extern unsigned qib_sdma_fetch_arb; extern unsigned qib_compat_ddr_negotiate; extern int qib_special_trigger; +extern unsigned qib_numa_aware; extern struct mutex qib_mutex; @@ -1474,27 +1512,23 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index d39e0183ff8..5670ace27c6 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,7 +279,7 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 11 +#define QIB_USER_SWMINOR 13 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) @@ -701,7 +701,37 @@ struct qib_message_header { __be32 bth[3]; /* fields below this point are in host byte order */ struct qib_header iph; + /* fields below are simplified, but should match PSM */ + /* some are accessed by driver when packet spliting is needed */ __u8 sub_opcode; + __u8 flags; + __u16 commidx; + __u32 ack_seq_num; + __u8 flowid; + __u8 hdr_dlen; + __u16 mqhdr; + __u32 uwords[4]; +}; + +/* sequence number bits for message */ +union qib_seqnum { + struct { + __u32 seq:11; + __u32 gen:8; + __u32 flow:5; + }; + struct { + __u32 pkt:16; + __u32 msg:8; + }; + __u32 val; +}; + +/* qib receiving-dma tid-session-member */ +struct qib_tid_session_member { + __u16 tid; + __u16 offset; + __u16 length; }; /* IB - LRH header consts */ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index 5246aa486bb..ab4e11cfab1 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -34,8 +35,10 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> #include "qib_verbs.h" +#include "qib.h" /** * qib_cq_enter - add a new entry to the completion queue @@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - cq->notify = IB_CQ_NONE; - cq->triggered++; + struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - queue_work(qib_cq_wq, &cq->comptask); + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } } spin_unlock_irqrestore(&cq->lock, flags); @@ -163,7 +171,7 @@ bail: return npolled; } -static void send_complete(struct work_struct *work) +static void send_complete(struct kthread_work *work) { struct qib_cq *cq = container_of(work, struct qib_cq, comptask); @@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, * The number of entries should be >= the number requested or return * an error. */ + cq->dd = dd_from_dev(dev); cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); - INIT_WORK(&cq->comptask, send_complete); + init_kthread_work(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; @@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); - flush_work(&cq->comptask); + flush_kthread_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); @@ -483,3 +492,49 @@ bail_free: bail: return ret; } + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 00000000000..799a0c3bffc --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,283 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/export.h> + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 00000000000..7ae983a91b8 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 1686fd4bda8..5dfda4c5cc9 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { qib_devinfo(dd->pcidev, @@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; @@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { pr_err("vmalloc for observer failed\n"); - goto bail; + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb39a65..59fe092b4b0 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 5423edcab51..5bee08f16d7 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -63,8 +64,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic IB driver"); +MODULE_AUTHOR("Intel <ibsupport@intel.com>"); +MODULE_DESCRIPTION("Intel IB driver"); MODULE_VERSION(QIB_DRIVER_VERSION); /* @@ -557,7 +558,6 @@ move_along: } rcd->head = l; - rcd->pkt_count += i; /* * Iterate over all QPs waiting to respond. diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 959a5c4ff81..b15e34eeef6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> @@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendevous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1177,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor) return user_swminor == 3; default: /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; + return user_swminor <= QIB_USER_SWMINOR; } } /* make no promises yet for future major versions */ @@ -1259,12 +1302,20 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); + + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; - rcd = qib_create_ctxtdata(ppd, ctxt); + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to @@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1405,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } @@ -1485,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) { + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + } + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1517,61 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } - i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - unsigned int weight; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendezvous for all - * users of the driver. If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - weight = cpumask_weight(tsk_cpus_allowed(current)); - if (!ret && weight >= qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (weight == 1 && - test_bit(cpumask_first(tsk_cpus_allowed(current)), - qib_cpulist)) - qib_devinfo(dd->pcidev, - "%s PID %u affinity set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: @@ -2208,7 +2289,7 @@ int qib_cdev_init(int minor, const char *name, goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, name); + device = device_create(qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 65a2a23f6f8..cab610ccd50 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -45,7 +45,7 @@ static struct super_block *qib_super; -#define private2dd(file) ((file)->f_dentry->d_inode->i_private) +#define private2dd(file) (file_inode(file)->i_private) static int qibfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *fops, @@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } @@ -171,7 +172,7 @@ static const struct file_operations cntr_ops[] = { }; /* - * Could use file->f_dentry->d_inode->i_ino to figure out which file, + * Could use file_inode(file)->i_ino to figure out which file, * instead of separate routine for each, but for now, this works... */ @@ -456,13 +457,13 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); } + dput(tmp); ret = 0; bail: @@ -491,6 +492,7 @@ static int remove_device_files(struct super_block *sb, goto bail; } + mutex_lock(&dir->d_inode->i_mutex); remove_file(dir, "counters"); remove_file(dir, "counter_names"); remove_file(dir, "portcounter_names"); @@ -505,8 +507,10 @@ static int remove_device_files(struct super_block *sb, } } remove_file(dir, "flash"); - d_delete(dir); + mutex_unlock(&dir->d_inode->i_mutex); ret = simple_rmdir(root->d_inode, dir); + d_delete(dir); + dput(dir); bail: mutex_unlock(&root->d_inode->i_mutex); @@ -604,6 +608,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a099ac171e2..d68266ac761 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64); /* * This file contains all the chip-specific register information and - * access functions for the QLogic QLogic_IB PCI-Express chip. + * access functions for the Intel Intel_IB PCI-Express chip. * */ @@ -1633,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1807,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ @@ -3265,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; @@ -3463,6 +3465,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) return -ENXIO; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 6120 boards never disable EEPROM Write */ static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -3538,6 +3547,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_6120_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 64d0ecb90cd..7dec89fdc12 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ @@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; @@ -4513,6 +4513,13 @@ bail: return ret; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 7220 boards never disable EEPROM Write */ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_7220_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 3f6b21e9dc1..a7eb32517a0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -44,6 +44,9 @@ #include <linux/module.h> #include <rdma/ib_verbs.h> #include <rdma/ib_smi.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_7322_regs.h" @@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); static void serdes_7322_los_enable(struct qib_pportdata *, int); static int serdes_7322_init_old(struct qib_pportdata *); static int serdes_7322_init_new(struct qib_pportdata *); +static void dump_sdma_7322_state(struct qib_pportdata *); #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) @@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = { [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + struct qib_chip_specific { u64 __iomem *cregbase; u64 *cntrs; @@ -546,6 +558,12 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; @@ -573,7 +591,7 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ #define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ @@ -635,6 +653,7 @@ struct qib_chippport_specific { u8 ibmalfusesnap; struct qib_qsfp_data qsfp_data; char epmsgbuf[192]; /* for port error interrupt msg buffer */ + char sdmamsgbuf[192]; /* for per-port sdma error messages */ }; static struct { @@ -642,27 +661,75 @@ static struct { irq_handler_t handler; int lsb; int port; /* 0 if not port-specific, else port # */ + int dca; } irq_table[] = { - { "", qib_7322intr, -1, 0 }, + { "", qib_7322intr, -1, 0, 0 }, { " (buf avail)", qib_7322bufavail, - SYM_LSB(IntStatus, SendBufAvail), 0 }, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, { " (sdma 0)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_0), 1 }, + SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, { " (sdma 1)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_1), 2 }, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, { " (sdmaI 0)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, { " (sdmaI 1)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, { " (sdmaP 0)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, { " (sdmaP 1)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, { " (sdmaC 0)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, { " (sdmaC 1)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, +}; + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, }; +#endif /* ibcctrl bits */ #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 @@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *); static void setup_7322_link_recovery(struct qib_pportdata *, u32); static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif /** * qib_read_ureg32 - read 32-bit virtualized per-context register @@ -1522,6 +1596,8 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) struct qib_devdata *dd = ppd->dd; errs &= QIB_E_P_SDMAERRS; + err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf), + errs, qib_7322p_error_msgs); if (errs & QIB_E_P_SDMAUNEXPDATA) qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit, @@ -1529,6 +1605,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) spin_lock_irqsave(&ppd->sdma_lock, flags); + if (errs != QIB_E_P_SDMAHALT) { + /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */ + qib_dev_porterr(dd, ppd->port, + "SDMA %s 0x%016llx %s\n", + qib_sdma_state_names[ppd->sdma_state.current_state], + errs, ppd->cpspec->sdmamsgbuf); + dump_sdma_7322_state(ppd); + } + switch (ppd->sdma_state.current_state) { case qib_sdma_state_s00_hw_down: break; @@ -2084,6 +2169,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs & + (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) | + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) { + int pidx = 0; + int err; + unsigned long flags; + struct qib_pportdata *ppd = dd->pport; + for (; pidx < dd->num_pports; ++pidx, ppd++) { + err = 0; + if (pidx == 0 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_0))) + err++; + if (pidx == 1 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) + err++; + if (err) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + dump_sdma_7322_state(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + if (isfatal && !dd->diag_client) { qib_dev_err(dd, "Fatal Hardware Error, no longer usable, SN %.16s\n", @@ -2287,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); qib_write_kreg(dd, kr_scratch, 0ULL); + /* ensure previous Tx parameters are not still forced */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + if (qib_compat_ddr_negotiate) { ppd->cpspec->ibdeltainprog = 1; ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, @@ -2558,6 +2671,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? + SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? + SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + /* * Disable MSIx interrupt if enabled, call generic MSIx code * to cleanup, and clear pending MSIx interrupts. @@ -2575,6 +2844,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd) dd->cspec->num_msix_entries = 0; for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif irq_set_affinity_hint( dd->cspec->msix_entries[i].msix.vector, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); @@ -2602,6 +2874,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->sendchkenable); @@ -2834,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -2905,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | @@ -2934,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -2967,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2996,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3025,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3055,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3068,6 +3335,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + /* * Set up our chip-specific interrupt handler. * The interrupt type has already been setup, so @@ -3149,6 +3463,9 @@ try_intx: void *arg; u64 val; int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif dd->cspec->msix_entries[msixnum]. name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] @@ -3161,6 +3478,9 @@ try_intx: arg = dd->pport + irq_table[i].port - 1; } else arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3178,6 +3498,9 @@ try_intx: continue; if (qib_krcvq01_no_msi && ctxt < 2) continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3203,6 +3526,11 @@ try_intx: goto try_intx; } dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -3381,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. Use @@ -5853,21 +6182,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp) { struct qib_devdata *dd; unsigned long val; - int ret; - + char *n; if (strlen(str) >= MAX_ATTEN_LEN) { pr_info("txselect_values string too long\n"); return -ENOSPC; } - ret = kstrtoul(str, 0, &val); - if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + val = simple_strtoul(str, &n, 0); + if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { pr_info("txselect_values must start with a number < %d\n", TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); - return ret ? ret : -EINVAL; + return -EINVAL; } - strcpy(txselect_list, str); + list_for_each_entry(dd, &qib_dev_list, list) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) set_no_qsfp_atten(dd, 1); @@ -6216,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; @@ -6452,6 +6784,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); } +/* + * sdma_lock should be acquired before calling this routine + */ +static void dump_sdma_7322_state(struct qib_pportdata *ppd) +{ + u64 reg, reg1, reg2; + + reg = qib_read_kreg_port(ppd, krp_senddmastatus); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmastatus: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_sendctrl); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sendctrl: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabase); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabase: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabufmask0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + /* get bufuse bits, clear them, and print them again if non-zero */ + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + reg = qib_read_kreg_port(ppd, krp_senddmatail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmatail: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmahead); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmahead: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaheadaddr: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmalengen); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmalengen: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmadesccnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmadesccnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaidlecnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmapriorityhld: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmareloadcnt: 0x%016llx\n", reg); + + dump_sdma_state(ppd); +} + static struct sdma_set_state_action sdma_7322_action_table[] = { [qib_sdma_state_s00_hw_down] = { .go_s99_running_tofalse = 1, @@ -6885,6 +7297,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->f_sdma_init_early = qib_7322_sdma_init_early; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. @@ -6921,7 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, actual_cnt -= dd->num_pports; tabsize = actual_cnt; - dd->cspec->msix_entries = kmalloc(tabsize * + dd->cspec->msix_entries = kzalloc(tabsize * sizeof(struct qib_msix_entry), GFP_KERNEL); if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); @@ -6941,7 +7356,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); - +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (!dca_add_requester(&pdev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } +#endif goto bail; bail_cleanup: @@ -7156,15 +7577,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -7173,15 +7599,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -7190,15 +7621,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 1, 12, 10 }, /* QME7342 backplane setting */ - { 0, 1, 12, 11 }, /* QME7342 backplane setting */ - { 0, 1, 12, 12 }, /* QME7342 backplane setting */ - { 0, 1, 12, 14 }, /* QME7342 backplane setting */ - { 0, 1, 12, 6 }, /* QME7342 backplane setting */ - { 0, 1, 12, 7 }, /* QME7342 backplane setting */ - { 0, 1, 12, 8 }, /* QME7342 backplane setting */ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 4443adfcd9e..8d3c78ddc90 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -39,10 +39,17 @@ #include <linux/idr.h> #include <linux/module.h> #include <linux/printk.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_common.h" #include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif #undef pr_fmt #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt @@ -64,6 +71,11 @@ ushort qib_cfgctxts; module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + /* * If set, do not write to any regs if avoidable, hack to allow * check for deranged default register values. @@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_cq_wq; - static void verify_interrupt(unsigned long); static struct idr qib_unit_table; @@ -120,7 +130,11 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; /* * Allocate full ctxtcnt array, rather than just cfgctxts, because @@ -130,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!dd->rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata array, failing\n"); - ret = -ENOMEM; - goto done; + return -ENOMEM; } /* create (one or more) kctxt */ @@ -143,38 +156,51 @@ int qib_create_ctxts(struct qib_devdata *dd) continue; ppd = dd->pport + (i % dd->num_pports); - rcd = qib_create_ctxtdata(ppd, i); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* * Common code for user and kernel context setup. */ -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) { struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); if (rcd) { INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; rcd->ppd = ppd; rcd->dd = dd; rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif dd->f_init_ctxt(rcd); /* @@ -204,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) /* * Common code for initializing the physical port structure. */ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { int size; @@ -214,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); @@ -221,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, ppd->symerr_clear_timer.data = (unsigned long)ppd; ppd->qib_wq = NULL; - - spin_lock_init(&ppd->cc_shadow_lock); + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) goto bail; @@ -270,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, goto bail_3; } - return; + return 0; bail_3: kfree(ppd->ccti_entries_shadow); @@ -284,7 +313,7 @@ bail_1: bail: /* User is intentionally disabling the congestion control agent */ if (!qib_cc_table_size) - return; + return 0; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { qib_cc_table_size = 0; @@ -295,7 +324,7 @@ bail: qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); - return; + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd) dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; + ret = qib_cq_init(dd); done: return ret; } @@ -495,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd) static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -503,7 +534,8 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) dev_err(&dd->pcidev->dev, "No interrupts detected, not usable.\n"); @@ -603,6 +635,12 @@ wq_error: return -ENOMEM; } +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + /** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device @@ -890,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd) destroy_workqueue(ppd->qib_wq); ppd->qib_wq = NULL; } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); @@ -944,6 +983,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif kfree(rcd); } @@ -1033,7 +1076,6 @@ done: dd->f_set_armlaunch(dd, 1); } - void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; @@ -1043,9 +1085,37 @@ void qib_free_devdata(struct qib_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&qib_devs_lock, flags); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1060,28 +1130,34 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) struct qib_devdata *dd; int ret; - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&dd->list); + + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); - ret = idr_get_new(&qib_unit_table, dd, &dd->unit); - if (ret >= 0) + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; list_add(&dd->list, &qib_dev_list); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); + goto bail; + } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); goto bail; } @@ -1095,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) qib_early_err(&pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* @@ -1134,11 +1216,10 @@ void qib_disable_after_error(struct qib_devdata *dd) *dd->devstatusp |= QIB_STATUS_HWERROR; } -static void __devexit qib_remove_one(struct pci_dev *); -static int __devinit qib_init_one(struct pci_dev *, - const struct pci_device_id *); +static void qib_remove_one(struct pci_dev *); +static int qib_init_one(struct pci_dev *, const struct pci_device_id *); -#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { @@ -1150,19 +1231,48 @@ static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { MODULE_DEVICE_TABLE(pci, qib_pci_tbl); -struct pci_driver qib_driver = { +static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, - .remove = __devexit_p(qib_remove_one), + .remove = qib_remove_one, .id_table = qib_pci_tbl, .err_handler = &qib_pci_err_handler, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. */ -static int __init qlogic_ib_init(void) +static int __init qib_ib_init(void) { int ret; @@ -1170,27 +1280,22 @@ static int __init qlogic_ib_init(void) if (ret) goto bail; - qib_cq_wq = create_singlethread_workqueue("qib_cq"); - if (!qib_cq_wq) { - ret = -ENOMEM; - goto bail_dev; - } - /* * These must be called before the driver is registered with * the PCI subsystem. */ idr_init(&qib_unit_table); - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - pr_err("idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail_cq_wq; - } +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); +#endif ret = pci_register_driver(&qib_driver); if (ret < 0) { pr_err("Unable to register driver: error %d\n", -ret); - goto bail_unit; + goto bail_dev; } /* not fatal if it doesn't work */ @@ -1198,22 +1303,25 @@ static int __init qlogic_ib_init(void) pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ -bail_unit: - idr_destroy(&qib_unit_table); -bail_cq_wq: - destroy_workqueue(qib_cq_wq); bail_dev: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; } -module_init(qlogic_ib_init); +module_init(qib_ib_init); /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ -static void __exit qlogic_ib_cleanup(void) +static void __exit qib_ib_cleanup(void) { int ret; @@ -1223,9 +1331,13 @@ static void __exit qlogic_ib_cleanup(void) "Unable to cleanup counter filesystem: error %d\n", -ret); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&qib_driver); - - destroy_workqueue(qib_cq_wq); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif qib_cpulist_count = 0; kfree(qib_cpulist); @@ -1234,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void) qib_dev_cleanup(); } -module_exit(qlogic_ib_cleanup); +module_exit(qib_ib_cleanup); /* this can only be called after a successful initialization */ static void cleanup_device_data(struct qib_devdata *dd) @@ -1276,7 +1388,7 @@ static void cleanup_device_data(struct qib_devdata *dd) if (dd->pageshadow) { struct page **tmpp = dd->pageshadow; dma_addr_t *tmpd = dd->physshadow; - int i, cnt = 0; + int i; for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { int ctxt_tidbase = ctxt * dd->rcvtidcnt; @@ -1289,13 +1401,13 @@ static void cleanup_device_data(struct qib_devdata *dd) PAGE_SIZE, PCI_DMA_FROMDEVICE); qib_release_user_pages(&tmpp[i], 1); tmpp[i] = NULL; - cnt++; } } - tmpp = dd->pageshadow; dd->pageshadow = NULL; vfree(tmpp); + dd->physshadow = NULL; + vfree(tmpd); } /* @@ -1317,6 +1429,7 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); + qib_cq_exit(dd); } /* @@ -1342,8 +1455,7 @@ static void qib_postinit_cleanup(struct qib_devdata *dd) qib_free_devdata(dd); } -static int __devinit qib_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret, j, pidx, initfail; struct qib_devdata *dd = NULL; @@ -1362,7 +1474,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, dd = qib_init_iba6120_funcs(pdev, ent); #else qib_early_err(&pdev->dev, - "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", ent->device); dd = ERR_PTR(-ENODEV); #endif @@ -1378,7 +1490,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, default: qib_early_err(&pdev->dev, - "Failing on unknown QLogic deviceid 0x%x\n", + "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; } @@ -1448,7 +1560,7 @@ bail: return ret; } -static void __devexit qib_remove_one(struct pci_dev *pdev) +static void qib_remove_one(struct pci_dev *pdev) { struct qib_devdata *dd = pci_get_drvdata(pdev); int ret; @@ -1490,6 +1602,7 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) { unsigned amt; + int old_node_id; if (!rcd->rcvhdrq) { dma_addr_t phys_hdrqtail; @@ -1499,9 +1612,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) sizeof(u32), PAGE_SIZE); gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrq = dma_alloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrq) { qib_dev_err(dd, @@ -1517,9 +1634,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) } if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1563,6 +1682,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; gfp_t gfp_flags; + int old_node_id; /* * GFP_USER, but without GFP_FS, so buffer cache can be @@ -1581,25 +1701,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) size = rcd->rcvegrbuf_size; if (!rcd->rcvegrbuf) { rcd->rcvegrbuf = - kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), - GFP_KERNEL); + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf) goto bail; } if (!rcd->rcvegrbuf_phys) { rcd->rcvegrbuf_phys = - kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), - GFP_KERNEL); + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf_phys) goto bail_rcvegrbuf; } for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { if (rcd->rcvegrbuf[e]) continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvegrbuf[e] = dma_alloc_coherent(&dd->pcidev->dev, size, &rcd->rcvegrbuf_phys[e], gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvegrbuf[e]) goto bail_rcvegrbuf_phys; } diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 81c7b73695d..3b9afccaaad 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -61,7 +61,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) if (dma_region) { struct qib_mregion *tmr; - tmr = rcu_dereference(dev->dma_mr); + tmr = rcu_access_pointer(dev->dma_mr); if (!tmr) { qib_get_mr(mr); rcu_assign_pointer(dev->dma_mr, mr); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ccb119143d2..22c720e5740 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; @@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, return reply((struct ib_smp *)pmp); } +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { @@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); @@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index 57bd3fa016b..941d4d50d8e 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -54,7 +54,7 @@ struct ib_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; -} __attribute__ ((packed)); +} __packed; struct ib_mad_notice_attr { u8 generic_type; @@ -73,7 +73,7 @@ struct ib_mad_notice_attr { __be16 reserved; __be16 lid; /* where violation happened */ u8 port_num; /* where violation happened */ - } __attribute__ ((packed)) ntc_129_131; + } __packed ntc_129_131; struct { __be16 reserved; @@ -83,14 +83,14 @@ struct ib_mad_notice_attr { __be32 new_cap_mask; /* new capability mask */ u8 reserved3; u8 change_flags; /* low 3 bits only */ - } __attribute__ ((packed)) ntc_144; + } __packed ntc_144; struct { __be16 reserved; __be16 lid; /* lid where sys guid changed */ __be16 reserved2; __be64 new_sys_guid; - } __attribute__ ((packed)) ntc_145; + } __packed ntc_145; struct { __be16 reserved; @@ -104,7 +104,7 @@ struct ib_mad_notice_attr { u8 reserved3; u8 dr_trunc_hop; u8 dr_rtn_path[30]; - } __attribute__ ((packed)) ntc_256; + } __packed ntc_256; struct { __be16 reserved; @@ -115,7 +115,7 @@ struct ib_mad_notice_attr { __be32 qp2; /* high 8 bits reserved */ union ib_gid gid1; union ib_gid gid2; - } __attribute__ ((packed)) ntc_257_258; + } __packed ntc_257_258; } details; }; @@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong { __be64 port_rcv_packets; __be64 port_xmit_wait; __be64 port_adr_events; -} __attribute__ ((packed)); +} __packed; #define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 #define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 @@ -415,7 +415,6 @@ struct cc_table_shadow { struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; } __packed; -#endif /* _QIB_MAD_H */ /* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields * which specify the N'th counter's capabilities. See ch. 16.1.3.2. @@ -428,3 +427,5 @@ struct cc_table_shadow { COUNTER_MASK(1, 2) | \ COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) + +#endif /* _QIB_MAD_H */ diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index e6687ded821..9bbb55347cc 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; mr = alloc_mr(n, pd); if (IS_ERR(mr)) { @@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; @@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c574ec7c85e..61a0046efb7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -51,8 +51,8 @@ * file calls, even though this violates some * expectations of harmlessness. */ -static int qib_tune_pcie_caps(struct qib_devdata *); -static int qib_tune_pcie_coalesce(struct qib_devdata *); +static void qib_tune_pcie_caps(struct qib_devdata *); +static void qib_tune_pcie_coalesce(struct qib_devdata *); /* * Do all the common PCIe setup and initialization. @@ -197,46 +197,47 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, struct qib_msix_entry *qib_msix_entry) { int ret; - u32 tabsize = 0; - u16 msix_flags; + int nvec = *msixcnt; struct msix_entry *msix_entry; int i; + ret = pci_msix_vec_count(dd->pcidev); + if (ret < 0) + goto do_intx; + + nvec = min(nvec, ret); + /* We can't pass qib_msix_entry array to qib_msix_setup * so use a dummy msix_entry array and copy the allocated * irq back to the qib_msix_entry array. */ - msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) { - ret = -ENOMEM; + msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) goto do_intx; - } - for (i = 0; i < *msixcnt; i++) + + for (i = 0; i < nvec; i++) msix_entry[i] = qib_msix_entry[i].msix; - pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); - tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); - if (tabsize > *msixcnt) - tabsize = *msixcnt; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - if (ret > 0) { - tabsize = ret; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - } -do_intx: - if (ret) { - qib_dev_err(dd, - "pci_enable_msix %d vectors failed: %d, falling back to INTx\n", - tabsize, ret); - tabsize = 0; - } - for (i = 0; i < tabsize; i++) + ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); + if (ret < 0) + goto free_msix_entry; + else + nvec = ret; + + for (i = 0; i < nvec; i++) qib_msix_entry[i].msix = msix_entry[i]; + kfree(msix_entry); - *msixcnt = tabsize; + *msixcnt = nvec; + return; - if (ret) - qib_enable_intx(dd->pcidev); +free_msix_entry: + kfree(msix_entry); +do_intx: + qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, " + "falling back to INTx\n", nvec, ret); + *msixcnt = 0; + qib_enable_intx(dd->pcidev); } /** @@ -283,12 +284,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, goto bail; } - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); + pos = dd->pcidev->msix_cap; if (nent && *nent && pos) { qib_msix_setup(dd, pos, nent, entry); ret = 0; /* did it, either MSIx or INTx */ } else { - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (pos) ret = qib_msi_setup(dd, pos); else @@ -357,7 +358,7 @@ int qib_reinit_intr(struct qib_devdata *dd) if (!dd->msi_lo) goto bail; - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (!pos) { qib_dev_err(dd, "Can't find MSI capability, can't restore MSI settings\n"); @@ -426,7 +427,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, PCI_COMMAND, new); - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + pos = pdev->msi_cap; if (pos) { /* then turn off MSI */ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); @@ -434,7 +435,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); } - pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + pos = pdev->msix_cap; if (pos) { /* then turn off MSIx */ pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); @@ -476,30 +477,6 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) "pci_enable_device failed after reset: %d\n", r); } -/* code to adjust PCIe capabilities. */ - -static int fld2val(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - wd &= mask; - lsbmask = mask ^ (mask & (mask - 1)); - wd /= lsbmask; - return wd; -} - -static int val2fld(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - lsbmask = mask ^ (mask & (mask - 1)); - wd *= lsbmask; - return wd; -} static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); @@ -511,7 +488,7 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); * of these chipsets, with some BIOS settings, and enabling it on those * systems may result in the system crashing, and/or data corruption. */ -static int qib_tune_pcie_coalesce(struct qib_devdata *dd) +static void qib_tune_pcie_coalesce(struct qib_devdata *dd) { int r; struct pci_dev *parent; @@ -519,18 +496,18 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) u32 mask, bits, val; if (!qib_pcie_coalesce) - return 0; + return; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; if (parent->bus->parent) { qib_devinfo(dd->pcidev, "Parent not root\n"); - return 1; + return; } if (!pci_is_pcie(parent)) - return 1; + return; if (parent->vendor != 0x8086) - return 1; + return; /* * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 @@ -563,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) mask = (3U << 24) | (7U << 10); } else { /* not one of the chipsets that we know about */ - return 1; + return; } pci_read_config_dword(parent, 0x48, &val); val &= ~mask; val |= bits; r = pci_write_config_dword(parent, 0x48, val); - return 0; } /* @@ -580,55 +556,44 @@ static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); -static int qib_tune_pcie_caps(struct qib_devdata *dd) +static void qib_tune_pcie_caps(struct qib_devdata *dd) { - int ret = 1; /* Assume the worst */ struct pci_dev *parent; - u16 pcaps, pctl, ecaps, ectl; - int rc_sup, ep_sup; - int rc_cur, ep_cur; + u16 rc_mpss, rc_mps, ep_mpss, ep_mps; + u16 rc_mrrs, ep_mrrs, max_mrrs; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; - if (parent->bus->parent) { + if (!pci_is_root_bus(parent->bus)) { qib_devinfo(dd->pcidev, "Parent not root\n"); - goto bail; + return; } if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) - goto bail; - pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps); - pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl); + return; + + rc_mpss = parent->pcie_mpss; + rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); + ep_mpss = dd->pcidev->pcie_mpss; + ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8; - ret = 0; /* Find max payload supported by root, endpoint */ - rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); - ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD); - if (rc_sup > ep_sup) - rc_sup = ep_sup; - - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD); + if (rc_mpss > ep_mpss) + rc_mpss = ep_mpss; /* If Supported greater than limit in module param, limit it */ - if (rc_sup > (qib_pcie_caps & 7)) - rc_sup = qib_pcie_caps & 7; + if (rc_mpss > (qib_pcie_caps & 7)) + rc_mpss = qib_pcie_caps & 7; /* If less than (allowed, supported), bump root payload */ - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); - pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); + if (rc_mpss > rc_mps) { + rc_mps = rc_mpss; + pcie_set_mps(parent, 128 << rc_mps); } /* If less than (allowed, supported), bump endpoint payload */ - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); + if (rc_mpss > ep_mps) { + ep_mps = rc_mpss; + pcie_set_mps(dd->pcidev, 128 << ep_mps); } /* @@ -636,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) * No field for max supported, but PCIe spec limits it to 4096, * which is code '5' (log2(4096) - 7) */ - rc_sup = 5; - if (rc_sup > ((qib_pcie_caps >> 4) & 7)) - rc_sup = (qib_pcie_caps >> 4) & 7; - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ); - - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); - pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); + max_mrrs = 5; + if (max_mrrs > ((qib_pcie_caps >> 4) & 7)) + max_mrrs = (qib_pcie_caps >> 4) & 7; + + max_mrrs = 128 << max_mrrs; + rc_mrrs = pcie_get_readrq(parent); + ep_mrrs = pcie_get_readrq(dd->pcidev); + + if (max_mrrs > rc_mrrs) { + rc_mrrs = max_mrrs; + pcie_set_readrq(parent, rc_mrrs); } - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); + if (max_mrrs > ep_mrrs) { + ep_mrrs = max_mrrs; + pcie_set_readrq(dd->pcidev, ep_mrrs); } -bail: - return ret; } /* End of PCIe capability tuning */ diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4850d03870c..7fcc150d603 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -35,6 +35,9 @@ #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/jhash.h> +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> +#endif #include "qib.h" @@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) unsigned long flags; unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - spin_lock_irqsave(&dev->qpt_lock, flags); atomic_inc(&qp->refcount); + spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) rcu_assign_pointer(ibp->qp0, qp); @@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); } /* @@ -247,40 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; + int removed = 1; spin_lock_irqsave(&dev->qpt_lock, flags); if (rcu_dereference_protected(ibp->qp0, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp0, NULL); } else if (rcu_dereference_protected(ibp->qp1, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp1, NULL); } else { struct qib_qp *q; struct qib_qp __rcu **qpp; + removed = 0; qpp = &dev->qp_table[n]; - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); - for (; q; qpp = &q->next) { + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock))) != NULL; + qpp = &q->next) if (q == qp) { - atomic_dec(&qp->refcount); - *qpp = qp->next; - rcu_assign_pointer(qp->next, NULL); - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); + removed = 1; break; } - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); - } } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } } /** @@ -338,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { struct qib_qp *qp = NULL; + rcu_read_lock(); if (unlikely(qpn <= 1)) { - rcu_read_lock(); if (qpn == 0) qp = rcu_dereference(ibp->qp0); else qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); } else { struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; unsigned n = qpn_hash(dev, qpn); - rcu_read_lock(); for (qp = rcu_dereference(dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); break; + } } - if (qp) - if (unlikely(!atomic_inc_not_zero(&qp->refcount))) - qp = NULL; - rcu_read_unlock(); return qp; } @@ -585,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask)) + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) goto inval; if (attr_mask & IB_QP_AV) { @@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp *ret; if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } @@ -1290,3 +1291,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) } } } + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3ab341320ea..2f2501890c4 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 357b6cfcd46..4c07a8b34ff 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 50a8a0d4fe6..911205d3d5a 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 3fc51443121..c6d6a54d2e1 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -423,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd) void __qib_sdma_intr(struct qib_pportdata *ppd) { - if (__qib_sdma_running(ppd)) + if (__qib_sdma_running(ppd)) { qib_sdma_make_progress(ppd); + if (!list_empty(&ppd->sdma_userpending)) + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + } } int qib_setup_sdma(struct qib_pportdata *ppd) @@ -452,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd) ppd->sdma_descq_removed = 0; ppd->sdma_descq_added = 0; + ppd->sdma_intrequest = 0; + INIT_LIST_HEAD(&ppd->sdma_userpending); + INIT_LIST_HEAD(&ppd->sdma_activelist); tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, @@ -708,6 +714,62 @@ unlock: return ret; } +/* + * sdma_lock should be acquired before calling this routine + */ +void dump_sdma_state(struct qib_pportdata *ppd) +{ + struct qib_sdma_desc *descq; + struct qib_sdma_txreq *txp, *txpnext; + __le64 *descqp; + u64 desc[2]; + u64 addr; + u16 gen, dwlen, dwoffset; + u16 head, tail, cnt; + + head = ppd->sdma_descq_head; + tail = ppd->sdma_descq_tail; + cnt = qib_sdma_descq_freecnt(ppd); + descq = ppd->sdma_descq; + + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_head: %u\n", head); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_tail: %u\n", tail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdma_descq_freecnt: %u\n", cnt); + + /* print info for each entry in the descriptor queue */ + while (head != tail) { + char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 }; + + descqp = &descq[head].qw[0]; + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + flags[0] = (desc[0] & 1<<15) ? 'I' : '-'; + flags[1] = (desc[0] & 1<<14) ? 'L' : 'S'; + flags[2] = (desc[0] & 1<<13) ? 'H' : '-'; + flags[3] = (desc[0] & 1<<12) ? 'F' : '-'; + flags[4] = (desc[0] & 1<<11) ? 'L' : '-'; + addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL); + gen = (desc[0] >> 30) & 3ULL; + dwlen = (desc[0] >> 14) & (0x7ffULL << 2); + dwoffset = (desc[0] & 0x7ffULL) << 2; + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n", + head, flags, addr, gen, dwlen, dwoffset); + if (++head == ppd->sdma_descq_cnt) + head = 0; + } + + /* print dma descriptor indices from the TX requests */ + list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist, + list) + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n", + txp->start_idx, txp->next_descq_idx); +} + void qib_sdma_process_event(struct qib_pportdata *ppd, enum qib_sdma_events event) { diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 034cc821de5..3c8e4e3caca 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -808,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd) for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { ret = device_create_file(&dev->dev, qib_attributes[i]); if (ret) - return ret; + goto bail; } return 0; +bail: + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) + device_remove_file(&dev->dev, qib_attributes[i]); + return ret; } /* diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index d6c7fe7f88d..aaf7039f8ed 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) struct qib_sge *sge; struct ib_wc wc; u32 length; + enum ib_qp_type sqptype, dqptype; qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; } - if (qp->ibqp.qp_type != sqp->ibqp.qp_type || + + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : sqp->ibqp.qp_type; + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : qp->ibqp.qp_type; + + if (dqptype != sqptype || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { ibp->n_pkt_drops++; goto drop; @@ -273,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 82442085cbe..d2806cae234 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,21 +52,48 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... */ + struct list_head list; /* list element */ + + u8 tiddma; /* if this is NEW tid-sdma */ + u8 largepkt; /* this is large pkt from kmalloc */ + u16 frag_size; /* frag size used by PSM */ + u16 index; /* last header index or push index */ + u16 naddr; /* dimension of addr (1..3) ... */ + u16 addrlimit; /* addr array size */ + u16 tidsmidx; /* current tidsm index */ + u16 tidsmcount; /* tidsm array item count */ + u16 payload_size; /* payload size so far for header */ + u32 bytes_togo; /* bytes for processing */ u32 counter; /* sdma pkts queued counter for this entry */ + struct qib_tid_session_member *tidsm; /* tid session member array */ + struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ u64 added; /* global descq number of entries */ struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ + u16 offset; /* offset for kvaddr, addr */ + u16 length; /* length in page */ + u16 first_desc; /* first desc */ + u16 last_desc; /* last desc */ + u16 put_page; /* should we put_page? */ + u16 dma_mapped; /* is page dma_mapped? */ + u16 dma_length; /* for dma_unmap_page() */ + u16 padding; struct page *page; /* may be NULL (coherent mem) */ void *kvaddr; /* FIXME: only for pio hack */ dma_addr_t addr; } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ }; struct qib_user_sdma_queue { @@ -77,6 +104,12 @@ struct qib_user_sdma_queue { */ struct list_head sent; + /* + * Because above list will be accessed by both process and + * signal handler, we need a spinlock for it. + */ + spinlock_t sent_lock ____cacheline_aligned_in_smp; + /* headers with expected length are allocated from here... */ char header_cache_name[64]; struct dma_pool *header_cache; @@ -88,27 +121,83 @@ struct qib_user_sdma_queue { /* as packets go on the queued queue, they are counted... */ u32 counter; u32 sent_counter; + /* pending packets, not sending yet */ + u32 num_pending; + /* sending packets, not complete yet */ + u32 num_sending; + /* global descq number of entry of last sending packet */ + u64 added; /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; pq->counter = 0; pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; + pq->sdma_rb_node = NULL; + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); mutex_init(&pq->lock); snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), @@ -131,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -144,34 +255,310 @@ done: } static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) + int i, u16 offset, u16 len, + u16 first_desc, u16 last_desc, + u16 put_page, u16 dma_mapped, + struct page *page, void *kvaddr, + dma_addr_t dma_addr, u16 dma_length) { pkt->addr[i].offset = offset; pkt->addr[i].length = len; + pkt->addr[i].first_desc = first_desc; + pkt->addr[i].last_desc = last_desc; pkt->addr[i].put_page = put_page; pkt->addr[i].dma_mapped = dma_mapped; pkt->addr[i].page = page; pkt->addr[i].kvaddr = kvaddr; pkt->addr[i].addr = dma_addr; + pkt->addr[i].dma_length = dma_length; +} + +static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) +{ + void *hdr; + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + dma_addr); + else + hdr = NULL; + + if (!hdr) { + hdr = kmalloc(len, GFP_KERNEL); + if (!hdr) + return NULL; + + *dma_addr = 0; + } + + return hdr; } -static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) +static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + struct page *page, u16 put, + u16 offset, u16 len, void *kvaddr) { - pkt->naddr = 1; - pkt->counter = counter; - qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); + __le16 *pbc16; + void *pbcvaddr; + struct qib_message_header *hdr; + u16 newlen, pbclen, lastdesc, dma_mapped; + u32 vcto; + union qib_seqnum seqnum; + dma_addr_t pbcdaddr; + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + page, offset, len, DMA_TO_DEVICE); + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + /* + * dma mapping error, pkt has not managed + * this page yet, return the page here so + * the caller can ignore this page. + */ + if (put) { + put_page(page); + } else { + /* coalesce case */ + kunmap(page); + __free_page(page); + } + ret = -ENOMEM; + goto done; + } + offset = 0; + dma_mapped = 1; + + +next_fragment: + + /* + * In tid-sdma, the transfer length is restricted by + * receiver side current tid page length. + */ + if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length) + newlen = pkt->tidsm[pkt->tidsmidx].length; + else + newlen = len; + + /* + * Then the transfer length is restricted by MTU. + * the last descriptor flag is determined by: + * 1. the current packet is at frag size length. + * 2. the current tid page is done if tid-sdma. + * 3. there is no more byte togo if sdma. + */ + lastdesc = 0; + if ((pkt->payload_size + newlen) >= pkt->frag_size) { + newlen = pkt->frag_size - pkt->payload_size; + lastdesc = 1; + } else if (pkt->tiddma) { + if (newlen == pkt->tidsm[pkt->tidsmidx].length) + lastdesc = 1; + } else { + if (newlen == pkt->bytes_togo) + lastdesc = 1; + } + + /* fill the next fragment in this page */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + offset, newlen, /* offset, len */ + 0, lastdesc, /* first last desc */ + put, dma_mapped, /* put page, dma mapped */ + page, kvaddr, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->bytes_togo -= newlen; + pkt->payload_size += newlen; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* If there is no more byte togo. (lastdesc==1) */ + if (pkt->bytes_togo == 0) { + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + goto done; + } + + /* If tid-sdma, advance tid info. */ + if (pkt->tiddma) { + pkt->tidsm[pkt->tidsmidx].length -= newlen; + if (pkt->tidsm[pkt->tidsmidx].length) { + pkt->tidsm[pkt->tidsmidx].offset += newlen; + } else { + pkt->tidsmidx++; + if (pkt->tidsmidx == pkt->tidsmcount) { + ret = -EFAULT; + goto done; + } + } + } + + /* + * If this is NOT the last descriptor. (newlen==len) + * the current packet is not done yet, but the current + * send side page is done. + */ + if (lastdesc == 0) + goto done; + + /* + * If running this driver under PSM with message size + * fitting into one transfer unit, it is not possible + * to pass this line. otherwise, it is a buggggg. + */ + + /* + * Since the current packet is done, and there are more + * bytes togo, we need to create a new sdma header, copying + * from previous sdma header and modify both. + */ + pbclen = pkt->addr[pkt->index].length; + pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr); + if (!pbcvaddr) { + ret = -ENOMEM; + goto done; + } + /* Copy the previous sdma header to new sdma header */ + pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr; + memcpy(pbcvaddr, pbc16, pbclen); + + /* Modify the previous sdma header */ + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* turn on the header suppression */ + hdr->iph.pkt_flags = + cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2); + /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */ + hdr->flags &= ~(0x04|0x20); + } else { + /* turn off extra bytes: 20-21 bits */ + hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF); + /* turn off ACK_REQ: 0x04 */ + hdr->flags &= ~(0x04); + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + /* Modify the new sdma header */ + pbc16 = (__le16 *)pbcvaddr; + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* Set new tid and offset for new sdma header */ + hdr->iph.ver_ctxt_tid_offset = cpu_to_le32( + (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) + + (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) + + (pkt->tidsm[pkt->tidsmidx].offset>>2)); + } else { + /* Middle protocol new packet offset */ + hdr->uwords[2] += pkt->payload_size; + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* Next sequence number in new sdma header */ + seqnum.val = be32_to_cpu(hdr->bth[2]); + if (pkt->tiddma) + seqnum.seq++; + else + seqnum.pkt++; + hdr->bth[2] = cpu_to_be32(seqnum.val); + + /* Init new sdma header. */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + 0, pbclen, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbcvaddr, /* struct page, virt addr */ + pbcdaddr, pbclen); /* dma addr, dma length */ + pkt->index = pkt->naddr; + pkt->payload_size = 0; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* Prepare for next fragment in this page */ + if (newlen != len) { + if (dma_mapped) { + put = 0; + dma_mapped = 0; + page = NULL; + kvaddr = NULL; + } + len -= newlen; + offset += newlen; + + goto next_fragment; + } + +done: + return ret; } /* we've too many pages in the iovec, coalesce to a single page */ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, const struct iovec *iov, unsigned long niov) @@ -182,7 +569,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, char *mpage; int i; int len = 0; - dma_addr_t dma_addr; if (!page) { ret = -ENOMEM; @@ -205,17 +591,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, len += iov[i].iov_len; } - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + page, 0, 0, len, mpage_save); goto done; free_unmap: @@ -238,16 +615,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* - * Truncate length to page boundary. - */ -static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; -} - static void qib_user_sdma_free_pkt_frag(struct device *dev, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, @@ -256,10 +623,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, const int i = frag; if (pkt->addr[i].page) { + /* only user data has page */ if (pkt->addr[i].dma_mapped) dma_unmap_page(dev, pkt->addr[i].addr, - pkt->addr[i].length, + pkt->addr[i].dma_length, DMA_TO_DEVICE); if (pkt->addr[i].kvaddr) @@ -269,55 +637,80 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, put_page(pkt->addr[i].page); else __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { + /* from kmalloc & dma mapped */ + dma_unmap_single(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + kfree(pkt->addr[i].kvaddr); + } else if (pkt->addr[i].addr) { + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, pkt->addr[i].kvaddr, pkt->addr[i].addr); + } else { + /* from kmalloc but not dma mapped */ + kfree(pkt->addr[i].kvaddr); + } + } } /* return number of pages pinned... */ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, unsigned long addr, int tlen, int npages) { - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages(current, current->mm, addr, - npages, 0, 1, pages, NULL); - - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } + struct page *pages[8]; + int i, j; + int ret = 0; - for (j = 0; j < npages; j++) { - /* map the pages... */ - const int flen = qib_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; + while (npages) { + if (npages > 8) + j = 8; + else + j = npages; - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = get_user_pages_fast(addr, j, 0, pages); + if (ret != j) { + i = 0; + j = ret; ret = -ENOMEM; - goto done; + goto free_pages; } - qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), dma_addr); + for (i = 0; i < j; i++) { + /* map the pages... */ + unsigned long fofs = addr & ~PAGE_MASK; + int flen = ((fofs + tlen) > PAGE_SIZE) ? + (PAGE_SIZE - fofs) : tlen; + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + pages[i], 1, fofs, flen, NULL); + if (ret < 0) { + /* current page has beed taken + * care of inside above call. + */ + i++; + goto free_pages; + } + + addr += flen; + tlen -= flen; + } - pkt->naddr++; - addr += flen; - tlen -= flen; + npages -= j; } + goto done; + + /* if error, return all pages not managed by pkt */ +free_pages: + while (i < j) + put_page(pages[i++]); + done: return ret; } @@ -335,7 +728,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, const int npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; - ret = qib_user_sdma_pin_pages(dd, pkt, addr, + ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, iov[idx].iov_len, npages); if (ret < 0) goto free_pkt; @@ -344,9 +737,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, goto done; free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) + /* we need to ignore the first entry here */ + for (idx = 1; idx < pkt->naddr; idx++) qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + /* need to dma unmap the first entry, this is to restore to + * the original state so that caller can free the memory in + * error condition. Caller does not know if dma mapped or not*/ + if (pkt->addr[0].dma_mapped) { + dma_unmap_single(&dd->pcidev->dev, + pkt->addr[0].addr, + pkt->addr[0].dma_length, + DMA_TO_DEVICE); + pkt->addr[0].addr = 0; + pkt->addr[0].dma_mapped = 0; + } + done: return ret; } @@ -359,8 +765,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd, { int ret = 0; - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = qib_user_sdma_coalesce(dd, pkt, iov, niov); + if (pkt->frag_size == pkt->bytes_togo && + npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov); else ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); @@ -380,7 +787,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, for (i = 0; i < pkt->naddr; i++) qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); - kmem_cache_free(pq->pkt_slab, pkt); + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); } INIT_LIST_HEAD(list); } @@ -393,63 +803,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, * as, if there is an error we clean it... */ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, - struct list_head *list, const struct iovec *iov, unsigned long niov, - int maxpkts) + struct list_head *list, + int *maxpkts, int *ndesc) { unsigned long idx = 0; int ret = 0; int npkts = 0; - struct page *page = NULL; __le32 *pbc; dma_addr_t dma_addr; struct qib_user_sdma_pkt *pkt = NULL; size_t len; size_t nw; u32 counter = pq->counter; - int dma_mapped = 0; + u16 frag_size; - while (idx < niov && npkts < maxpkts) { + while (idx < niov && npkts < *maxpkts) { const unsigned long addr = (unsigned long) iov[idx].iov_base; const unsigned long idx_save = idx; unsigned pktnw; unsigned pktnwc; int nfrags = 0; int npages = 0; + int bytes_togo = 0; + int tiddma = 0; int cfur; - dma_mapped = 0; len = iov[idx].iov_len; nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || len > PAGE_SIZE || len & 3 || addr & 3) { ret = -EINVAL; - goto free_pkt; + goto free_list; } - if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - + pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr); if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); + ret = -ENOMEM; + goto free_list; } cfur = copy_from_user(pbc, iov[idx].iov_base, len); @@ -474,8 +869,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, * we can verify that the packet is consistent with the * iovec lengths. */ - pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + pktnw = le32_to_cpu(*pbc) & 0xFFFF; + if (pktnw < pktnwc) { ret = -EINVAL; goto free_pbc; } @@ -486,17 +881,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, const unsigned long faddr = (unsigned long) iov[idx].iov_base; - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { + if (slen & 3 || faddr & 3 || !slen) { ret = -EINVAL; goto free_pbc; } - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; + npages += qib_user_sdma_num_pages(&iov[idx]); + bytes_togo += slen; pktnwc += slen >> 2; idx++; nfrags++; @@ -507,48 +899,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, goto free_pbc; } - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF; + if (((frag_size ? frag_size : bytes_togo) + len) > + ppd->ibmaxlen) { + ret = -EINVAL; + goto free_pbc; + } + + if (frag_size) { + int pktsize, tidsmsize, n; + + n = npages*((2*PAGE_SIZE/frag_size)+1); + pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + + /* + * Determine if this is tid-sdma or just sdma. + */ + tiddma = (((le32_to_cpu(pbc[7])>> + QLOGIC_IB_I_TID_SHIFT)& + QLOGIC_IB_I_TID_MASK) != + QLOGIC_IB_I_TID_MASK); + + if (tiddma) + tidsmsize = iov[idx].iov_len; + else + tidsmsize = 0; + + pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (!pkt) { ret = -ENOMEM; goto free_pbc; } + pkt->largepkt = 1; + pkt->frag_size = frag_size; + pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + + if (tiddma) { + char *tidsm = (char *)pkt + pktsize; + cfur = copy_from_user(tidsm, + iov[idx].iov_base, tidsmsize); + if (cfur) { + ret = -EFAULT; + goto free_pkt; + } + pkt->tidsm = + (struct qib_tid_session_member *)tidsm; + pkt->tidsmcount = tidsmsize/ + sizeof(struct qib_tid_session_member); + pkt->tidsmidx = 0; + idx++; + } - dma_mapped = 1; + /* + * pbc 'fill1' field is borrowed to pass frag size, + * we need to clear it after picking frag size, the + * hardware requires this field to be zero. + */ + *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); + } else { + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 0; + pkt->frag_size = bytes_togo; + pkt->addrlimit = ARRAY_SIZE(pkt->addr); } - - qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); + pkt->bytes_togo = bytes_togo; + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ + 0, len, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbc, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->index = 0; + pkt->naddr = 1; if (nfrags) { ret = qib_user_sdma_init_payload(dd, pq, pkt, iov + idx_save + 1, nfrags, npages); if (ret < 0) - goto free_pbc_dma; + goto free_pkt; + } else { + /* since there is no payload, mark the + * header as the last desc. */ + pkt->addr[0].last_desc = 1; + + if (dma_addr == 0) { + /* + * the header is not dma mapped yet. + * it should be from kmalloc. + */ + dma_addr = dma_map_single(&dd->pcidev->dev, + pbc, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + dma_addr)) { + ret = -ENOMEM; + goto free_pkt; + } + pkt->addr[0].addr = dma_addr; + pkt->addr[0].dma_mapped = 1; + } } counter++; npkts++; + pkt->pq = pq; + pkt->index = 0; /* reset index for push on hw */ + *ndesc += pkt->naddr; list_add_tail(&pkt->list, list); } + *maxpkts = npkts; ret = idx; goto done; -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pkt: + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else + if (dma_addr) dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); + else + kfree(pbc); free_list: qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); done: @@ -569,10 +1052,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, struct list_head free_list; struct qib_user_sdma_pkt *pkt; struct qib_user_sdma_pkt *pkt_prev; + unsigned long flags; int ret = 0; + if (!pq->num_sending) + return 0; + INIT_LIST_HEAD(&free_list); + /* + * We need this spin lock here because interrupt handler + * might modify this list in qib_user_sdma_send_desc(), also + * we can not get interrupted, otherwise it is a deadlock. + */ + spin_lock_irqsave(&pq->sent_lock, flags); list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { s64 descd = ppd->sdma_descq_removed - pkt->added; @@ -583,7 +1076,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, /* one more packet cleaned */ ret++; + pq->num_sending--; } + spin_unlock_irqrestore(&pq->sent_lock, flags); if (!list_empty(&free_list)) { u32 counter; @@ -604,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -627,6 +1127,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq) { struct qib_devdata *dd = ppd->dd; + unsigned long flags; int i; if (!pq) @@ -634,7 +1135,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { + if (!pq->num_pending && !pq->num_sending) { mutex_unlock(&pq->lock); break; } @@ -644,29 +1145,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, msleep(10); } - if (!list_empty(&pq->sent)) { + if (pq->num_pending || pq->num_sending) { + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; struct list_head free_list; + mutex_lock(&pq->lock); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Since we hold sdma_lock, it is safe without sent_lock. + */ + if (pq->num_pending) { + list_for_each_entry_safe(pkt, pkt_prev, + &ppd->sdma_userpending, list) { + if (pkt->pq == pq) { + list_move_tail(&pkt->list, &pq->sent); + pq->num_pending--; + pq->num_sending++; + } + } + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); list_splice_init(&pq->sent, &free_list); + pq->num_sending = 0; qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); mutex_unlock(&pq->lock); } } -static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd, +static inline __le64 qib_sdma_make_desc0(u8 gen, u64 addr, u64 dwlen, u64 dwoffset) { - u8 tmpgen; - - tmpgen = ppd->sdma_generation; - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ ((addr & 0xfffffffcULL) << 32) | /* SDmaGeneration[1:0] */ - ((tmpgen & 3ULL) << 30) | + ((gen & 3ULL) << 30) | /* SDmaDwordCount[10:0] */ ((dwlen & 0x7ffULL) << 16) | /* SDmaBufOffset[12:2] */ @@ -692,7 +1208,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr) static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, struct qib_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) + unsigned ofs, u16 tail, u8 gen) { const u64 addr = (u64) pkt->addr[idx].addr + (u64) pkt->addr[idx].offset; @@ -702,105 +1218,159 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, descqp = &ppd->sdma_descq[tail].qw[0]; - descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs); - if (idx == 0) + descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs); + if (pkt->addr[idx].first_desc) descq0 = qib_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) + if (pkt->addr[idx].last_desc) { descq0 = qib_sdma_make_last_desc0(descq0); + if (ppd->sdma_intrequest) { + descq0 |= cpu_to_le64(1ULL << 15); + ppd->sdma_intrequest = 0; + } + } descqp[0] = descq0; descqp[1] = qib_sdma_make_desc1(addr); } -/* pq->lock must be held, get packets on the wire... */ -static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, - struct qib_user_sdma_queue *pq, - struct list_head *pktlist) +void qib_user_sdma_send_desc(struct qib_pportdata *ppd, + struct list_head *pktlist) { struct qib_devdata *dd = ppd->dd; - int ret = 0; - unsigned long flags; - u16 tail; - u8 generation; - u64 descq_added; - - if (list_empty(pktlist)) - return 0; - - if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&ppd->sdma_lock, flags); + u16 nfree, nsent; + u16 tail, tail_c; + u8 gen, gen_c; - /* keep a copy for restoring purposes in case of problems */ - generation = ppd->sdma_generation; - descq_added = ppd->sdma_descq_added; - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; - } + nfree = qib_sdma_descq_freecnt(ppd); + if (!nfree) + return; - tail = ppd->sdma_descq_tail; +retry: + nsent = 0; + tail_c = tail = ppd->sdma_descq_tail; + gen_c = gen = ppd->sdma_generation; while (!list_empty(pktlist)) { struct qib_user_sdma_pkt *pkt = list_entry(pktlist->next, struct qib_user_sdma_pkt, list); - int i; + int i, j, c = 0; unsigned ofs = 0; u16 dtail = tail; - if (pkt->naddr > qib_sdma_descq_freecnt(ppd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail); + for (i = pkt->index; i < pkt->naddr && nfree; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen); ofs += pkt->addr[i].length >> 2; if (++tail == ppd->sdma_descq_cnt) { tail = 0; - ++ppd->sdma_generation; + ++gen; + ppd->sdma_intrequest = 1; + } else if (tail == (ppd->sdma_descq_cnt>>1)) { + ppd->sdma_intrequest = 1; } - } - - if ((ofs << 2) > ppd->ibmaxlen) { - ret = -EMSGSIZE; - goto unlock; - } + nfree--; + if (pkt->addr[i].last_desc == 0) + continue; - /* - * If the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. - */ - if (ofs > dd->piosize2kmax_dwords) { - for (i = 0; i < pkt->naddr; i++) { - ppd->sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == ppd->sdma_descq_cnt) - dtail = 0; + /* + * If the packet is >= 2KB mtu equivalent, we + * have to use the large buffers, and have to + * mark each descriptor as part of a large + * buffer packet. + */ + if (ofs > dd->piosize2kmax_dwords) { + for (j = pkt->index; j <= i; j++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } } + c += i + 1 - pkt->index; + pkt->index = i + 1; /* index for next first */ + tail_c = dtail = tail; + gen_c = gen; + ofs = 0; /* reset for next packet */ } - ppd->sdma_descq_added += pkt->naddr; - pkt->added = ppd->sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; + ppd->sdma_descq_added += c; + nsent += c; + if (pkt->index == pkt->naddr) { + pkt->added = ppd->sdma_descq_added; + pkt->pq->added = pkt->added; + pkt->pq->num_pending--; + spin_lock(&pkt->pq->sent_lock); + pkt->pq->num_sending++; + list_move_tail(&pkt->list, &pkt->pq->sent); + spin_unlock(&pkt->pq->sent_lock); + } + if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt) + break; } -unlock_check_tail: /* advance the tail on the chip if necessary */ - if (ppd->sdma_descq_tail != tail) - dd->f_sdma_update_tail(ppd, tail); + if (ppd->sdma_descq_tail != tail_c) { + ppd->sdma_generation = gen_c; + dd->f_sdma_update_tail(ppd, tail_c); + } + + if (nfree && !list_empty(pktlist)) + goto retry; + + return; +} + +/* pq->lock must be held, get packets on the wire... */ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) +{ + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; -unlock: - if (unlikely(ret < 0)) { - ppd->sdma_generation = generation; - ppd->sdma_descq_added = descq_added; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + + pq->num_pending += count; + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); + + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -822,19 +1392,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, if (!qib_sdma_running(ppd)) goto done_unlock; - if (ppd->sdma_descq_added != ppd->sdma_descq_removed) { + /* if I have packets not complete yet */ + if (pq->added > ppd->sdma_descq_removed) qib_user_sdma_hwqueue_clean(ppd); + /* if I have complete packets to be freed */ + if (pq->num_sending) qib_user_sdma_queue_clean(ppd, pq); - } while (dim) { - const int mxp = 8; + int mxp = 1; + int ndesc = 0; - down_write(¤t->mm->mmap_sem); - ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - up_write(¤t->mm->mmap_sem); - - if (ret <= 0) + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, + iov, dim, &list, &mxp, &ndesc); + if (ret < 0) goto done_unlock; else { dim -= ret; @@ -844,24 +1415,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, /* force packets onto the sdma hw queue... */ if (!list_empty(&list)) { /* - * Lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). + * Lazily clean hw queue. */ - if (qib_sdma_descq_freecnt(ppd) < ret * 4) { + if (qib_sdma_descq_freecnt(ppd) < ndesc) { qib_user_sdma_hwqueue_clean(ppd); - qib_user_sdma_queue_clean(ppd, pq); + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); } - ret = qib_user_sdma_push_pkts(ppd, pq, &list); + ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp); if (ret < 0) goto done_unlock; else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; + npkts += mxp; + pq->counter += mxp; } } } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ba51a4715a1..9bcfbd84298 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) } else goto drop; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - ibp->opstats[opcode & 0x7f].n_bytes += tlen; - ibp->opstats[opcode & 0x7f].n_packets++; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; @@ -660,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -676,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) &rcd->lookaside_qp->refcount)) wake_up( &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcd->lookaside_qp = NULL; + } } if (!rcd->lookaside_qp) { qp = qib_lookup_qpn(ibp, qp_num); @@ -687,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) rcd->lookaside_qpn = qp_num; } else qp = rcd->lookaside_qp; - ibp->n_unicast_rcv++; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); } return; @@ -2224,7 +2226,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dma_ops = &qib_dma_mapping_ops; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), - "QLogic Infiniband HCA %s", init_utsname()->nodename); + "Intel Infiniband HCA %s", init_utsname()->nodename); ret = ib_register_device(ibdev, qib_create_port_files); if (ret) @@ -2234,7 +2236,8 @@ int qib_register_ib_device(struct qib_devdata *dd) if (ret) goto err_agents; - if (qib_verbs_register_sysfs(dd)) + ret = qib_verbs_register_sysfs(dd); + if (ret) goto err_class; goto bail; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index aff8b2c1788..bfc8948fdd3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/kthread.h> #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -149,14 +150,14 @@ struct ib_reth { __be64 vaddr; __be32 rkey; __be32 length; -} __attribute__ ((packed)); +} __packed; struct ib_atomic_eth { __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; -} __attribute__ ((packed)); +} __packed; struct qib_other_headers { __be32 bth[3]; @@ -177,7 +178,7 @@ struct qib_other_headers { __be32 aeth; struct ib_atomic_eth atomic_eth; } u; -} __attribute__ ((packed)); +} __packed; /* * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes @@ -194,12 +195,12 @@ struct qib_ib_header { } l; struct qib_other_headers oth; } u; -} __attribute__ ((packed)); +} __packed; struct qib_pio_header { __le32 pbc[2]; struct qib_ib_header hdr; -} __attribute__ ((packed)); +} __packed; /* * There is one struct qib_mcast for each multicast GID. @@ -267,7 +268,8 @@ struct qib_cq_wc { */ struct qib_cq { struct ib_cq ibcq; - struct work_struct comptask; + struct kthread_work comptask; + struct qib_devdata *dd; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; @@ -658,6 +660,17 @@ struct qib_opcode_stats { u64 n_bytes; /* total number of bytes */ }; +struct qib_opcode_stats_perctx { + struct qib_opcode_stats stats[128]; +}; + +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -674,10 +687,11 @@ struct qib_ibport { __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ u64 tid; /* TID for traps */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ u64 z_symbol_error_counter; /* starting count for PMA */ u64 z_link_error_recovery_counter; /* starting count for PMA */ u64 z_link_downed_counter; /* starting count for PMA */ @@ -724,7 +738,6 @@ struct qib_ibport { u8 vl_high_limit; u8 sl_to_vl[16]; - struct qib_opcode_stats opstats[128]; }; @@ -768,6 +781,10 @@ struct qib_ibdev { spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; +#ifdef CONFIG_DEBUG_FS + /* per HCA debugfs */ + struct dentry *qib_ibdev_dbg; +#endif }; struct qib_verbs_counters { @@ -832,8 +849,6 @@ static inline int qib_send_ok(struct qib_qp *qp) !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); } -extern struct workqueue_struct *qib_cq_wq; - /* * This must be called with s_lock held. */ @@ -910,6 +925,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); void qib_free_qpn_table(struct qib_qpn_table *qpt); +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); + +int qib_qp_iter_next(struct qib_qp_iter *iter); + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); + +#endif + void qib_get_credit(struct qib_qp *qp, u32 aeth); unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); @@ -972,6 +999,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qib_destroy_srq(struct ib_srq *ibsrq); +int qib_cq_init(struct qib_devdata *dd); + +void qib_cq_exit(struct qib_devdata *dd); + void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); |
