Diffstat (limited to 'drivers/infiniband/hw/qib')
37 files changed, 4490 insertions, 1549 deletions
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 8349f9c5064..495be09781b 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,7 +1,15 @@ config INFINIBAND_QIB - tristate "QLogic PCIe HCA support" + tristate "Intel PCIe HCA support" depends on 64BIT ---help--- - This is a low-level driver for QLogic PCIe QLE InfiniBand host - channel adapters. This driver does not support the QLogic + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index f12d7bb8b39..57f8103e51f 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index b881bdc401f..c00ae093b6f 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,8 +1,8 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -51,6 +51,7 @@ #include <linux/completion.h> #include <linux/kref.h> #include <linux/sched.h> +#include <linux/kthread.h> #include "qib_common.h" #include "qib_verbs.h" @@ -87,8 +88,7 @@ struct qlogic_ib_stats { }; extern struct qlogic_ib_stats qib_stats; -extern struct pci_error_handlers qib_pci_err_handler; -extern struct pci_driver qib_driver; +extern const struct pci_error_handlers qib_pci_err_handler; #define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ /* @@ -114,6 +114,11 @@ struct qib_eep_log_mask { /* * Below contains all data related to a single context (formerly called port). */ + +#ifdef CONFIG_DEBUG_FS +struct qib_opcode_stats_perctx; +#endif + struct qib_ctxtdata { void **rcvegrbuf; dma_addr_t *rcvegrbuf_phys; @@ -154,6 +159,8 @@ struct qib_ctxtdata { */ /* instead of calculating it */ unsigned ctxt; + /* local node of context */ + int node_id; /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. 
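The new node_id field records which NUMA node a receive context lives on. A minimal sketch of the allocation pattern this enables (alloc_ctxt_on_node is an illustrative name, not the driver's; the reworked qib_create_ctxtdata() presumably does the equivalent):

	/* sketch: place per-context memory on a caller-chosen NUMA node */
	static struct qib_ctxtdata *alloc_ctxt_on_node(int node_id)
	{
		struct qib_ctxtdata *rcd;

		/* kzalloc_node() prefers the requested node when memory allows */
		rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
		if (rcd)
			rcd->node_id = node_id; /* reused for later queue allocations */
		return rcd;
	}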
*/ @@ -222,12 +229,15 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* lookaside fields */ struct qib_qp *lookaside_qp; u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif }; struct qib_sge_state; @@ -427,6 +437,24 @@ struct qib_verbs_txreq { /* how often we check for packet activity for "power on hours (in seconds) */ #define ACTIVITY_TIMER 5 +#define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + +struct qib_msix_entry { + struct msix_entry msix; + void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif + char name[MAX_NAME_SIZE]; + cpumask_var_t mask; +}; + /* Below is an opaque struct. Each chip (device) can maintain * private data needed for its operation, but not germane to the * rest of the driver. For convenience, we define another that @@ -511,6 +539,7 @@ struct qib_pportdata { struct qib_devdata *dd; struct qib_chippport_specific *cpspec; /* chip-specific per-port */ struct kobject pport_kobj; + struct kobject pport_cc_kobj; struct kobject sl2vl_kobj; struct kobject diagc_kobj; @@ -522,8 +551,6 @@ struct qib_pportdata { /* qib_lflags driver is waiting for */ u32 state_wanted; spinlock_t lflags_lock; - /* number of (port-specific) interrupts for this port -- saturates... */ - u32 int_counter; /* ref count for each pkey */ atomic_t pkeyrefs[4]; @@ -535,24 +562,29 @@ struct qib_pportdata { u64 *statusp; /* SendDMA related entries */ - spinlock_t sdma_lock; - struct qib_sdma_state sdma_state; - unsigned long sdma_buf_jiffies; + + /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct workqueue_struct *qib_wq; + struct qib_sdma_state sdma_state; + dma_addr_t sdma_descq_phys; + volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ + dma_addr_t sdma_head_phys; + u16 sdma_descq_cnt; + + /* read/write using lock */ + spinlock_t sdma_lock ____cacheline_aligned_in_smp; + struct list_head sdma_activelist; + struct list_head sdma_userpending; u64 sdma_descq_added; u64 sdma_descq_removed; - u16 sdma_descq_cnt; u16 sdma_descq_tail; u16 sdma_descq_head; - u16 sdma_next_intr; - u16 sdma_reset_wait; u8 sdma_generation; - struct tasklet_struct sdma_sw_clean_up_task; - struct list_head sdma_activelist; + u8 sdma_intrequest; - dma_addr_t sdma_descq_phys; - volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ - dma_addr_t sdma_head_phys; + struct tasklet_struct sdma_sw_clean_up_task + ____cacheline_aligned_in_smp; wait_queue_head_t state_wait; /* for state_wanted */ @@ -629,6 +661,39 @@ struct qib_pportdata { struct timer_list led_override_timer; struct xmit_wait cong_stats; struct timer_list symerr_clear_timer; + + /* Synchronize access between driver writes and sysfs reads */ + spinlock_t cc_shadow_lock + ____cacheline_aligned_in_smp; + + /* Shadow copy of the congestion control table */ + struct cc_table_shadow *ccti_entries_shadow; + + /* Shadow copy of the congestion control entries */ + struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow; + + /* List of congestion control table entries */ + struct ib_cc_table_entry_shadow *ccti_entries; + + /* 16 congestion entries with each entry corresponding to a SL */ + struct ib_cc_congestion_entry_shadow *congestion_entries; + + /* Maximum number of congestion control entries that the agent expects + * the manager to send. 
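The SendDMA reshuffle above splits the pportdata fields into a read-mostly group and a lock-protected group, with ____cacheline_aligned_in_smp starting the contended members on a fresh cache line. A toy illustration of the layout idea (hypothetical struct, not driver code):

	struct hot_cold_split {
		/* read-mostly after init: safe to share read-only across CPUs */
		void *descq;
		u16 descq_cnt;

		/* contended: the lock and the state it guards begin on their own
		 * cache line, so lock bouncing does not evict the read-mostly
		 * fields above from other CPUs' caches */
		spinlock_t lock ____cacheline_aligned_in_smp;
		struct list_head activelist;
	};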
+ */ + u16 cc_supported_table_entries; + + /* Total number of congestion control table entries */ + u16 total_cct_entry; + + /* Bit map identifying service level */ + u16 cc_sl_control_map; + + /* maximum congestion control table index */ + u16 ccti_limit; + + /* CA's max number of 64 entry units in the congestion control table */ + u8 cc_max_table_entries; }; /* Observers. Not to be taken lightly, possibly not to ship. */ @@ -785,6 +850,9 @@ struct qib_devdata { struct qib_ctxtdata *); void (*f_writescratch)(struct qib_devdata *, u32); int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif char *boardname; /* human readable board info */ @@ -800,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -865,7 +935,14 @@ struct qib_devdata { * pio_writing. */ spinlock_t pioavail_lock; - + /* + * index of last buffer to optimize search for next + */ + u32 last_pio; + /* + * min kernel pio buffer to optimize search + */ + u32 min_kernel_pio; /* * Shadow copies of registers; size indicates read access size. * Most of them are readonly, but some are write-only register, @@ -1025,6 +1102,10 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; + /* per device cq worker */ + struct kthread_worker *worker; + + int assigned_node_id; /* NUMA node closest to HCA */ }; /* hol_state values */ @@ -1062,6 +1143,7 @@ extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; extern unsigned qib_wc_pat; +extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); int qib_enable_wc(struct qib_devdata *dd); @@ -1103,8 +1185,8 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); @@ -1247,11 +1329,18 @@ int qib_setup_sdma(struct qib_pportdata *); void qib_teardown_sdma(struct qib_pportdata *); void __qib_sdma_intr(struct qib_pportdata *); void qib_sdma_intr(struct qib_pportdata *); +void qib_user_sdma_send_desc(struct qib_pportdata *dd, + struct list_head *pktlist); int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, u32, struct qib_verbs_txreq *); /* ppd->sdma_lock should be locked before calling this. 
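The interrupt-counter change above trades a single saturating u32 for a per-CPU counter plus a z_int_counter baseline. A sketch of how the pieces presumably fit together (int_counter_read is an illustrative name for what the declared qib_int_counter() does; the this_cpu_inc() and z_int_counter lines are visible verbatim later in this patch):

	/* hot path, per interrupt: no shared cache line, no atomic op */
	this_cpu_inc(*dd->int_counter);

	/* read side: sum the per-CPU slots */
	static u64 int_counter_read(struct qib_devdata *dd)
	{
		int cpu;
		u64 sum = 0;

		for_each_possible_cpu(cpu)
			sum += *per_cpu_ptr(dd->int_counter, cpu);
		return sum;
	}

	/* "reset": snapshot the sum rather than zeroing every CPU's slot */
	dd->z_int_counter = int_counter_read(dd);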
*/ int qib_sdma_make_progress(struct qib_pportdata *dd); +static inline int qib_sdma_empty(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_added == ppd->sdma_descq_removed; +} + /* must be called under qib_sdma_lock */ static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) { @@ -1264,7 +1353,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd) return ppd->sdma_state.current_state == qib_sdma_state_s99_running; } int qib_sdma_running(struct qib_pportdata *); - +void dump_sdma_state(struct qib_pportdata *ppd); void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); @@ -1355,13 +1444,17 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *); +int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); int qib_reinit_intr(struct qib_devdata *); void qib_enable_intx(struct pci_dev *); void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw @@ -1389,6 +1482,7 @@ extern unsigned qib_n_krcv_queues; extern unsigned qib_sdma_fetch_arb; extern unsigned qib_compat_ddr_negotiate; extern int qib_special_trigger; +extern unsigned qib_numa_aware; extern struct mutex qib_mutex; @@ -1418,27 +1512,23 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index 145da404088..5670ace27c6 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,13 +279,12 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. 
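qib_sdma_empty() above works because sdma_descq_added and sdma_descq_removed are free-running u64 counters: ring occupancy is a plain difference, and u64 wraparound is not a practical concern. A companion sketch (the helper name is illustrative; the body of qib_sdma_descq_freecnt() itself is elided by the diff context):

	/* descriptors currently outstanding in the ring */
	static inline u64 sdma_descq_inuse(const struct qib_pportdata *ppd)
	{
		return ppd->sdma_descq_added - ppd->sdma_descq_removed;
	}
	/* free slots are then presumably sdma_descq_cnt - inuse - 1, one slot
	 * staying unused so a full ring is distinguishable from an empty one */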
*/ -#define QIB_USER_SWMINOR 11 +#define QIB_USER_SWMINOR 13 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) #ifndef QIB_KERN_TYPE #define QIB_KERN_TYPE 0 -#define QIB_IDSTR "QLogic kernel.org driver" #endif /* @@ -302,6 +301,19 @@ struct qib_base_info { #define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) /* + * Define the driver version number. This is something that refers only + * to the driver itself, not the software interfaces it supports. + */ +#define QIB_DRIVER_VERSION_BASE "1.11" + +/* create the final driver version string */ +#ifdef QIB_IDSTR +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR +#else +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE +#endif + +/* * If the unit is specified via open, HCA choice is fixed. If port is * specified, it's also fixed. Otherwise we try to spread contexts * across ports and HCAs, using different algorithms. WITHIN is @@ -689,7 +701,37 @@ struct qib_message_header { __be32 bth[3]; /* fields below this point are in host byte order */ struct qib_header iph; + /* fields below are simplified, but should match PSM */ + /* some are accessed by driver when packet splitting is needed */ __u8 sub_opcode; + __u8 flags; + __u16 commidx; + __u32 ack_seq_num; + __u8 flowid; + __u8 hdr_dlen; + __u16 mqhdr; + __u32 uwords[4]; +}; + +/* sequence number bits for message */ +union qib_seqnum { + struct { + __u32 seq:11; + __u32 gen:8; + __u32 flow:5; + }; + struct { + __u32 pkt:16; + __u32 msg:8; + }; + __u32 val; +}; + +/* qib receiving-dma tid-session-member */ +struct qib_tid_session_member { + __u16 tid; + __u16 offset; + __u16 length; }; /* IB - LRH header consts */ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index 5246aa486bb..ab4e11cfab1 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -34,8 +35,10 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> #include "qib_verbs.h" +#include "qib.h" /** * qib_cq_enter - add a new entry to the completion queue @@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - cq->notify = IB_CQ_NONE; - cq->triggered++; + struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - queue_work(qib_cq_wq, &cq->comptask); + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } } spin_unlock_irqrestore(&cq->lock, flags); @@ -163,7 +171,7 @@ bail: return npolled; } -static void send_complete(struct work_struct *work) +static void send_complete(struct kthread_work *work) { struct qib_cq *cq = container_of(work, struct qib_cq, comptask); @@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, * The number of entries should be >= the number requested or return * an error.
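The union qib_seqnum added above packs three views of one 32-bit sequence word. Assuming the usual little-endian bitfield allocation (first member in the low bits; the driver and PSM must agree on this), it reads roughly as:

	union qib_seqnum sn;

	sn.val  = 0;
	sn.seq  = 100;	/* bits  0..10: sequence number within a flow */
	sn.gen  = 3;	/* bits 11..18: generation */
	sn.flow = 7;	/* bits 19..23: flow id */
	/* the second view splits the same low 24 bits as pkt:16 + msg:8,
	 * and sn.val moves the whole word around as one unit */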
*/ + cq->dd = dd_from_dev(dev); cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); - INIT_WORK(&cq->comptask, send_complete); + init_kthread_work(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; @@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); - flush_work(&cq->comptask); + flush_kthread_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); @@ -483,3 +492,49 @@ bail_free: bail: return ret; } + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 00000000000..799a0c3bffc --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,283 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
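qib_cq_exit() above pairs with the smp_rmb() added to qib_cq_enter() earlier in this patch. The intended ordering appears to be:

	/*
	 * qib_cq_exit()                    qib_cq_enter()
	 * -------------                    --------------
	 * dd->worker = NULL;               smp_rmb();
	 * smp_wmb();                       worker = cq->dd->worker;
	 * flush_kthread_worker(worker);    if (likely(worker))
	 * kthread_stop(worker->task);              queue_kthread_work(...);
	 *
	 * once the NULL store is visible, no new completions can be queued,
	 * so the flush drains the last queued work before the task is
	 * stopped and the worker freed
	 */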
+ */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/export.h> + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, 
n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 00000000000..7ae983a91b8 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
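The _opcode_stats/_ctx_stats iterators above use the seq_file position pointer itself as the iteration cursor, so start/next allocate nothing and stop() has nothing to free. The bare pattern, reduced to essentials (N_ENTRIES is a placeholder bound):

	static void *idx_seq_start(struct seq_file *s, loff_t *pos)
	{
		return (*pos < N_ENTRIES) ? pos : NULL;	/* cursor is pos itself */
	}

	static void *idx_seq_next(struct seq_file *s, void *v, loff_t *pos)
	{
		++*pos;
		return (*pos < N_ENTRIES) ? pos : NULL;
	}

	static void idx_seq_stop(struct seq_file *s, void *v)
	{
		/* nothing allocated, nothing to release */
	}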
+ */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 9892456a434..5dfda4c5cc9 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -53,6 +53,9 @@ #include "qib.h" #include "qib_common.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + /* * Each client that opens the diag device must read then write * offset 0, to prevent lossage from random cat or od. diag_state @@ -543,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -587,19 +590,24 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, " - "failing\n"); + qib_devinfo(dd->pcidev, + "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -635,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 
2047 : 1023; @@ -686,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { - printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n"); - goto bail; + pr_err("vmalloc for observer failed\n"); + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb39a65..59fe092b4b0 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 6fc9365ba8a..5bee08f16d7 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -38,6 +39,7 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/prefetch.h> #include "qib.h" @@ -45,7 +47,7 @@ * The size has to be longer than this string, so we can append * board/chip information to it in the init code. 
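The qib_dma.c hunk above drops the driver's .dma_address/.dma_len ops and instead stores the address into each scatterlist entry during map_sg. This works because, with those ops absent, the core ib_sg_dma_address()/ib_sg_dma_len() accessors fall back to the generic scatterlist fields; paraphrased sketch of the core side (not part of this patch):

	static inline u64 ib_sg_dma_address(struct ib_device *dev,
					    struct scatterlist *sg)
	{
		return sg_dma_address(sg);	/* reads sg->dma_address */
	}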
*/ -const char ib_qib_version[] = QIB_IDSTR "\n"; +const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; DEFINE_SPINLOCK(qib_devs_lock); LIST_HEAD(qib_dev_list); @@ -62,8 +64,9 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic IB driver"); +MODULE_AUTHOR("Intel <ibsupport@intel.com>"); +MODULE_DESCRIPTION("Intel IB driver"); +MODULE_VERSION(QIB_DRIVER_VERSION); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our @@ -481,8 +484,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) etail = qib_hdrget_index(rhf_addr); updegr = 1; if (tlen > sizeof(*hdr) || - etype >= RCVHQ_RCV_TYPE_NON_KD) + etype >= RCVHQ_RCV_TYPE_NON_KD) { ebuf = qib_get_egrbuf(rcd, etail); + prefetch_range(ebuf, tlen - sizeof(*hdr)); + } } if (!eflags) { u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; @@ -553,7 +558,6 @@ move_along: } rcd->head = l; - rcd->pkt_count += i; /* * Iterate over all QPs waiting to respond. @@ -761,8 +765,9 @@ int qib_reset_device(int unit) qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { - qib_devinfo(dd->pcidev, "Invalid unit number %u or " - "not initialized or not present\n", unit); + qib_devinfo(dd->pcidev, + "Invalid unit number %u or not initialized or not present\n", + unit); ret = -ENXIO; goto bail; } @@ -799,11 +804,13 @@ int qib_reset_device(int unit) else ret = -EAGAIN; if (ret) - qib_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); + qib_dev_err(dd, + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); else - qib_devinfo(dd->pcidev, "Reinitialized unit %u after " - "resetting\n", unit); + qib_devinfo(dd->pcidev, + "Reinitialized unit %u after resetting\n", + unit); bail: return ret; diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 92d9cfe98a6..4d5d71aaa2b 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
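The prefetch_range() call added to qib_kreceive() above starts pulling the eager payload toward the cache while header parsing is still in flight. For reference, <linux/prefetch.h> implements it approximately as:

	static inline void prefetch_range(void *addr, size_t len)
	{
	#ifdef ARCH_HAS_PREFETCH
		char *cp;
		char *end = addr + len;

		for (cp = addr; cp < end; cp += PREFETCH_STRIDE)
			prefetch(cp);
	#endif
	}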
* * This software is available to you under a choice of one of two @@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) if (oguid > bguid[7]) { if (bguid[6] == 0xff) { if (bguid[5] == 0xff) { - qib_dev_err(dd, "Can't set %s GUID" - " from base, wraps to" - " OUI!\n", - qib_get_unit_name(t)); + qib_dev_err(dd, + "Can't set %s GUID from base, wraps to OUI!\n", + qib_get_unit_name(t)); dd->base_guid = 0; goto bail; } @@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) len = sizeof(struct qib_flash); buf = vmalloc(len); if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", + len); goto bail; } @@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - qib_devinfo(dd->pcidev, "Bad I2C flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); + qib_devinfo(dd->pcidev, + "Bad I2C flash checksum: 0x%x, not 0x%x\n", + csum, ifp->if_csum); goto done; } if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n", - *(unsigned long long *) ifp->if_guid); + qib_dev_err(dd, + "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); /* don't allow GUID if all 0 or all 1's */ goto done; } /* complain, but allow it */ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - qib_devinfo(dd->pcidev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); + qib_devinfo(dd->pcidev, + "Warning, GUID %llx is default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); bguid = ifp->if_guid; if (!bguid[0] && !bguid[1] && !bguid[2]) { @@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) memcpy(dd->serial, ifp->if_serial, sizeof ifp->if_serial); if (!strstr(ifp->if_comment, "Tested successfully")) - qib_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->serial, ifp->if_comment); + qib_dev_err(dd, + "Board SN %s did not pass functional test: %s\n", + dd->serial, ifp->if_comment); memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); /* @@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd) buf = vmalloc(len); ret = 1; if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for logging\n", + len); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index a7403248d83..b15e34eeef6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/uio.h> +#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> @@ -49,6 +49,9 @@ #include "qib_common.h" #include "qib_user_sdma.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + static int qib_open(struct inode *, struct file *); static int qib_close(struct inode *, struct file *); static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); @@ -315,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, } if (cnt > tidcnt) { /* make sure it all fits in tid_pg_list */ - qib_devinfo(dd->pcidev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); cnt = tidcnt; } pagep = (struct page **) rcd->tid_pg_list; @@ -750,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " - "bytes failed: %d\n", what, rcd->ctxt, - pfn, len, ret); + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); bail: return ret; } @@ -771,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, */ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; if ((vma->vm_end - vma->vm_start) > sz) { - qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); ret = -EFAULT; } else { phys = dd->physaddr + ureg; @@ -802,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, * for it. 
*/ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { - qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); ret = -EINVAL; goto bail; @@ -847,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, size = rcd->rcvegrbuf_size; total_size = rcd->rcvegrbuf_chunks * size; if ((vma->vm_end - vma->vm_start) > total_size) { - qib_devinfo(dd->pcidev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); ret = -EINVAL; @@ -856,8 +861,9 @@ } if (vma->vm_flags & VM_WRITE) { - qib_devinfo(dd->pcidev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); ret = -EPERM; goto bail; } @@ -965,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: @@ -1149,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(&current->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set its affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set its affinity, select and + * reserve a processor for it, as a rendezvous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do its best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1171,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor) return user_swminor == 3; default: /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; + return user_swminor <= QIB_USER_SWMINOR; } } /* make no promises yet for future major versions */ @@ -1253,12 +1302,20 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); - rcd = qib_create_ctxtdata(ppd, ctxt); + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
+ cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to @@ -1270,8 +1327,8 @@ GFP_KERNEL); if (!rcd || !ptmp) { - qib_dev_err(dd, "Unable to allocate ctxtdata " - "memory, failing open\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); ret = -ENOMEM; goto bailerr; } @@ -1290,6 +1347,9 @@ goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1399,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } @@ -1479,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) { + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + } + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1511,61 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } - i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(&current->cpus_allowed); + const unsigned int weight = + cpumask_weight(&current->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - unsigned int weight; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendezvous for all - * users of the driver.
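The nested ternary setup_ctxt() uses to pick numa_id (split across the lines above) unwraps to:

	if (!qib_numa_aware)
		numa_id = dd->assigned_node_id;		/* node closest to the HCA */
	else if (fd->rec_cpu_num != -1)
		numa_id = cpu_to_node(fd->rec_cpu_num);	/* node of the reserved CPU */
	else
		numa_id = numa_node_id();		/* node of the opening CPU */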
If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - weight = cpumask_weight(tsk_cpus_allowed(current)); - if (!ret && weight >= qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (weight == 1 && - test_bit(cpumask_first(tsk_cpus_allowed(current)), - qib_cpulist)) - qib_devinfo(dd->pcidev, "%s PID %u affinity " - "set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: @@ -2185,8 +2272,7 @@ int qib_cdev_init(int minor, const char *name, cdev = cdev_alloc(); if (!cdev) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", + pr_err("Could not allocate cdev for minor %d, %s\n", minor, name); ret = -ENOMEM; goto done; @@ -2198,19 +2284,17 @@ int qib_cdev_init(int minor, const char *name, ret = cdev_add(cdev, dev, 1); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", + pr_err("Could not add cdev for minor %d, %s (err %d)\n", minor, name, -ret); goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, name); + device = device_create(qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); device = NULL; - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", + pr_err("Could not create device for minor %d, %s (err %d)\n", minor, name, -ret); err_cdev: cdev_del(cdev); @@ -2245,16 +2329,14 @@ int __init qib_dev_init(void) ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " - "chrdev region (err %d)\n", -ret); + pr_err("Could not allocate chrdev region (err %d)\n", -ret); goto done; } qib_class = class_create(THIS_MODULE, "ipath"); if (IS_ERR(qib_class)) { ret = PTR_ERR(qib_class); - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); + pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 05e0f17c5b4..cab610ccd50 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -44,7 +45,7 @@ static struct super_block *qib_super; -#define private2dd(file) ((file)->f_dentry->d_inode->i_private) +#define private2dd(file) (file_inode(file)->i_private) static int qibfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *fops, @@ -60,8 +61,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_atime = CURRENT_TIME; inode->i_mtime = inode->i_atime; @@ -104,6 +105,7 @@ static int create_file(const char *name, umode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } @@ -170,7 +172,7 @@ static const struct file_operations cntr_ops[] = { }; /* - * Could use file->f_dentry->d_inode->i_ino to figure out which file, + * Could use file_inode(file)->i_ino to figure out which file, * instead of separate routine for each, but for now, this works... */ @@ -382,7 +384,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, &simple_dir_operations, dd); if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + pr_err("create_file(%s) failed: %d\n", unit, ret); goto bail; } @@ -390,21 +392,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counters) failed: %d\n", + pr_err("create_file(%s/counters) failed: %d\n", unit, ret); goto bail; } ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n", + pr_err("create_file(%s/counter_names) failed: %d\n", unit, ret); goto bail; } ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, "portcounter_names", ret); goto bail; } @@ -416,7 +418,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[i], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -426,7 +428,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &qsfp_ops[i - 1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -435,7 +437,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, &flash_ops, dd); if (ret) - printk(KERN_ERR "create_file(%s/flash) failed: %d\n", + pr_err("create_file(%s/flash) failed: %d\n", unit, ret); bail: return ret; @@ -455,13 +457,13 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); 
simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); } + dput(tmp); ret = 0; bail: @@ -486,10 +488,11 @@ static int remove_device_files(struct super_block *sb, if (IS_ERR(dir)) { ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); + pr_err("Lookup of %s failed\n", unit); goto bail; } + mutex_lock(&dir->d_inode->i_mutex); remove_file(dir, "counters"); remove_file(dir, "counter_names"); remove_file(dir, "portcounter_names"); @@ -504,8 +507,10 @@ static int remove_device_files(struct super_block *sb, } } remove_file(dir, "flash"); - d_delete(dir); + mutex_unlock(&dir->d_inode->i_mutex); ret = simple_rmdir(root->d_inode, dir); + d_delete(dir); + dput(dir); bail: mutex_unlock(&root->d_inode->i_mutex); @@ -532,7 +537,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) ret = simple_fill_super(sb, QIBFS_MAGIC, files); if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + pr_err("simple_fill_super failed: %d\n", ret); goto bail; } @@ -603,6 +608,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index d0c64d51481..d68266ac761 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64); /* * This file contains all the chip-specific register information and - * access functions for the QLogic QLogic_IB PCI-Express chip. + * access functions for the Intel Intel_IB PCI-Express chip. * */ @@ -753,8 +754,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) return; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); return; } qib_stats.sps_hwerrs++; @@ -779,13 +780,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, * or it's occurred within the last 5 seconds. 
*/ if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { @@ -815,8 +817,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware" - " unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -868,8 +871,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, *msg = 0; /* recovered from all of them */ if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1017,9 +1021,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) qib_inc_eeprom_err(dd, log_idx, 1); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", - (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_6120_senderrbufs(ppd); @@ -1089,8 +1093,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1541,8 +1545,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); handle_6120_errors(dd, estat); } @@ -1629,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1715,16 +1718,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) } if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? 
Interrupts won't work\n"); else { int ret; ret = request_irq(dd->cspec->irq, qib_6120intr, 0, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup interrupt " - "(irq=%d): %d\n", dd->cspec->irq, - ret); + qib_dev_err(dd, + "Couldn't setup interrupt (irq=%d): %d\n", + dd->cspec->irq, ret); } } @@ -1759,8 +1762,9 @@ static void pe_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware revision " - "%u.%u!\n", dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -1802,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ @@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ qib_6120_init_hwerrors(dd); /* for Rev2 error interrupts; nop for rev 1 */ @@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2928,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->dd->unit, ppd->port); } else if (!strncmp(what, "off", 3)) { ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3132,6 +3140,7 @@ static void get_6120_chip_params(struct qib_devdata *dd) val = qib_read_kreg64(dd, kr_sendpiobufcnt); dd->piobcnt2k = val & ~0U; dd->piobcnt4k = val >> 32; + dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1; /* these may be adjusted in init_chip_wc_pat() */ dd->pio2kbase = (u32 __iomem *) (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); @@ -3185,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + 
qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } return ret; @@ -3217,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -3257,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; @@ -3455,6 +3465,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) return -ENXIO; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 6120 boards never disable EEPROM Write */ static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -3530,6 +3547,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_6120_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. @@ -3550,8 +3570,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, goto bail; if (qib_pcie_params(dd, 8, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ /* clear diagctrl register, in case diags were running and crashed */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 3c722f79d6f..7dec89fdc12 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1111,9 +1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) sdma_7220_errors(ppd, errs); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_7220_senderrbufs(ppd); @@ -1192,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1305,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto 
bail; } qib_stats.sps_hwerrs++; @@ -1329,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_inc_eeprom_err(dd, log_idx, 1); if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | RXE_PARITY)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) qib_sd7220_clr_ibpar(dd); @@ -1362,8 +1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -1409,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * For /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); else handle_7220_errors(dd, estat); } @@ -1958,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2023,17 +2024,18 @@ bail: static void qib_setup_7220_interrupt(struct qib_devdata *dd) { if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret = request_irq(dd->cspec->irq, qib_7220intr, dd->msi_lo ? 0 : IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup %s interrupt " - "(irq=%d): %d\n", dd->msi_lo ? - "MSI" : "INTx", dd->cspec->irq, ret); + qib_dev_err(dd, + "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->msi_lo ? 
"MSI" : "INTx", + dd->cspec->irq, ret); } } @@ -2072,9 +2074,9 @@ static void qib_7220_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware " - "revision %u.%u!\n", - dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -2115,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ @@ -2146,8 +2149,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* hold IBC in reset, no sends, etc till later */ qib_write_kreg(dd, kr_control, 0ULL); @@ -2187,8 +2190,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2706,8 +2710,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); /* enable heart beat again */ val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3307,8 +3312,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) if (!dd->msi_lo) return 0; - qib_devinfo(dd->pcidev, "MSI interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); qib_enable_intx(dd->pcidev); /* @@ -3980,11 +3985,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } qib_register_observer(dd, &sendctrl_observer); @@ -4014,8 +4018,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ 
-4055,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; @@ -4157,6 +4163,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt); @@ -4506,6 +4513,13 @@ bail: return ret; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 7220 boards never disable EEPROM Write */ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -4580,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_7220_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. @@ -4612,8 +4629,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, break; } if (qib_pcie_params(dd, minwidth, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* save IRQ for possible later use */ dd->cspec->irq = pdev->irq; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 41e92089e41..a7eb32517a0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
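/*
 * [Editor's note -- not part of the patch.] Several hunks above replace
 * the racy "dd->int_counter++" in the interrupt handlers with
 * this_cpu_inc(), and on chip reset snapshot a baseline (z_int_counter)
 * instead of zeroing a shared counter. A sketch of that scheme with
 * hypothetical field names; the driver's qib_int_counter() performs the
 * same per-CPU summation as int_counter_total() below.
 */
#include <linux/percpu.h>

struct dev_stats {
	u64 __percpu *int_counter;	/* from alloc_percpu(u64) */
	u64 z_int_counter;		/* baseline captured at chip reset */
};

/* Hot path: lockless, no shared cache line bouncing between CPUs. */
static inline void count_interrupt(struct dev_stats *s)
{
	this_cpu_inc(*s->int_counter);
}

/* Readers sum every CPU; "interrupts since reset" = total - baseline. */
static u64 int_counter_total(struct dev_stats *s)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += *per_cpu_ptr(s->int_counter, cpu);
	return total;
}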
You may choose to be licensed under the terms of the GNU @@ -43,12 +44,19 @@ #include <linux/module.h> #include <rdma/ib_verbs.h> #include <rdma/ib_smi.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_7322_regs.h" #include "qib_qsfp.h" #include "qib_mad.h" +#include "qib_verbs.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME " " fmt static void qib_setup_7322_setextled(struct qib_pportdata *, u32); static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); @@ -75,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); static void serdes_7322_los_enable(struct qib_pportdata *, int); static int serdes_7322_init_old(struct qib_pportdata *); static int serdes_7322_init_new(struct qib_pportdata *); +static void dump_sdma_7322_state(struct qib_pportdata *); #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) @@ -514,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = { [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + struct qib_chip_specific { u64 __iomem *cregbase; u64 *cntrs; @@ -541,8 +558,13 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; - struct msix_entry *msix_entries; - void **msix_arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif + struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; unsigned long *sendibchk; @@ -569,7 +591,7 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ #define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ @@ -631,6 +653,7 @@ struct qib_chippport_specific { u8 ibmalfusesnap; struct qib_qsfp_data qsfp_data; char epmsgbuf[192]; /* for port error interrupt msg buffer */ + char sdmamsgbuf[192]; /* for per-port sdma error messages */ }; static struct { @@ -638,28 +661,76 @@ static struct { irq_handler_t handler; int lsb; int port; /* 0 if not port-specific, else port # */ + int dca; } irq_table[] = { - { QIB_DRV_NAME, qib_7322intr, -1, 0 }, - { QIB_DRV_NAME " (buf avail)", qib_7322bufavail, - SYM_LSB(IntStatus, SendBufAvail), 0 }, - { QIB_DRV_NAME " (sdma 0)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_0), 1 }, - { QIB_DRV_NAME " (sdma 1)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_1), 2 }, - { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, - { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, - { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, - { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, - { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, - { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, + { "", qib_7322intr, -1, 0, 0 }, + { " (buf avail)", qib_7322bufavail, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, + { " (sdma 0)", sdma_intr, + 
SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, + { " (sdma 1)", sdma_intr, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, + { " (sdmaI 0)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, + { " (sdmaI 1)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, + { " (sdmaP 0)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, + { " (sdmaP 1)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, + { " (sdmaC 0)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, + { " (sdmaC 1)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, +}; +#endif + /* ibcctrl bits */ #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 /* cycle through TS1/TS2 till OK */ @@ -682,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *); static void setup_7322_link_recovery(struct qib_pportdata *, u32); static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif /** * qib_read_ureg32 - read 32-bit virtualized per-context register @@ -1518,6 +1596,8 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) struct qib_devdata *dd = ppd->dd; errs &= QIB_E_P_SDMAERRS; + 
err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf), + errs, qib_7322p_error_msgs); if (errs & QIB_E_P_SDMAUNEXPDATA) qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit, @@ -1525,6 +1605,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) spin_lock_irqsave(&ppd->sdma_lock, flags); + if (errs != QIB_E_P_SDMAHALT) { + /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */ + qib_dev_porterr(dd, ppd->port, + "SDMA %s 0x%016llx %s\n", + qib_sdma_state_names[ppd->sdma_state.current_state], + errs, ppd->cpspec->sdmamsgbuf); + dump_sdma_7322_state(ppd); + } + switch (ppd->sdma_state.current_state) { case qib_sdma_state_s00_hw_down: break; @@ -1576,8 +1665,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) qib_stats.sps_errints++; errs = qib_read_kreg64(dd, kr_errstatus); if (!errs) { - qib_devinfo(dd->pcidev, "device error interrupt, " - "but no error bits set!\n"); + qib_devinfo(dd->pcidev, + "device error interrupt, but no error bits set!\n"); goto done; } @@ -1623,8 +1712,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) if (errs & QIB_E_RESET) { int pidx; - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1761,9 +1850,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) ppd->dd->cspec->r1 ? QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u re-enabled QDR adaptation " - "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt); + pr_info( + "IB%u:%u re-enabled QDR adaptation ibclt %x\n", + ppd->dd->unit, ppd->port, ibclt); } } } @@ -1805,9 +1894,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) if (!*msg) snprintf(msg, sizeof ppd->cpspec->epmsgbuf, "no others"); - qib_dev_porterr(dd, ppd->port, "error interrupt with unknown" - " errors 0x%016Lx set (and %s)\n", - (errs & ~QIB_E_P_BITSEXTANT), msg); + qib_dev_porterr(dd, ppd->port, + "error interrupt with unknown errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); *msg = '\0'; } @@ -2025,8 +2114,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -2040,8 +2129,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, /* no EEPROM logging, yet */ if (hwerrs) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { @@ -2065,8 +2155,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcpy(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, 
dd->cspec->hwerrmask); @@ -2078,9 +2169,33 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs & + (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) | + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) { + int pidx = 0; + int err; + unsigned long flags; + struct qib_pportdata *ppd = dd->pport; + for (; pidx < dd->num_pports; ++pidx, ppd++) { + err = 0; + if (pidx == 0 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_0))) + err++; + if (pidx == 1 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) + err++; + if (err) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + dump_sdma_7322_state(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -2280,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); qib_write_kreg(dd, kr_scratch, 0ULL); + /* ensure previous Tx parameters are not still forced */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + if (qib_compat_ddr_negotiate) { ppd->cpspec->ibdeltainprog = 1; ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, @@ -2551,6 +2671,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? 
+ SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? + SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + /* * Disable MSIx interrupt if enabled, call generic MSIx code * to cleanup, and clear pending MSIx interrupts. 
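/*
 * [Editor's note -- not part of the patch.] qib_irq_notifier_notify()
 * and qib_irq_notifier_release() above plug into the generic IRQ
 * affinity-notification machinery so that DCA tags follow the CPU an
 * MSI-X vector migrates to. A self-contained sketch of that hookup
 * under hypothetical names; irq_set_affinity_notifier() and
 * struct irq_affinity_notify are the real kernel API.
 */
#include <linux/interrupt.h>
#include <linux/slab.h>

struct my_notify {
	void *arg;			/* rcd or ppd in the driver */
	struct irq_affinity_notify notify;
};

static void my_affinity_changed(struct irq_affinity_notify *notify,
				const cpumask_t *mask)
{
	struct my_notify *n = container_of(notify, struct my_notify, notify);
	int cpu = cpumask_first(mask);

	/* re-steer per-CPU resources (DCA tag registers here) to 'cpu' */
	(void)n;
	(void)cpu;
}

static void my_release(struct kref *ref)
{
	struct my_notify *n =
		container_of(ref, struct my_notify, notify.kref);

	kfree(n);	/* runs once the core drops the last reference */
}

static int hook_affinity(unsigned int irq, void *arg)
{
	struct my_notify *n = kzalloc(sizeof(*n), GFP_KERNEL);

	if (!n)
		return -ENOMEM;
	n->arg = arg;
	n->notify.irq = irq;
	n->notify.notify = my_affinity_changed;
	n->notify.release = my_release;
	return irq_set_affinity_notifier(irq, &n->notify);
}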
@@ -2567,9 +2843,16 @@ static void qib_7322_nomsix(struct qib_devdata *dd) int i; dd->cspec->num_msix_entries = 0; - for (i = 0; i < n; i++) - free_irq(dd->cspec->msix_entries[i].vector, - dd->cspec->msix_arg[i]); + for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif + irq_set_affinity_hint( + dd->cspec->msix_entries[i].msix.vector, NULL); + free_cpumask_var(dd->cspec->msix_entries[i].mask); + free_irq(dd->cspec->msix_entries[i].msix.vector, + dd->cspec->msix_entries[i].arg); + } qib_nomsix(dd); } /* make sure no MSIx interrupts are left pending */ @@ -2591,13 +2874,21 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->sendchkenable); kfree(dd->cspec->sendgrhchk); kfree(dd->cspec->sendibchk); kfree(dd->cspec->msix_entries); - kfree(dd->cspec->msix_arg); for (i = 0; i < dd->num_pports; i++) { unsigned long flags; u32 mask = QSFP_GPIO_MOD_PRS_N | @@ -2665,8 +2956,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat) char msg[128]; kills = istat & ~QIB_I_BITSEXTANT; - qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:" - " %s\n", (unsigned long long) kills, msg); + qib_dev_err(dd, + "Clearing reserved interrupt(s) 0x%016llx: %s\n", + (unsigned long long) kills, msg); qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); } @@ -2823,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -2894,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | @@ -2923,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -2956,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -2985,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3014,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. 
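/*
 * [Editor's note -- not part of the patch.] qib_7322_nomsix() above now
 * clears each vector's affinity hint and frees its cpumask before
 * free_irq(). A sketch of that setup/teardown pairing under assumed
 * names; irq_set_affinity_hint(), zalloc_cpumask_var() and
 * free_cpumask_var() are the real primitives.
 */
#include <linux/cpumask.h>
#include <linux/interrupt.h>

static int publish_vector_affinity(unsigned int irq, int cpu,
				   cpumask_var_t *mask)
{
	if (!zalloc_cpumask_var(mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_set_cpu(cpu, *mask);
	return irq_set_affinity_hint(irq, *mask); /* shown in /proc/irq */
}

static void retract_vector_affinity(unsigned int irq, cpumask_var_t *mask)
{
	irq_set_affinity_hint(irq, NULL);	/* must precede the free */
	free_cpumask_var(*mask);
}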
*/ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3044,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3057,6 +3335,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + /* * Set up our chip-specific interrupt handler. * The interrupt type has already been setup, so @@ -3070,6 +3395,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) int ret, i, msixnum; u64 redirect[6]; u64 mask; + const struct cpumask *local_mask; + int firstcpu, secondcpu = 0, currrcvcpu = 0; if (!dd->num_pports) return; @@ -3097,16 +3424,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) /* Try to get INTx interrupt */ try_intx: if (!dd->pcidev->irq) { - qib_dev_err(dd, "irq is 0, BIOS error? " - "Interrupts won't work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); goto bail; } ret = request_irq(dd->pcidev->irq, qib_7322intr, IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) { - qib_dev_err(dd, "Couldn't setup INTx " - "interrupt (irq=%d): %d\n", - dd->pcidev->irq, ret); + qib_dev_err(dd, + "Couldn't setup INTx interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); goto bail; } dd->cspec->irq = dd->pcidev->irq; @@ -3118,13 +3445,31 @@ try_intx: memset(redirect, 0, sizeof redirect); mask = ~0ULL; msixnum = 0; + local_mask = cpumask_of_pcibus(dd->pcidev->bus); + firstcpu = cpumask_first(local_mask); + if (firstcpu >= nr_cpu_ids || + cpumask_weight(local_mask) == num_online_cpus()) { + local_mask = topology_core_cpumask(0); + firstcpu = cpumask_first(local_mask); + } + if (firstcpu < nr_cpu_ids) { + secondcpu = cpumask_next(firstcpu, local_mask); + if (secondcpu >= nr_cpu_ids) + secondcpu = firstcpu; + currrcvcpu = secondcpu; + } for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { irq_handler_t handler; - const char *name; void *arg; u64 val; int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif + dd->cspec->msix_entries[msixnum]. 
+ name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] + = '\0'; if (i < ARRAY_SIZE(irq_table)) { if (irq_table[i].port) { /* skip if for a non-configured port */ @@ -3133,9 +3478,16 @@ try_intx: arg = dd->pport + irq_table[i].port - 1; } else arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; - name = irq_table[i].name; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d%s", dd->unit, + irq_table[i].name); } else { unsigned ctxt; @@ -3146,25 +3498,39 @@ try_intx: continue; if (qib_krcvq01_no_msi && ctxt < 2) continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; - name = QIB_DRV_NAME " (kctx)"; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq(dd->cspec->msix_entries[msixnum].vector, - handler, 0, name, arg); + ret = request_irq( + dd->cspec->msix_entries[msixnum].msix.vector, + handler, 0, dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. */ - qib_dev_err(dd, "Couldn't setup MSIx " - "interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].vector, - ret); + qib_dev_err(dd, + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].msix.vector, + ret); qib_7322_nomsix(dd); goto try_intx; } - dd->cspec->msix_arg[msixnum] = arg; + dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -3174,6 +3540,25 @@ try_intx: } val = qib_read_kreg64(dd, 2 * msixnum + 1 + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (firstcpu < nr_cpu_ids && + zalloc_cpumask_var( + &dd->cspec->msix_entries[msixnum].mask, + GFP_KERNEL)) { + if (handler == qib_7322pintr) { + cpumask_set_cpu(currrcvcpu, + dd->cspec->msix_entries[msixnum].mask); + currrcvcpu = cpumask_next(currrcvcpu, + local_mask); + if (currrcvcpu >= nr_cpu_ids) + currrcvcpu = secondcpu; + } else { + cpumask_set_cpu(firstcpu, + dd->cspec->msix_entries[msixnum].mask); + } + irq_set_affinity_hint( + dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].mask); + } msixnum++; } /* Initialize the vector mapping */ @@ -3258,8 +3643,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { - qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" - " by module parameter\n", dd->unit); + qib_devinfo(dd->pcidev, + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } @@ -3323,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. 
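/*
 * [Editor's note -- not part of the patch.] The MSI-X setup above
 * spreads vectors over CPUs local to the HCA's bus: the first local CPU
 * takes the non-receive vectors, and receive-context vectors
 * round-robin from the second local CPU onward, wrapping back to it.
 * A condensed sketch with hypothetical helpers; the fallback when bus
 * locality is absent, or covers every online CPU, mirrors the hunk.
 */
#include <linux/cpumask.h>
#include <linux/pci.h>
#include <linux/topology.h>

static const struct cpumask *local_cpus(struct pci_dev *pdev, int *firstcpu)
{
	const struct cpumask *local = cpumask_of_pcibus(pdev->bus);

	*firstcpu = cpumask_first(local);
	if (*firstcpu >= nr_cpu_ids ||
	    cpumask_weight(local) == num_online_cpus()) {
		/* no useful locality info: use core 0's topology instead */
		local = topology_core_cpumask(0);
		*firstcpu = cpumask_first(local);
	}
	return local;
}

static int next_rcv_cpu(const struct cpumask *local, int cur, int secondcpu)
{
	cur = cpumask_next(cur, local);
	return cur >= nr_cpu_ids ? secondcpu : cur;	/* wrap around */
}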
Use @@ -3353,8 +3740,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (val == dd->revision) break; if (i == 5) { - qib_dev_err(dd, "Failed to initialize after reset, " - "unusable\n"); + qib_dev_err(dd, + "Failed to initialize after reset, unusable\n"); ret = 0; goto bail; } @@ -3365,7 +3752,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -3385,8 +3772,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (qib_pcie_params(dd, dd->lbus_width, &dd->cspec->num_msix_entries, dd->cspec->msix_entries)) - qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); qib_setup_7322_interrupt(dd, 1); @@ -3427,8 +3814,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -3982,8 +4370,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) Loopback); /* enable heart beat again */ val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -4667,8 +5056,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, "Failed allocation for" - " portcounters\n"); + qib_dev_err(dd, + "Failed allocation for portcounters\n"); } } @@ -4818,8 +5207,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) if (!dd->cspec->num_msix_entries) return 0; /* already using INTx */ - qib_devinfo(dd->pcidev, "MSIx interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); qib_enable_intx(dd->pcidev); qib_setup_7322_interrupt(dd, 0); @@ -5104,15 +5493,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; if (!ibp->smi_ah) { - struct ib_ah_attr attr; struct ib_ah *ah; - memset(&attr, 0, sizeof attr); - attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE); - attr.port_num = ppd->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->smi_ah = to_iah(ah); @@ -5799,15 +6184,13 @@ static int setup_txselect(const char *str, struct kernel_param *kp) unsigned long val; char *n; if (strlen(str) >= MAX_ATTEN_LEN) { - printk(KERN_INFO QIB_DRV_NAME " txselect_values string " - "too long\n"); + pr_info("txselect_values string too long\n"); return -ENOSPC; } val = simple_strtoul(str, &n, 0); if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { - printk(KERN_INFO QIB_DRV_NAME - "txselect_values must start with a number < %d\n", + pr_info("txselect_values must start with a 
number < %d\n", TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); return -EINVAL; } @@ -5835,11 +6218,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } @@ -6051,8 +6433,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -6162,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; @@ -6218,9 +6604,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd) */ if (!(dd->flags & QIB_HAS_QSFP)) { if (!IS_QMH(dd) && !IS_QME(dd)) - qib_devinfo(dd->pcidev, "IB%u:%u: " - "Unknown mezzanine card type\n", - dd->unit, ppd->port); + qib_devinfo(dd->pcidev, + "IB%u:%u: Unknown mezzanine card type\n", + dd->unit, ppd->port); cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; /* * Choose center value as default tx serdes setting @@ -6292,8 +6678,10 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->piobcnt4k * dd->align4k; dd->piovl15base = ioremap_nocache(vl15off, NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) + if (!dd->piovl15base) { + ret = -ENOMEM; goto bail; + } } qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ @@ -6332,6 +6720,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->cspec->sdmabufcnt; dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? 
dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; @@ -6395,6 +6784,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); } +/* + * sdma_lock should be acquired before calling this routine + */ +static void dump_sdma_7322_state(struct qib_pportdata *ppd) +{ + u64 reg, reg1, reg2; + + reg = qib_read_kreg_port(ppd, krp_senddmastatus); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmastatus: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_sendctrl); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sendctrl: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabase); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabase: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabufmask0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + /* get bufuse bits, clear them, and print them again if non-zero */ + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + reg = qib_read_kreg_port(ppd, krp_senddmatail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmatail: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmahead); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmahead: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaheadaddr: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmalengen); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmalengen: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmadesccnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmadesccnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaidlecnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmapriorityhld: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmareloadcnt: 0x%016llx\n", reg); + + dump_sdma_state(ppd); +} + static struct sdma_set_state_action sdma_7322_action_table[] = { [qib_sdma_state_s00_hw_down] = { .go_s99_running_tofalse = 1, @@ -6828,6 +7297,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->f_sdma_init_early = qib_7322_sdma_init_early; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif /* * Do remaining PCIe setup and save PCIe 
values in dd. * Any error printing is already done by the init code. @@ -6864,20 +7336,18 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, actual_cnt -= dd->num_pports; tabsize = actual_cnt; - dd->cspec->msix_entries = kmalloc(tabsize * - sizeof(struct msix_entry), GFP_KERNEL); - dd->cspec->msix_arg = kmalloc(tabsize * - sizeof(void *), GFP_KERNEL); - if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) { + dd->cspec->msix_entries = kzalloc(tabsize * + sizeof(struct qib_msix_entry), GFP_KERNEL); + if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); tabsize = 0; } for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ dd->cspec->num_msix_entries = tabsize; @@ -6886,7 +7356,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); - +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (!dca_add_requester(&pdev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } +#endif goto bail; bail_cleanup: @@ -7101,15 +7577,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -7118,15 +7599,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ { 0, 0, 
0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -7135,15 +7621,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 1, 12, 10 }, /* QME7342 backplane setting */ - { 0, 1, 12, 11 }, /* QME7342 backplane setting */ - { 0, 1, 12, 12 }, /* QME7342 backplane setting */ - { 0, 1, 12, 14 }, /* QME7342 backplane setting */ - { 0, 1, 12, 6 }, /* QME7342 backplane setting */ - { 0, 1, 12, 7 }, /* QME7342 backplane setting */ - { 0, 1, 12, 8 }, /* QME7342 backplane setting */ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { @@ -7230,8 +7721,7 @@ static void find_best_ent(struct qib_pportdata *ppd, ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u use idx %u into txdds_mfg\n", + pr_info("IB%u:%u use idx %u into txdds_mfg\n", ppd->dd->unit, ppd->port, idx); *sdr_dds = &txdds_extra_mfg[idx]; *ddr_dds = &txdds_extra_mfg[idx]; @@ -7386,11 +7876,11 @@ static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); if (enable && !state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n", + pr_info("IB%u:%u Turning LOS on\n", ppd->dd->unit, ppd->port); data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } else if (!enable && state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n", + pr_info("IB%u:%u Turning LOS off\n", ppd->dd->unit, ppd->port); data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } @@ -7626,8 +8116,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) } } if (chan_done) { - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d calibration not done after .5 sec: 0x%x\n", + pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", IBSD(ppd->hw_pidx), chan_done); } else { for (chan = 0; chan < SERDES_CHANS; ++chan) { @@ -7635,9 
+8124,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) (chan + (chan >> 1)), 25, 0, 0); if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d chan %d calibration " - "failed\n", IBSD(ppd->hw_pidx), chan); + pr_info("Serdes %d chan %d calibration failed\n", + IBSD(ppd->hw_pidx), chan); } } @@ -7663,7 +8151,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); msleep(20); /* Set Frequency Loop Bandwidth */ - ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5)); + ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5)); /* Enable Frequency Loop */ ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); /* Set Timing Loop Bandwidth */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index cf0cd30adc8..8d3c78ddc90 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -38,9 +38,21 @@ #include <linux/delay.h> #include <linux/idr.h> #include <linux/module.h> +#include <linux/printk.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_common.h" +#include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt /* * min buffers we want to have per context, after driver @@ -59,6 +71,11 @@ ushort qib_cfgctxts; module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + /* * If set, do not write to any regs if avoidable, hack to allow * check for deranged default register values. @@ -71,6 +88,9 @@ unsigned qib_n_krcv_queues; module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); +unsigned qib_cc_table_size; +module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); /* * qib_wc_pat parameter: * 0 is WC via MTRR @@ -81,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_cq_wq; - static void verify_interrupt(unsigned long); static struct idr qib_unit_table; @@ -102,6 +120,8 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) dd->cfgctxts = qib_cfgctxts; else dd->cfgctxts = dd->ctxtcnt; + dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 
0 : + dd->cfgctxts - dd->first_user_ctxt; } /* @@ -110,7 +130,11 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; /* * Allocate full ctxtcnt array, rather than just cfgctxts, because @@ -118,10 +142,9 @@ int qib_create_ctxts(struct qib_devdata *dd) */ dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); if (!dd->rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata array, " - "failing\n"); - ret = -ENOMEM; - goto done; + qib_dev_err(dd, + "Unable to allocate ctxtdata array, failing\n"); + return -ENOMEM; } /* create (one or more) kctxt */ @@ -133,38 +156,51 @@ int qib_create_ctxts(struct qib_devdata *dd) continue; ppd = dd->pport + (i % dd->num_pports); - rcd = qib_create_ctxtdata(ppd, i); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); if (!rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata" - " for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + qib_dev_err(dd, + "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* * Common code for user and kernel context setup. */ -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) { struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); if (rcd) { INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; rcd->ppd = ppd; rcd->dd = dd; rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif dd->f_init_ctxt(rcd); /* @@ -194,20 +230,101 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) /* * Common code for initializing the physical port structure. 
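The context-creation hunks above make allocation NUMA-aware: the HCA's home node is taken from pcibus_to_node() (falling back to the calling CPU's node when firmware reports none) and handed down so qib_create_ctxtdata() can use kzalloc_node(), under control of the numa_aware module parameter introduced earlier in this file. A minimal sketch of the node-selection pattern, independent of the driver (the helper name is illustrative, not from qib):

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/topology.h>

/* Allocate a per-context object on (or near) the adapter's NUMA node. */
static void *alloc_on_device_node(struct pci_dev *pdev, size_t size)
{
        int node = pcibus_to_node(pdev->bus);

        if (node < 0)           /* firmware did not report a node */
                node = numa_node_id();

        return kzalloc_node(size, GFP_KERNEL, node);
}

With numa_aware=1 the driver instead allocates near the requesting process; that locality trade-off is exactly what the parameter exposes.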
*/ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { + int size; ppd->dd = dd; ppd->hw_pidx = hw_pidx; ppd->port = port; /* IB port number, not index */ spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; ppd->symerr_clear_timer.data = (unsigned long)ppd; + + ppd->qib_wq = NULL; + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) + goto bail; + + ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, + IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); + + ppd->cc_max_table_entries = + ppd->cc_supported_table_entries/IB_CCT_ENTRIES; + + size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) + * IB_CCT_ENTRIES; + ppd->ccti_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries) { + qib_dev_err(dd, + "failed to allocate congestion control table for port %d!\n", + port); + goto bail; + } + + size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); + ppd->congestion_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries) { + qib_dev_err(dd, + "failed to allocate congestion setting list for port %d!\n", + port); + goto bail_1; + } + + size = sizeof(struct cc_table_shadow); + ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow ccti list for port %d!\n", + port); + goto bail_2; + } + + size = sizeof(struct ib_cc_congestion_setting_attr); + ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow congestion setting list for port %d!\n", + port); + goto bail_3; + } + + return 0; + +bail_3: + kfree(ppd->ccti_entries_shadow); + ppd->ccti_entries_shadow = NULL; +bail_2: + kfree(ppd->congestion_entries); + ppd->congestion_entries = NULL; +bail_1: + kfree(ppd->ccti_entries); + ppd->ccti_entries = NULL; +bail: + /* User is intentionally disabling the congestion control agent */ + if (!qib_cc_table_size) + return 0; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { + qib_cc_table_size = 0; + qib_dev_err(dd, + "Congestion Control table size %d less than minimum %d for port %d\n", + qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); + } + + qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", + port); + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -219,8 +336,8 @@ static int init_pioavailregs(struct qib_devdata *dd) &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, GFP_KERNEL); if (!dd->pioavailregs_dma) { - qib_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); + qib_dev_err(dd, + "failed to allocate PIOavail reg area in memory\n"); ret = -ENOMEM; goto done; } @@ -275,15 +392,15 @@ static void init_shadow_tids(struct qib_devdata *dd) pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); if (!pages) { - qib_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow page * array, no expected sends!\n"); goto bail; } addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); if (!addrs) { - 
qib_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow dma handle array, no expected sends!\n"); goto bail_free; } @@ -307,13 +424,13 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { - qib_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - QIB_CHIP_SWVERSION, - (int)(dd->revision >> + qib_dev_err(dd, + "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & - QLOGIC_IB_R_SOFTWARE_MASK, - (unsigned long long) dd->revision); + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); ret = -ENOSYS; goto done; } @@ -341,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd) dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; + ret = qib_cq_init(dd); done: return ret; } @@ -402,12 +520,12 @@ static void enable_chip(struct qib_devdata *dd) if (rcd) dd->f_rcvctrl(rcd->ppd, rcvmask, i); } - dd->freectxts = dd->cfgctxts - dd->first_user_ctxt; } static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -416,10 +534,11 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); + dev_err(&dd->pcidev->dev, + "No interrupts detected, not usable.\n"); else /* re-arm the timer to see if fallback works */ mod_timer(&dd->intrchk_timer, jiffies + HZ/2); } @@ -482,6 +601,47 @@ static void init_piobuf_state(struct qib_devdata *dd) } /** + * qib_create_workqueues - create per port workqueues + * @dd: the qlogic_ib device + */ +static int qib_create_workqueues(struct qib_devdata *dd) +{ + int pidx; + struct qib_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (!ppd->qib_wq) { + char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ + snprintf(wq_name, sizeof(wq_name), "qib%d_%d", + dd->unit, pidx); + ppd->qib_wq = + create_singlethread_workqueue(wq_name); + if (!ppd->qib_wq) + goto wq_error; + } + } + return 0; +wq_error: + pr_err("create_singlethread_workqueue failed for port %d\n", + pidx + 1); + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + } + return -ENOMEM; +} + +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + +/** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device * @reinit: reinitializing, so don't allocate new memory @@ -546,8 +706,8 @@ int qib_init(struct qib_devdata *dd, int reinit) if (!lastfail) lastfail = qib_setup_eagerbufs(rcd); if (lastfail) { - qib_dev_err(dd, "failed to allocate kernel ctxt's " - "rcvhdrq and/or egr bufs\n"); + qib_dev_err(dd, + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); continue; } } @@ -763,6 +923,12 @@ static void qib_shutdown_device(struct qib_devdata *dd) * We can't count on interrupts 
since we are stopping. */ dd->f_quiet_serdes(ppd); + + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); @@ -817,6 +983,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif kfree(rcd); } @@ -892,8 +1062,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd) /* 1 GiB/sec, slightly over IB SDR line rate */ if (lcnt < (emsecs * 1024U)) qib_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", + "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", lcnt / (u32) emsecs); preempt_enable(); @@ -907,7 +1076,6 @@ done: dd->f_set_armlaunch(dd, 1); } - void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; @@ -917,9 +1085,37 @@ void qib_free_devdata(struct qib_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&qib_devs_lock, flags); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. 
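qib_int_counter() above reads a counter that is now sharded per CPU: the interrupt path bumps its own CPU's slot with no atomics and no shared cache line, and only the rare readers (interrupt verification, the sps_ints statistic) pay for summing every possible CPU. A generic sketch of the idiom, using a toy global rather than the driver's dd->int_counter:

#include <linux/cpumask.h>
#include <linux/percpu.h>

static u64 __percpu *int_counter;       /* set up with alloc_percpu(u64) */

/* hot path, e.g. from an IRQ handler: lockless per-CPU increment */
static inline void count_interrupt(void)
{
        this_cpu_inc(*int_counter);
}

/* slow path: fold all per-CPU slots into a single total */
static u64 read_interrupt_total(void)
{
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                sum += *per_cpu_ptr(int_counter, cpu);
        return sum;
}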
Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -934,28 +1130,34 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) struct qib_devdata *dd; int ret; - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dd->list); + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); - ret = idr_get_new(&qib_unit_table, dd, &dd->unit); - if (ret >= 0) + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; list_add(&dd->list, &qib_dev_list); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); + goto bail; + } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); goto bail; } @@ -966,12 +1168,18 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (qib_cpulist) qib_cpulist_count = count; else - qib_early_err(&pdev->dev, "Could not alloc cpulist " - "info, cpu affinity might be wrong\n"); + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret); } /* @@ -1008,11 +1216,10 @@ void qib_disable_after_error(struct qib_devdata *dd) *dd->devstatusp |= QIB_STATUS_HWERROR; } -static void __devexit qib_remove_one(struct pci_dev *); -static int __devinit qib_init_one(struct pci_dev *, - const struct pci_device_id *); +static void qib_remove_one(struct pci_dev *); +static int qib_init_one(struct pci_dev *, const struct pci_device_id *); -#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { @@ -1024,19 +1231,48 @@ static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { MODULE_DEVICE_TABLE(pci, qib_pci_tbl); -struct pci_driver qib_driver = { +static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, - .remove = __devexit_p(qib_remove_one), + .remove = qib_remove_one, .id_table = qib_pci_tbl, .err_handler = &qib_pci_err_handler, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. 
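qib_alloc_devdata() above also converts unit-number allocation from the old two-step idr_pre_get()/idr_get_new() to idr_preload()/idr_alloc(): the GFP_KERNEL preallocation happens before the spinlock is taken, and the allocation itself runs under the lock with GFP_NOWAIT so it cannot sleep. The canonical shape of that sequence (toy names; the driver additionally uses the _irqsave lock variants):

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(unit_table);
static DEFINE_SPINLOCK(unit_lock);

static int assign_unit(void *dev)
{
        int id;

        idr_preload(GFP_KERNEL);        /* may sleep, so done unlocked */
        spin_lock(&unit_lock);
        /* start = 0, end = 0: any non-negative id */
        id = idr_alloc(&unit_table, dev, 0, 0, GFP_NOWAIT);
        spin_unlock(&unit_lock);
        idr_preload_end();

        return id;      /* >= 0 on success, negative errno on failure */
}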
*/ -static int __init qlogic_ib_init(void) +static int __init qib_ib_init(void) { int ret; @@ -1044,63 +1280,64 @@ static int __init qlogic_ib_init(void) if (ret) goto bail; - qib_cq_wq = create_singlethread_workqueue("qib_cq"); - if (!qib_cq_wq) { - ret = -ENOMEM; - goto bail_dev; - } - /* * These must be called before the driver is registered with * the PCI subsystem. */ idr_init(&qib_unit_table); - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail_cq_wq; - } +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); +#endif ret = pci_register_driver(&qib_driver); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Unable to register driver: error %d\n", -ret); - goto bail_unit; + pr_err("Unable to register driver: error %d\n", -ret); + goto bail_dev; } /* not fatal if it doesn't work */ if (qib_init_qibfs()) - printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n"); + pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ -bail_unit: - idr_destroy(&qib_unit_table); -bail_cq_wq: - destroy_workqueue(qib_cq_wq); bail_dev: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; } -module_init(qlogic_ib_init); +module_init(qib_ib_init); /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ -static void __exit qlogic_ib_cleanup(void) +static void __exit qib_ib_cleanup(void) { int ret; ret = qib_exit_qibfs(); if (ret) - printk(KERN_ERR QIB_DRV_NAME ": " - "Unable to cleanup counter filesystem: " - "error %d\n", -ret); + pr_err( + "Unable to cleanup counter filesystem: error %d\n", + -ret); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&qib_driver); - - destroy_workqueue(qib_cq_wq); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif qib_cpulist_count = 0; kfree(qib_cpulist); @@ -1109,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void) qib_dev_cleanup(); } -module_exit(qlogic_ib_cleanup); +module_exit(qib_ib_cleanup); /* this can only be called after a successful initialization */ static void cleanup_device_data(struct qib_devdata *dd) @@ -1120,10 +1357,24 @@ static void cleanup_device_data(struct qib_devdata *dd) unsigned long flags; /* users can't do anything more with chip */ - for (pidx = 0; pidx < dd->num_pports; ++pidx) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { if (dd->pport[pidx].statusp) *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + spin_lock(&dd->pport[pidx].cc_shadow_lock); + + kfree(dd->pport[pidx].congestion_entries); + dd->pport[pidx].congestion_entries = NULL; + kfree(dd->pport[pidx].ccti_entries); + dd->pport[pidx].ccti_entries = NULL; + kfree(dd->pport[pidx].ccti_entries_shadow); + dd->pport[pidx].ccti_entries_shadow = NULL; + kfree(dd->pport[pidx].congestion_entries_shadow); + dd->pport[pidx].congestion_entries_shadow = NULL; + + spin_unlock(&dd->pport[pidx].cc_shadow_lock); + } + if (!qib_wc_pat) qib_disable_wc(dd); @@ -1137,7 +1388,7 @@ static void cleanup_device_data(struct qib_devdata *dd) if (dd->pageshadow) { struct page **tmpp = dd->pageshadow; dma_addr_t *tmpd = dd->physshadow; - int i, cnt = 0; + int i; for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { int ctxt_tidbase = ctxt * dd->rcvtidcnt; @@ -1150,13 +1401,13 @@ static void cleanup_device_data(struct qib_devdata *dd) 
PAGE_SIZE, PCI_DMA_FROMDEVICE); qib_release_user_pages(&tmpp[i], 1); tmpp[i] = NULL; - cnt++; } } - tmpp = dd->pageshadow; dd->pageshadow = NULL; vfree(tmpp); + dd->physshadow = NULL; + vfree(tmpd); } /* @@ -1178,6 +1429,7 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); + qib_cq_exit(dd); } /* @@ -1203,8 +1455,7 @@ static void qib_postinit_cleanup(struct qib_devdata *dd) qib_free_devdata(dd); } -static int __devinit qib_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret, j, pidx, initfail; struct qib_devdata *dd = NULL; @@ -1222,9 +1473,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, #ifdef CONFIG_PCI_MSI dd = qib_init_iba6120_funcs(pdev, ent); #else - qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " - "work if CONFIG_PCI_MSI is not enabled\n", - ent->device); + qib_early_err(&pdev->dev, + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + ent->device); dd = ERR_PTR(-ENODEV); #endif break; @@ -1238,8 +1489,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, break; default: - qib_early_err(&pdev->dev, "Failing on unknown QLogic " - "deviceid 0x%x\n", ent->device); + qib_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); ret = -ENODEV; } @@ -1248,6 +1500,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (ret) goto bail; /* error already printed */ + ret = qib_create_workqueues(dd); + if (ret) + goto bail; + /* do the generic initialization */ initfail = qib_init(dd, 0); @@ -1292,9 +1548,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (!qib_wc_pat) { ret = qib_enable_wc(dd); if (ret) { - qib_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); ret = 0; } } @@ -1304,7 +1560,7 @@ bail: return ret; } -static void __devexit qib_remove_one(struct pci_dev *pdev) +static void qib_remove_one(struct pci_dev *pdev) { struct qib_devdata *dd = pci_get_drvdata(pdev); int ret; @@ -1346,6 +1602,7 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) { unsigned amt; + int old_node_id; if (!rcd->rcvhdrq) { dma_addr_t phys_hdrqtail; @@ -1355,14 +1612,18 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) sizeof(u32), PAGE_SIZE); gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 
GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrq = dma_alloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrq) { - qib_dev_err(dd, "attempt to allocate %d bytes " - "for ctxt %u rcvhdrq failed\n", - amt, rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); goto bail; } @@ -1373,9 +1634,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) } if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1391,8 +1654,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) return 0; bail_free: - qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u " - "rcvhdrqtailaddr failed\n", rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; bail_free_hdrq: @@ -1418,6 +1682,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; gfp_t gfp_flags; + int old_node_id; /* * GFP_USER, but without GFP_FS, so buffer cache can be @@ -1436,25 +1701,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) size = rcd->rcvegrbuf_size; if (!rcd->rcvegrbuf) { rcd->rcvegrbuf = - kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), - GFP_KERNEL); + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf) goto bail; } if (!rcd->rcvegrbuf_phys) { rcd->rcvegrbuf_phys = - kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), - GFP_KERNEL); + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf_phys) goto bail_rcvegrbuf; } for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { if (rcd->rcvegrbuf[e]) continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvegrbuf[e] = dma_alloc_coherent(&dd->pcidev->dev, size, &rcd->rcvegrbuf_phys[e], gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvegrbuf[e]) goto bail_rcvegrbuf_phys; } diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 6ae57d23004..f4918f2165e 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -224,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd) * We print the message and disable interrupts, in hope of * having a better chance of debugging the problem. 
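The rcvhdrq and eager-buffer changes above get NUMA-local coherent DMA memory by a small detour: dma_alloc_coherent() allocates on the device's node, so the code saves dev_to_node(), points the device at the context's node for the call, and restores the original afterwards. Reduced to its essence (an illustrative helper; it assumes, as in the setup paths here, that no concurrent allocation races on the same struct device):

#include <linux/device.h>
#include <linux/dma-mapping.h>

static void *alloc_coherent_on_node(struct device *dev, size_t size,
                                    dma_addr_t *phys, int node)
{
        int old_node = dev_to_node(dev);
        void *buf;

        set_dev_node(dev, node);
        buf = dma_alloc_coherent(dev, size, phys, GFP_KERNEL);
        set_dev_node(dev, old_node);    /* always restore */

        return buf;
}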
*/ - qib_dev_err(dd, "Read of chip interrupt status failed" - " disabling interrupts\n"); + qib_dev_err(dd, + "Read of chip interrupt status failed disabling interrupts\n"); if (allbits++) { /* disable interrupt delivery, something is very wrong */ if (allbits == 2) dd->f_set_intr_state(dd, 0); if (allbits == 3) { - qib_dev_err(dd, "2nd bad interrupt status, " - "unregistering interrupts\n"); + qib_dev_err(dd, + "2nd bad interrupt status, unregistering interrupts\n"); dd->flags |= QIB_BADINTR; dd->flags &= ~QIB_INITTED; dd->f_free_irq(dd); diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 8fd19a47df0..3b9afccaaad 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -35,21 +35,41 @@ /** * qib_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. * - * Returns 1 if successful, otherwise returns 0. */ -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; - int ret; + int ret = 0; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct qib_mregion *tmr; + + tmr = rcu_access_pointer(dev->dma_mr); + if (!tmr) { + qib_get_mr(mr); + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } + goto success; + } + /* Find the next available LKEY */ r = rkt->next; n = r; @@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) if (rkt->table[r] == NULL) break; r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; + if (r == n) goto bail; - } } rkt->next = (r + 1) & (rkt->max - 1); /* @@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) mr->lkey |= 1 << 8; rkt->gen++; } - rkt->table[r] = mr; + qib_get_mr(mr); + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: +out: return ret; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; } /** * qib_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free + * @mr: mr to free from tables */ -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) +void qib_free_lkey(struct qib_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; - int ret; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (lkey == 0) { - if (dev->dma_mr && dev->dma_mr == mr) { - ret = atomic_read(&dev->dma_mr->refcount); - if (!ret) - dev->dma_mr = NULL; - } else - ret = 0; - } else { + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) + rcu_assign_pointer(dev->dma_mr, NULL); + else { r = lkey >> (32 - ib_qib_lkey_table_size); - ret = atomic_read(&dev->lk_table.table[r]->refcount); - if (!ret) - dev->lk_table.table[r] = NULL; + rcu_assign_pointer(rkt->table[r], NULL); } - spin_unlock_irqrestore(&dev->lk_table.lock, flags); 
- - if (ret) - ret = -EBUSY; - return ret; + qib_put_mr(mr); + mr->lkey_published = 0; +out: + spin_unlock_irqrestore(&rkt->lock, flags); } /** * qib_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against + * @pd: protection domain * @isge: outgoing internal SGE * @sge: SGE to check * @acc: access flags * * Return 1 if valid and successful, otherwise returns 0. * + * increments the reference count upon success + * * Check the IB SGE for validity and initialize our internal version * of it. */ @@ -136,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use LKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (sge->lkey == 0) { struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); - isge->mr = dev->dma_mr; + isge->mr = mr; isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; @@ -161,9 +180,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->n = 0; goto ok; } - mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - mr->pd != &pd->ibpd)) + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; @@ -171,8 +190,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -208,20 +228,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } /** * qib_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state + * @qp: qp for validation + * @sge: SGE state * @len: length of data * @vaddr: virtual address to place data * @rkey: rkey to check * @acc: access flags * * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success */ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) @@ -230,25 +252,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use RKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). 
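The qib_keys.c rework above removes rkt->lock from the lookup fast path: readers now walk the key table under rcu_read_lock() and take their MR reference with atomic_inc_not_zero(), which simply fails if the region is concurrently on its way to being freed. Stripped to the core pattern (toy types and a fixed-size table, not the driver's):

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct region {
        atomic_t refcount;
        /* payload elided */
};

static struct region __rcu *table[256];

/* Return a referenced region, or NULL if absent or being torn down. */
static struct region *lookup_get(unsigned int idx)
{
        struct region *r;

        rcu_read_lock();
        r = rcu_dereference(table[idx]);
        if (r && !atomic_inc_not_zero(&r->refcount))
                r = NULL;       /* lost the race with the freer */
        rcu_read_unlock();

        return r;
}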
*/ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (rkey == 0) { struct qib_pd *pd = to_ipd(qp->ibqp.pd); struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); - sge->mr = dev->dma_mr; + sge->mr = mr; sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; @@ -257,16 +280,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, goto ok; } - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + mr = rcu_dereference( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -302,7 +327,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } @@ -325,7 +350,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) if (pd->user || rkey == 0) goto bail; - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; + mr = rcu_dereference_protected( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 3b3745f261f..22c720e5740 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static int reply_failure(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. 
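On the update side, qib_fast_reg_mr() above still runs with rkt->lock held, so it fetches the slot with rcu_dereference_protected() plus a lockdep_is_held() annotation instead of opening an RCU read section; publication goes through rcu_assign_pointer(), as in qib_alloc_lkey(). A writer-side sketch pairing with the reader sketch earlier (same toy struct region):

#include <linux/bug.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct region;                  /* as in the reader sketch */

static DEFINE_SPINLOCK(table_lock);

static void publish(struct region __rcu **slot, struct region *r)
{
        spin_lock(&table_lock);
        /* the lock excludes other updaters, so no rcu_read_lock() needed */
        WARN_ON(rcu_dereference_protected(*slot,
                                          lockdep_is_held(&table_lock)));
        rcu_assign_pointer(*slot, r);
        spin_unlock(&table_lock);
}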
+ */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY; +} + static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (!ibp->sm_ah) { if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - struct ib_ah_attr attr; - memset(&attr, 0, sizeof attr); - attr.dlid = ibp->sm_lid; - attr.port_num = ppd_from_ibp(ibp)->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, ibp->sm_lid); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->sm_ah = to_iah(ah); @@ -396,6 +404,7 @@ static int get_linkdowndefaultstate(struct qib_pportdata *ppd) static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) { + int valid_mkey = 0; int ret = 0; /* Is the mkey in the process of expiring? */ @@ -406,23 +415,36 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) ibp->mkeyprot = 0; } - /* M_Key checking depends on Portinfo:M_Key_protect_bits */ - if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 && - ibp->mkey != smp->mkey && - (smp->method == IB_MGMT_METHOD_SET || - smp->method == IB_MGMT_METHOD_TRAP_REPRESS || - (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) { - if (ibp->mkey_violations != 0xFFFF) - ++ibp->mkey_violations; - if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) - ibp->mkey_lease_timeout = jiffies + - ibp->mkey_lease_period * HZ; - /* Generate a trap notice. */ - qib_bad_mkey(ibp, smp); - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - } else if (ibp->mkey_lease_timeout) + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || + ibp->mkey == smp->mkey) + valid_mkey = 1; + + /* Unset lease timeout on any valid Get/Set/TrapRepress */ + if (valid_mkey && ibp->mkey_lease_timeout && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET || + smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) ibp->mkey_lease_timeout = 0; + if (!valid_mkey) { + switch (smp->method) { + case IB_MGMT_METHOD_GET: + /* Bad mkey not a violation below level 2 */ + if (ibp->mkeyprot < 2) + break; + case IB_MGMT_METHOD_SET: + case IB_MGMT_METHOD_TRAP_REPRESS: + if (ibp->mkey_violations != 0xFFFF) + ++ibp->mkey_violations; + if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) + ibp->mkey_lease_timeout = jiffies + + ibp->mkey_lease_period * HZ; + /* Generate a trap notice. */ + qib_bad_mkey(ibp, smp); + ret = 1; + } + } + return ret; } @@ -433,7 +455,6 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_pportdata *ppd; struct qib_ibport *ibp; struct ib_port_info *pip = (struct ib_port_info *)smp->data; - u16 lid; u8 mtu; int ret; u32 state; @@ -450,8 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, if (port_num != port) { ibp = to_iport(ibdev, port_num); ret = check_mkey(ibp, smp, 0); - if (ret) + if (ret) { + ret = IB_MAD_RESULT_FAILURE; goto bail; + } } } @@ -469,8 +492,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ibp->mkeyprot == 1)) pip->mkey = ibp->mkey; pip->gid_prefix = ibp->gid_prefix; - lid = ppd->lid; - pip->lid = lid ? 
cpu_to_be16(lid) : IB_LID_PERMISSIVE; + pip->lid = cpu_to_be16(ppd->lid); pip->sm_lid = cpu_to_be16(ibp->sm_lid); pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); /* pip->diag_code; */ @@ -633,7 +655,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_devdata *dd; struct qib_pportdata *ppd; struct qib_ibport *ibp; - char clientrereg = 0; + u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80); unsigned long flags; u16 lid, smlid; u8 lwe; @@ -783,12 +805,6 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; - if (pip->clientrereg_resv_subnetto & 0x80) { - clientrereg = 1; - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } - /* * Do the port state change now that the other link parameters * have been set. @@ -846,10 +862,15 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, smp->status |= IB_SMP_INVALID_FIELD; } + if (clientrereg) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + ret = subn_get_portinfo(smp, ibdev, port); - if (clientrereg) - pip->clientrereg_resv_subnetto |= 0x80; + /* restore re-reg bit per o14-12.2.1 */ + pip->clientrereg_resv_subnetto |= clientrereg; goto get_only; @@ -1007,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; @@ -1613,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, return reply((struct ib_smp *)pmp); } +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { @@ -1621,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1643,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); @@ -1774,6 
+1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1789,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } @@ -1837,6 +1886,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, port_num && port_num <= ibdev->phys_port_cnt && port != port_num) (void) check_mkey(to_iport(ibdev, port_num), smp, 0); + ret = IB_MAD_RESULT_FAILURE; goto bail; } @@ -2038,6 +2088,298 @@ bail: return ret; } +static int cc_get_classportinfo(struct ib_cc_mad *ccp, + struct ib_device *ibdev) +{ + struct ib_cc_classportinfo_attr *p = + (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->base_version = 1; + p->class_version = 1; + p->cap_mask = 0; + + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_info(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_info_attr *p = + (struct ib_cc_info_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->congestion_info = 0; + p->control_table_cap = ppd->cc_max_table_entries; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + int i; + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_cc_congestion_entry_shadow *entries; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + entries = ppd->congestion_entries_shadow->entries; + p->port_control = cpu_to_be16( + ppd->congestion_entries_shadow->port_control); + p->control_map = cpu_to_be16( + ppd->congestion_entries_shadow->control_map); + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + p->entries[i].ccti_increase = entries[i].ccti_increase; + p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer); + p->entries[i].trigger_threshold = entries[i].trigger_threshold; + p->entries[i].ccti_min = entries[i].ccti_min; + } + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = 
to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 max_cct_block; + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + max_cct_block = + (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES; + max_cct_block = max_cct_block ? max_cct_block - 1 : 0; + + if (cct_block_index > max_cct_block) { + spin_unlock(&ppd->cc_shadow_lock); + goto bail; + } + + ccp->attr_mod = cpu_to_be32(cct_block_index); + + cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1); + + cct_entry--; + + p->ccti_limit = cpu_to_be16(cct_entry); + + entries = &ppd->ccti_entries_shadow-> + entries[IB_CCT_ENTRIES * cct_block_index]; + cct_entry %= IB_CCT_ENTRIES; + + for (i = 0; i <= cct_entry; i++) + p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int cc_set_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int i; + + ppd->cc_sl_control_map = be16_to_cpu(p->control_map); + + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + ppd->congestion_entries[i].ccti_increase = + p->entries[i].ccti_increase; + + ppd->congestion_entries[i].ccti_timer = + be16_to_cpu(p->entries[i].ccti_timer); + + ppd->congestion_entries[i].trigger_threshold = + p->entries[i].trigger_threshold; + + ppd->congestion_entries[i].ccti_min = + p->entries[i].ccti_min; + } + + return reply((struct ib_smp *) ccp); +} + +static int cc_set_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + /* If this packet is the first in the sequence then + * zero the total table entry count. 
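The congestion control table above is transferred in blocks of IB_CCT_ENTRIES (64) entries, with the MAD's attr_mod selecting the block. For example, with ccti_last_entry = 127 there are 128 valid entries, so (127 + 1) / 64 = 2 full blocks and blocks 0 and 1 are readable. A small helper restating the Get-side arithmetic (illustrative only, mirroring cc_get_congestion_control_table() above):

#include <linux/types.h>

#define IB_CCT_ENTRIES 64

/* Highest CCT block index a Get may address, given the last valid entry. */
static u32 max_readable_block(u32 ccti_last_entry)
{
        u32 full_blocks = (ccti_last_entry + 1) / IB_CCT_ENTRIES;

        return full_blocks ? full_blocks - 1 : 0;
}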
+ */ + if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES) + ppd->total_cct_entry = 0; + + cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES; + + /* ccti_limit is 0 to 63 */ + ppd->total_cct_entry += (cct_entry + 1); + + if (ppd->total_cct_entry > ppd->cc_supported_table_entries) + goto bail; + + ppd->ccti_limit = be16_to_cpu(p->ccti_limit); + + entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index); + + for (i = 0; i <= cct_entry; i++) + entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry); + + spin_lock(&ppd->cc_shadow_lock); + + ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1; + memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries, + (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry))); + + ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED; + ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map; + memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries, + IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry)); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int check_cc_key(struct qib_ibport *ibp, + struct ib_cc_mad *ccp, int mad_flags) +{ + return 0; +} + +static int process_cc(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + int ret; + + *out_mad = *in_mad; + + if (ccp->class_version != 2) { + ccp->status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *)ccp); + goto bail; + } + + ret = check_cc_key(ibp, ccp, mad_flags); + if (ret) + goto bail; + + switch (ccp->method) { + case IB_MGMT_METHOD_GET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CLASSPORTINFO: + ret = cc_get_classportinfo(ccp, ibdev); + goto bail; + + case IB_CC_ATTR_CONGESTION_INFO: + ret = cc_get_congestion_info(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_get_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_get_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_set_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_set_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. 
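cc_set_congestion_control_table() above fills in the live ccti_entries array first and only then, under cc_shadow_lock, copies the result into the shadow structures that sysfs reads; readers take the same lock, so they always observe a complete table rather than a half-written one. The reader half of that discipline, reduced to a toy (names are not the driver's):

#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>

struct cc_shadow {
        spinlock_t lock;        /* shared with the single writer */
        u16 entries[64];
};

static void cc_snapshot(struct cc_shadow *s, u16 *out)
{
        spin_lock(&s->lock);
        memcpy(out, s->entries, sizeof(s->entries));
        spin_unlock(&s->lock);
}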
+ */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_TRAP: + default: + ccp->status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) ccp); + } + +bail: + return ret; +} + /** * qib_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on @@ -2062,6 +2404,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: @@ -2073,6 +2417,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = process_perf(ibdev, port, in_mad, out_mad); goto bail; + case IB_MGMT_CLASS_CONG_MGMT: + if (!ppd->congestion_entries_shadow || + !qib_cc_table_size) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + default: ret = IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index ecc416cdbaa..941d4d50d8e 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -31,6 +31,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef _QIB_MAD_H +#define _QIB_MAD_H #include <rdma/ib_pma.h> @@ -52,7 +54,7 @@ struct ib_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; -} __attribute__ ((packed)); +} __packed; struct ib_mad_notice_attr { u8 generic_type; @@ -71,7 +73,7 @@ struct ib_mad_notice_attr { __be16 reserved; __be16 lid; /* where violation happened */ u8 port_num; /* where violation happened */ - } __attribute__ ((packed)) ntc_129_131; + } __packed ntc_129_131; struct { __be16 reserved; @@ -81,14 +83,14 @@ struct ib_mad_notice_attr { __be32 new_cap_mask; /* new capability mask */ u8 reserved3; u8 change_flags; /* low 3 bits only */ - } __attribute__ ((packed)) ntc_144; + } __packed ntc_144; struct { __be16 reserved; __be16 lid; /* lid where sys guid changed */ __be16 reserved2; __be64 new_sys_guid; - } __attribute__ ((packed)) ntc_145; + } __packed ntc_145; struct { __be16 reserved; @@ -102,7 +104,7 @@ struct ib_mad_notice_attr { u8 reserved3; u8 dr_trunc_hop; u8 dr_rtn_path[30]; - } __attribute__ ((packed)) ntc_256; + } __packed ntc_256; struct { __be16 reserved; @@ -113,7 +115,7 @@ struct ib_mad_notice_attr { __be32 qp2; /* high 8 bits reserved */ union ib_gid gid1; union ib_gid gid2; - } __attribute__ ((packed)) ntc_257_258; + } __packed ntc_257_258; } details; }; @@ -207,7 +209,7 @@ struct ib_pma_portcounters_cong { __be64 port_rcv_packets; __be64 port_xmit_wait; __be64 port_adr_events; -} __attribute__ ((packed)); +} __packed; #define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 #define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 @@ -223,6 +225,197 @@ struct ib_pma_portcounters_cong { #define IB_PMA_SEL_CONG_ROUTING 0x08 /* + * Congestion control class attributes + */ +#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001) +#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_CC_ATTR_CONGESTION_INFO 
cpu_to_be16(0x0011) +#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012) +#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013) +#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014) +#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015) +#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016) +#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017) +#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018) + +/* generalizations for threshold values */ +#define IB_CC_THRESHOLD_NONE 0x0 +#define IB_CC_THRESHOLD_MIN 0x1 +#define IB_CC_THRESHOLD_MAX 0xf + +/* CCA MAD header constants */ +#define IB_CC_MAD_LOGDATA_LEN 32 +#define IB_CC_MAD_MGMTDATA_LEN 192 + +struct ib_cc_mad { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 cckey; + + /* For CongestionLog attribute only */ + u8 log_data[IB_CC_MAD_LOGDATA_LEN]; + + u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN]; +} __packed; + +/* + * Congestion Control class portinfo capability mask bits + */ +#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0) +#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1) +#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2) +#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8) + +struct ib_cc_classportinfo_attr { + u8 base_version; + u8 class_version; + __be16 cap_mask; + u8 reserved[3]; + u8 resp_time_value; /* only lower 5 bits */ + union ib_gid redirect_gid; + __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; /* only lower 24 bits */ + __be32 redirect_qkey; + union ib_gid trap_gid; + __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hl_qp; /* 8, 24 bits respectively */ + __be32 trap_qkey; +} __packed; + +/* Congestion control traps */ +#define IB_CC_TRAP_KEY_VIOLATION 0x0000 + +struct ib_cc_trap_key_violation_attr { + __be16 source_lid; + u8 method; + u8 reserved1; + __be16 attrib_id; + __be32 attrib_mod; + __be32 qp; + __be64 cckey; + u8 sgid[16]; + u8 padding[24]; +} __packed; + +/* Congestion info flags */ +#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1 +#define IB_CC_TABLE_CAP_DEFAULT 31 + +struct ib_cc_info_attr { + __be16 congestion_info; + u8 control_table_cap; /* Multiple of 64 entry unit CCTs */ +} __packed; + +struct ib_cc_key_info_attr { + __be64 cckey; + u8 protect; + __be16 lease_period; + __be16 violations; +} __packed; + +#define IB_CC_CL_CA_LOGEVENTS_LEN 208 + +struct ib_cc_log_attr { + u8 log_type; + u8 congestion_flags; + __be16 threshold_event_counter; + __be16 threshold_congestion_event_map; + __be16 current_time_stamp; + u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN]; +} __packed; + +#define IB_CC_CLEC_SERVICETYPE_RC 0x0 +#define IB_CC_CLEC_SERVICETYPE_UC 0x1 +#define IB_CC_CLEC_SERVICETYPE_RD 0x2 +#define IB_CC_CLEC_SERVICETYPE_UD 0x3 + +struct ib_cc_log_event { + u8 local_qp_cn_entry; + u8 remote_qp_number_cn_entry[3]; + u8 sl_cn_entry:4; + u8 service_type_cn_entry:4; + __be32 remote_lid_cn_entry; + __be32 timestamp_cn_entry; +} __packed; + +/* Sixteen congestion entries */ +#define IB_CC_CCS_ENTRIES 16 + +/* Port control flags */ +#define IB_CC_CCS_PC_SL_BASED 0x01 + +struct ib_cc_congestion_entry { + u8 ccti_increase; + __be16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_entry_shadow { + u8 ccti_increase; + u16 ccti_timer; + u8 
trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_setting_attr { + __be16 port_control; + __be16 control_map; + struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES]; +} __packed; + +struct ib_cc_congestion_setting_attr_shadow { + u16 port_control; + u16 control_map; + struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES]; +} __packed; + +#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1 +#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1 + +/* 64 Congestion Control table entries in a single MAD */ +#define IB_CCT_ENTRIES 64 +#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2) + +struct ib_cc_table_entry { + __be16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_entry_shadow { + u16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_attr { + __be16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +struct ib_cc_table_attr_shadow { + u16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +#define CC_TABLE_SHADOW_MAX \ + (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES) + +struct cc_table_shadow { + u16 ccti_last_entry; + struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; +} __packed; + +/* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields * which specify the N'th counter's capabilities. See ch. 16.1.3.2. * We support 5 counters which only count the mandatory quantities. @@ -234,3 +427,5 @@ struct ib_pma_portcounters_cong { COUNTER_MASK(1, 2) | \ COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) + +#endif /* _QIB_MAD_H */ diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 08944e2ee33..9bbb55347cc 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) return container_of(ibfmr, struct qib_fmr, ibfmr); } +static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + int rval = 0; + + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); + if (!mr->map[i]) + goto bail; + } + mr->mapsz = m; + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; +out: + return rval; +bail: + while (i) + kfree(mr->map[--i]); + rval = -ENOMEM; + goto out; +} + +static void deinit_qib_mregion(struct qib_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + + /** * qib_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) */ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) { - struct qib_ibdev *dev = to_idev(pd->device); - struct qib_mr *mr; + struct qib_mr *mr = NULL; struct ib_mr *ret; - unsigned long flags; + int rval; if (to_ipd(pd)->user) { ret = ERR_PTR(-EPERM); @@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) goto bail; } - mr->mr.access_flags = acc; - atomic_set(&mr->mr.refcount, 0); + rval = init_qib_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (!dev->dma_mr) - dev->dma_mr = &mr->mr; - spin_unlock_irqrestore(&dev->lk_table.lock, flags); + rval = qib_alloc_lkey(&mr->mr, 1); + if 
(rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + + mr->mr.access_flags = acc; ret = &mr->ibmr; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - return ret; + kfree(mr); + goto done; } -static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) +static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) { struct qib_mr *mr; - int m, i = 0; + int rval = -ENOMEM; + int m; /* Allocate struct plus pointers to first level page tables. */ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - mr->mr.page_shift = 0; - mr->mr.max_segs = count; + goto bail; + rval = init_qib_mregion(&mr->mr, pd, count); + if (rval) + goto bail; /* * ib_reg_phys_mr() will initialize mr->ibmr except for * lkey and rkey. */ - if (!qib_alloc_lkey(lk_table, &mr->mr)) - goto bail; + rval = qib_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; mr->ibmr.lkey = mr->mr.lkey; mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; - atomic_set(&mr->mr.refcount, 0); - goto done; - +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - while (i) - kfree(mr->mr.map[--i]); kfree(mr); - mr = NULL; - -done: - return mr; + mr = ERR_PTR(rval); + goto done; } /** @@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, int n, m, i; struct ib_mr *ret; - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(num_phys_buf, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; goto bail; } - mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; mr->mr.access_flags = acc; - mr->umem = NULL; m = 0; n = 0; @@ -186,7 +221,6 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) * @mr_access_flags: access flags for this memory region * @udata: unused by the QLogic_IB driver * @@ -198,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -212,18 +246,15 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; ib_umem_release(umem); goto bail; } - mr->mr.pd = pd; mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; @@ -235,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = 
ERR_PTR(-EINVAL); goto bail; @@ -251,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; @@ -271,21 +300,25 @@ bail: int qib_dereg_mr(struct ib_mr *ibmr) { struct qib_mr *mr = to_imr(ibmr); - struct qib_ibdev *dev = to_idev(ibmr->device); - int ret; - int i; - - ret = qib_free_lkey(dev, &mr->mr); - if (ret) - return ret; - - i = mr->mr.mapsz; - while (i) - kfree(mr->mr.map[--i]); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&mr->mr); + + qib_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&mr->mr); if (mr->umem) ib_umem_release(mr->umem); kfree(mr); - return 0; +out: + return ret; } /* @@ -298,17 +331,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct qib_mr *mr; - mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); - if (mr == NULL) - return ERR_PTR(-ENOMEM); - - mr->mr.pd = pd; - mr->mr.user_base = 0; - mr->mr.iova = 0; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = 0; - mr->umem = NULL; + mr = alloc_mr(max_page_list_len, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; return &mr->ibmr; } @@ -322,11 +347,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) if (size > PAGE_SIZE) return ERR_PTR(-EINVAL); - pl = kmalloc(sizeof *pl, GFP_KERNEL); + pl = kzalloc(sizeof *pl, GFP_KERNEL); if (!pl) return ERR_PTR(-ENOMEM); - pl->page_list = kmalloc(size, GFP_KERNEL); + pl->page_list = kzalloc(size, GFP_KERNEL); if (!pl->page_list) goto err_free; @@ -355,57 +380,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct qib_fmr *fmr; - int m, i = 0; + int m; struct ib_fmr *ret; + int rval = -ENOMEM; /* Allocate struct plus pointers to first level page tables. */ m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); if (!fmr) goto bail; - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; + rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; /* * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & * rkey. */ - if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; + rval = qib_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; fmr->ibfmr.rkey = fmr->mr.lkey; fmr->ibfmr.lkey = fmr->mr.lkey; /* * Resources are allocated but no valid mapping (RKEY can't be * used). 
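+	 * A mapping is only established later through the core verbs
+	 * call, e.g. (illustrative usage only, not part of this patch):
+	 *
+	 *	ib_map_phys_fmr(ibfmr, page_list, list_len, iova);
+	 *
+	 * which ends up in qib_map_phys_fmr() below.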
*/ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; fmr->mr.access_flags = mr_access_flags; fmr->mr.max_segs = fmr_attr->max_pages; fmr->mr.page_shift = fmr_attr->page_shift; - atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; - goto done; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&fmr->mr); bail: - while (i) - kfree(fmr->mr.map[--i]); kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; + ret = ERR_PTR(rval); + goto done; } /** @@ -428,7 +443,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, u32 ps; int ret; - if (atomic_read(&fmr->mr.refcount)) + i = atomic_read(&fmr->mr.refcount); + if (i > 2) return -EBUSY; if (list_len > fmr->mr.max_segs) { @@ -490,16 +506,27 @@ int qib_unmap_fmr(struct list_head *fmr_list) int qib_dealloc_fmr(struct ib_fmr *ibfmr) { struct qib_fmr *fmr = to_ifmr(ibfmr); - int ret; - int i; + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&fmr->mr); + qib_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} - ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); - if (ret) - return ret; +void mr_rcu_callback(struct rcu_head *list) +{ + struct qib_mregion *mr = container_of(list, struct qib_mregion, list); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; + complete(&mr->comp); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 0fde788e110..61a0046efb7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -51,8 +51,8 @@ * file calls, even though this violates some * expectations of harmlessness. */ -static int qib_tune_pcie_caps(struct qib_devdata *); -static int qib_tune_pcie_coalesce(struct qib_devdata *); +static void qib_tune_pcie_caps(struct qib_devdata *); +static void qib_tune_pcie_coalesce(struct qib_devdata *); /* * Do all the common PCIe setup and initialization. @@ -194,31 +194,50 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) } static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct msix_entry *msix_entry) + struct qib_msix_entry *qib_msix_entry) { int ret; - u32 tabsize = 0; - u16 msix_flags; - - pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); - tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); - if (tabsize > *msixcnt) - tabsize = *msixcnt; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - if (ret > 0) { - tabsize = ret; - ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); - } - if (ret) { - qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " - "falling back to INTx\n", tabsize, ret); - tabsize = 0; - } - *msixcnt = tabsize; - - if (ret) - qib_enable_intx(dd->pcidev); - + int nvec = *msixcnt; + struct msix_entry *msix_entry; + int i; + + ret = pci_msix_vec_count(dd->pcidev); + if (ret < 0) + goto do_intx; + + nvec = min(nvec, ret); + + /* We can't pass qib_msix_entry array to qib_msix_setup + * so use a dummy msix_entry array and copy the allocated + * irq back to the qib_msix_entry array. 
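+	 *
+	 * Note: pci_enable_msix_range() returns a negative errno on
+	 * failure, or the number of vectors actually granted, which may
+	 * be fewer than requested. Sketch of the convention used below:
+	 *
+	 *	ret = pci_enable_msix_range(pdev, entries, 1, nvec);
+	 *	if (ret < 0)
+	 *		fall back to INTx
+	 *	else
+	 *		nvec = ret;	(use the granted count)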
*/ + msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) + goto do_intx; + + for (i = 0; i < nvec; i++) + msix_entry[i] = qib_msix_entry[i].msix; + + ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); + if (ret < 0) + goto free_msix_entry; + else + nvec = ret; + + for (i = 0; i < nvec; i++) + qib_msix_entry[i].msix = msix_entry[i]; + + kfree(msix_entry); + *msixcnt = nvec; + return; + +free_msix_entry: + kfree(msix_entry); + +do_intx: + qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, " + "falling back to INTx\n", nvec, ret); + *msixcnt = 0; + qib_enable_intx(dd->pcidev); } /** @@ -234,8 +253,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) ret = pci_enable_msi(pdev); if (ret) - qib_dev_err(dd, "pci_enable_msi failed: %d, " - "interrupts may not work\n", ret); + qib_dev_err(dd, + "pci_enable_msi failed: %d, interrupts may not work\n", + ret); /* continue even if it fails, we may still be OK... */ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, @@ -251,13 +271,12 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) } int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct msix_entry *entry) + struct qib_msix_entry *entry) { u16 linkstat, speed; - int pos = 0, pose, ret = 1; + int pos = 0, ret = 1; - pose = pci_pcie_cap(dd->pcidev); - if (!pose) { + if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; @@ -265,12 +284,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, goto bail; } - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); + pos = dd->pcidev->msix_cap; if (nent && *nent && pos) { qib_msix_setup(dd, pos, nent, entry); ret = 0; /* did it, either MSIx or INTx */ } else { - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (pos) ret = qib_msi_setup(dd, pos); else @@ -279,7 +298,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, if (!pos) qib_enable_intx(dd->pcidev); - pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat); + pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* * speed is bits 0-3, linkwidth is bits 4-8 * no defines for them in headers @@ -339,10 +358,10 @@ int qib_reinit_intr(struct qib_devdata *dd) if (!dd->msi_lo) goto bail; - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (!pos) { - qib_dev_err(dd, "Can't find MSI capability, " - "can't restore MSI settings\n"); + qib_dev_err(dd, + "Can't find MSI capability, can't restore MSI settings\n"); ret = 0; /* nothing special for MSIx, just MSI */ goto bail; @@ -408,7 +427,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, PCI_COMMAND, new); - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + pos = pdev->msi_cap; if (pos) { /* then turn off MSI */ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); @@ -416,7 +435,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); } - pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + pos = pdev->msix_cap; if (pos) { /* then turn off MSIx */ pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); @@ -454,34 +473,10 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); r = pci_enable_device(dd->pcidev); if (r) - qib_dev_err(dd, "pci_enable_device failed after " - "reset: 
%d\n", r); -} - -/* code to adjust PCIe capabilities. */ - -static int fld2val(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - wd &= mask; - lsbmask = mask ^ (mask & (mask - 1)); - wd /= lsbmask; - return wd; + qib_dev_err(dd, + "pci_enable_device failed after reset: %d\n", r); } -static int val2fld(int wd, int mask) -{ - int lsbmask; - - if (!mask) - return 0; - lsbmask = mask ^ (mask & (mask - 1)); - wd *= lsbmask; - return wd; -} static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); @@ -493,28 +488,26 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); * of these chipsets, with some BIOS settings, and enabling it on those * systems may result in the system crashing, and/or data corruption. */ -static int qib_tune_pcie_coalesce(struct qib_devdata *dd) +static void qib_tune_pcie_coalesce(struct qib_devdata *dd) { int r; struct pci_dev *parent; - int ppos; u16 devid; u32 mask, bits, val; if (!qib_pcie_coalesce) - return 0; + return; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; if (parent->bus->parent) { qib_devinfo(dd->pcidev, "Parent not root\n"); - return 1; + return; } - ppos = pci_pcie_cap(parent); - if (!ppos) - return 1; + if (!pci_is_pcie(parent)) + return; if (parent->vendor != 0x8086) - return 1; + return; /* * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 @@ -547,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) mask = (3U << 24) | (7U << 10); } else { /* not one of the chipsets that we know about */ - return 1; + return; } pci_read_config_dword(parent, 0x48, &val); val &= ~mask; val |= bits; r = pci_write_config_dword(parent, 0x48, val); - return 0; } /* @@ -564,60 +556,44 @@ static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); -static int qib_tune_pcie_caps(struct qib_devdata *dd) +static void qib_tune_pcie_caps(struct qib_devdata *dd) { - int ret = 1; /* Assume the worst */ struct pci_dev *parent; - int ppos, epos; - u16 pcaps, pctl, ecaps, ectl; - int rc_sup, ep_sup; - int rc_cur, ep_cur; + u16 rc_mpss, rc_mps, ep_mpss, ep_mps; + u16 rc_mrrs, ep_mrrs, max_mrrs; /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; - if (parent->bus->parent) { + if (!pci_is_root_bus(parent->bus)) { qib_devinfo(dd->pcidev, "Parent not root\n"); - goto bail; + return; } - ppos = pci_pcie_cap(parent); - if (ppos) { - pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps); - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - } else - goto bail; + + if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) + return; + + rc_mpss = parent->pcie_mpss; + rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ - epos = pci_pcie_cap(dd->pcidev); - if (epos) { - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps); - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl); - } else - goto bail; - ret = 0; - /* Find max payload supported by root, endpoint */ - rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); - ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD); - if (rc_sup > ep_sup) - rc_sup = ep_sup; + ep_mpss = dd->pcidev->pcie_mpss; + ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8; - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD); + /* Find max payload supported by 
root, endpoint */ + if (rc_mpss > ep_mpss) + rc_mpss = ep_mpss; /* If Supported greater than limit in module param, limit it */ - if (rc_sup > (qib_pcie_caps & 7)) - rc_sup = qib_pcie_caps & 7; + if (rc_mpss > (qib_pcie_caps & 7)) + rc_mpss = qib_pcie_caps & 7; /* If less than (allowed, supported), bump root payload */ - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + if (rc_mpss > rc_mps) { + rc_mps = rc_mpss; + pcie_set_mps(parent, 128 << rc_mps); } /* If less than (allowed, supported), bump endpoint payload */ - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | - val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + if (rc_mpss > ep_mps) { + ep_mps = rc_mpss; + pcie_set_mps(dd->pcidev, 128 << ep_mps); } /* @@ -625,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) * No field for max supported, but PCIe spec limits it to 4096, * which is code '5' (log2(4096) - 7) */ - rc_sup = 5; - if (rc_sup > ((qib_pcie_caps >> 4) & 7)) - rc_sup = (qib_pcie_caps >> 4) & 7; - rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ); - ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ); - - if (rc_sup > rc_cur) { - rc_cur = rc_sup; - pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + max_mrrs = 5; + if (max_mrrs > ((qib_pcie_caps >> 4) & 7)) + max_mrrs = (qib_pcie_caps >> 4) & 7; + + max_mrrs = 128 << max_mrrs; + rc_mrrs = pcie_get_readrq(parent); + ep_mrrs = pcie_get_readrq(dd->pcidev); + + if (max_mrrs > rc_mrrs) { + rc_mrrs = max_mrrs; + pcie_set_readrq(parent, rc_mrrs); } - if (rc_sup > ep_cur) { - ep_cur = rc_sup; - ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | - val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + if (max_mrrs > ep_mrrs) { + ep_mrrs = max_mrrs; + pcie_set_readrq(dd->pcidev, ep_mrrs); } -bail: - return ret; } /* End of PCIe capability tuning */ @@ -700,15 +672,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; } - qib_devinfo(pdev, "QIB mmio_enabled function called, " - "read wordscntr %Lx, returning %d\n", words, ret); + qib_devinfo(pdev, + "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", + words, ret); return ret; } static pci_ers_result_t qib_pci_slot_reset(struct pci_dev *pdev) { - qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); return PCI_ERS_RESULT_CAN_RECOVER; } @@ -733,7 +706,7 @@ qib_pci_resume(struct pci_dev *pdev) qib_init(dd, 1); /* same as re-init after reset */ } -struct pci_error_handlers qib_pci_err_handler = { +const struct pci_error_handlers qib_pci_err_handler = { .error_detected = qib_pci_error_detected, .mmio_enabled = qib_pci_mmio_enabled, .link_reset = qib_pci_link_reset, diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 7e7e16fbee9..7fcc150d603 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. 
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -35,6 +35,9 @@ #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/jhash.h> +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> +#endif #include "qib.h" @@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) unsigned long flags; unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - spin_lock_irqsave(&dev->qpt_lock, flags); atomic_inc(&qp->refcount); + spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) rcu_assign_pointer(ibp->qp0, qp); @@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); } /* @@ -247,30 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; + int removed = 1; spin_lock_irqsave(&dev->qpt_lock, flags); - if (ibp->qp0 == qp) { - atomic_dec(&qp->refcount); + if (rcu_dereference_protected(ibp->qp0, + lockdep_is_held(&dev->qpt_lock)) == qp) { rcu_assign_pointer(ibp->qp0, NULL); - } else if (ibp->qp1 == qp) { - atomic_dec(&qp->refcount); + } else if (rcu_dereference_protected(ibp->qp1, + lockdep_is_held(&dev->qpt_lock)) == qp) { rcu_assign_pointer(ibp->qp1, NULL); } else { - struct qib_qp *q, **qpp; + struct qib_qp *q; + struct qib_qp __rcu **qpp; + removed = 0; qpp = &dev->qp_table[n]; - for (; (q = *qpp) != NULL; qpp = &q->next) + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock))) != NULL; + qpp = &q->next) if (q == qp) { - atomic_dec(&qp->refcount); - rcu_assign_pointer(*qpp, qp->next); - qp->next = NULL; + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); + removed = 1; break; } } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } } /** @@ -302,10 +313,12 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { - qp = dev->qp_table[n]; + qp = rcu_dereference_protected(dev->qp_table[n], + lockdep_is_held(&dev->qpt_lock)); rcu_assign_pointer(dev->qp_table[n], NULL); - for (; qp; qp = qp->next) + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); @@ -326,25 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { struct qib_qp *qp = NULL; + rcu_read_lock(); if (unlikely(qpn <= 1)) { - rcu_read_lock(); if (qpn == 0) qp = rcu_dereference(ibp->qp0); else qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); } else { struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; unsigned n = qpn_hash(dev, qpn); - rcu_read_lock(); - for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) - if (qp->ibqp.qp_num == qpn) + for (qp = rcu_dereference(dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); break; + } } - if (qp) - if (unlikely(!atomic_inc_not_zero(&qp->refcount))) - qp = NULL; - rcu_read_unlock(); return qp; } @@ -406,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) unsigned n; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - 
atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (clr_sends) { while (qp->s_last != qp->s_head) { @@ -427,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -437,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -450,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -495,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_tx) { @@ -581,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask)) + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) goto inval; if (attr_mask & IB_QP_AV) { @@ -981,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp *ret; if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } @@ -1038,6 +1043,11 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + if (!qp->s_hdr) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); @@ -1159,6 +1169,7 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); bail_qp: + kfree(qp->s_hdr); kfree(qp); bail_swq: vfree(swq); @@ -1214,6 +1225,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); + kfree(qp->s_hdr); kfree(qp); return 0; } @@ -1279,3 +1291,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) } } } + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + 
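+			/*
+			 * Take a reference on the QP just found so it
+			 * stays valid after rcu_read_unlock() below.
+			 */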
atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 894afac26f3..2f2501890c4 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -244,9 +244,9 @@ int qib_make_rc_req(struct qib_qp *qp) int ret = 0; int delta; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, @@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: @@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. 
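 * (qib_put_mr() above replaces the open-coded atomic_dec of
 * mr->refcount; a sketch of its expected definition from this
 * series, shown here for reference only:
 *
 *	static inline void qib_put_mr(struct qib_mregion *mr)
 *	{
 *		if (unlikely(atomic_dec_and_test(&mr->refcount)))
 *			call_rcu(&mr->list, mr_rcu_callback);
 *	}
 *
 * the final put invokes mr_rcu_callback(), which completes mr->comp
 * for waiters such as qib_dereg_mr() and qib_dealloc_fmr().)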
*/ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -2024,11 +2024,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) break; @@ -2048,7 +2044,6 @@ send_last: wc.pkey_index = 0; wc.dlid_path_bits = 0; wc.port_num = 0; - wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -2117,7 +2112,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2189,7 +2184,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2211,7 +2206,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b4b37e47321..4c07a8b34ff 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -110,7 +110,7 @@ bad_lkey: while (j) { struct qib_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -501,7 +501,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -525,7 +525,7 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -542,11 +542,7 @@ again: sqp->s_len -= len; } if (release) - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -688,17 +684,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = QIB_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; } lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -707,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** @@ -758,7 +755,7 @@ void qib_do_send(struct work_struct *work) * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ - if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) break; /* Record that s_hdr is empty. */ @@ -782,7 +779,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index ac065dd6b69..911205d3d5a 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -342,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); if (ret < 0) - qib_dev_err(dd, "Failed checking TRIMDONE, chn %d" - " (%s)\n", chn, where); + qib_dev_err(dd, + "Failed checking TRIMDONE, chn %d (%s)\n", + chn, where); if (!(ret & 0x10)) { int probe; baduns |= (1 << chn); - qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." - " (%s)\n", chn, ret, where); + qib_dev_err(dd, + "TRIMDONE cleared on chn %d (%02X). (%s)\n", + chn, ret, where); probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_PGUDP(0), 0, 0); qib_dev_err(dd, "probe is %d (%02X)\n", @@ -370,13 +372,13 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, /* Read CTRL reg for each channel to check TRIMDONE */ if (baduns & (1 << chn)) { qib_dev_err(dd, - "Reseting TRIMDONE on chn %d (%s)\n", + "Resetting TRIMDONE on chn %d (%s)\n", chn, where); ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - qib_dev_err(dd, "Failed re-setting " - "TRIMDONE, chn %d (%s)\n", + qib_dev_err(dd, + "Failed re-setting TRIMDONE, chn %d (%s)\n", chn, where); } } @@ -1144,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "pre-read failed: elt %d," - " addr 0x%X, chnl %d\n", - (sloc & 0xF), - (sloc >> 9) & 0x3f, chnl); + qib_dev_err(dd, + "pre-read failed: elt %d, addr 0x%X, chnl %d\n", + (sloc & 0xF), + (sloc >> 9) & 0x3f, chnl); return ret; } val = (ret & ~mask) | (val & mask); @@ -1157,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Global WR failed: elt %d," - " addr 0x%X, val %02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, val); + qib_dev_err(dd, + "Global WR failed: elt %d, addr 0x%X, val %02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, val); } return ret; } @@ -1173,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X," - " mask 0x%02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, - val & 0xFF, mask & 0xFF); + qib_dev_err(dd, + "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, + val & 0xFF, mask & 0xFF); break; } } diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 12a9604310d..c6d6a54d2e1 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -276,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) GFP_KERNEL); if (!ppd->sdma_descq) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA descriptor FIFO memory\n"); goto bail; } @@ -285,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); if (!ppd->sdma_head_dma) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA " - "head memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA head memory\n"); goto cleanup_descq; } ppd->sdma_head_dma[0] = 0; @@ -422,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd) void __qib_sdma_intr(struct qib_pportdata *ppd) { - if (__qib_sdma_running(ppd)) + if (__qib_sdma_running(ppd)) { qib_sdma_make_progress(ppd); + if (!list_empty(&ppd->sdma_userpending)) + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + } } int qib_setup_sdma(struct qib_pportdata *ppd) @@ -451,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd) ppd->sdma_descq_removed = 0; ppd->sdma_descq_added = 0; + ppd->sdma_intrequest = 0; + INIT_LIST_HEAD(&ppd->sdma_userpending); + INIT_LIST_HEAD(&ppd->sdma_activelist); tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, @@ -707,6 +714,62 @@ unlock: return ret; } +/* + * sdma_lock should be acquired before calling this routine + */ +void dump_sdma_state(struct qib_pportdata *ppd) +{ + struct qib_sdma_desc *descq; + struct qib_sdma_txreq *txp, *txpnext; + __le64 *descqp; + u64 desc[2]; + u64 addr; + u16 gen, dwlen, dwoffset; + u16 head, tail, cnt; + + head = ppd->sdma_descq_head; + tail = ppd->sdma_descq_tail; + cnt = qib_sdma_descq_freecnt(ppd); + descq = ppd->sdma_descq; + + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_head: %u\n", head); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_tail: %u\n", tail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdma_descq_freecnt: %u\n", cnt); + + /* print info for each entry in the descriptor queue */ + while (head != tail) { + char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 }; + + descqp = &descq[head].qw[0]; + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + flags[0] = (desc[0] & 1<<15) ? 'I' : '-'; + flags[1] = (desc[0] & 1<<14) ? 'L' : 'S'; + flags[2] = (desc[0] & 1<<13) ? 'H' : '-'; + flags[3] = (desc[0] & 1<<12) ? 'F' : '-'; + flags[4] = (desc[0] & 1<<11) ? 
'L' : '-'; + addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL); + gen = (desc[0] >> 30) & 3ULL; + dwlen = (desc[0] >> 14) & (0x7ffULL << 2); + dwoffset = (desc[0] & 0x7ffULL) << 2; + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n", + head, flags, addr, gen, dwlen, dwoffset); + if (++head == ppd->sdma_descq_cnt) + head = 0; + } + + /* print dma descriptor indices from the TX requests */ + list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist, + list) + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n", + txp->start_idx, txp->next_descq_idx); +} + void qib_sdma_process_event(struct qib_pportdata *ppd, enum qib_sdma_events event) { diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index dae51604cfc..3c8e4e3caca 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,41 +34,7 @@ #include <linux/ctype.h> #include "qib.h" - -/** - * qib_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * Returns the number of bytes consumed, or negative value on error. - */ -static int qib_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} +#include "qib_mad.h" /* start of per-port functions */ /* @@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret; + } /* * Set the "intentional" heartbeat enable per either of @@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, * because entering loopback mode overrides it and automatically * disables heartbeat. */ - if (ret >= 0) - ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); - if (ret < 0) - qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); return ret < 0 ? ret : count; } @@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); - if (ret > 0) - qib_set_led_override(ppd, val); - else + ret = kstrtou16(buf, 0, &val); + if (ret) { qib_dev_err(dd, "attempt to set invalid LED override\n"); - return ret < 0 ? 
ret : count; + return ret; + } + + qib_set_led_override(ppd, val); + return count; } static ssize_t show_status(struct qib_pportdata *ppd, char *buf) @@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = { NULL }; +/* + * Start of per-port congestion control structures and support code + */ + +/* + * Congestion control table size followed by table entries + */ +static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->ccti_entries_shadow) + return -EINVAL; + + ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow) + + sizeof(__be16); + + if (pos > ret) + return -EINVAL; + + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->ccti_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static struct kobj_type qib_port_cc_ktype = { + .release = qib_port_release, +}; + +static struct bin_attribute cc_table_bin_attr = { + .attr = {.name = "cc_table_bin", .mode = 0444}, + .read = read_cc_table_bin, + .size = PAGE_SIZE, +}; + +/* + * Congestion settings: port control, control map and an array of 16 + * entries for the congestion entries - increase, timer, event log + * trigger threshold and the minimum injection rate delay. + */ +static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return -EINVAL; + + ret = sizeof(struct ib_cc_congestion_setting_attr_shadow); + + if (pos > ret) + return -EINVAL; + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->congestion_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static struct bin_attribute cc_setting_bin_attr = { + .attr = {.name = "cc_settings_bin", .mode = 0444}, + .read = read_cc_setting_bin, + .size = PAGE_SIZE, +}; + + static ssize_t qib_portattr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj, return pattr->store(ppd, buf, len); } -static void qib_port_release(struct kobject *kobj) -{ - /* nothing to do since memory is freed by qib_free_devdata() */ -} static const struct sysfs_ops qib_port_ops = { .show = qib_portattr_show, @@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - char *endp; - long val = simple_strtol(buf, &endp, 0); - - if (val < 0 || endp == buf) - return -EINVAL; + u32 val; + int ret; + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; *(u32 *)((char *) qibp + dattr->counter) = val; return size; } @@ -503,8 +561,11 @@ static ssize_t show_nctxts(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. 
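 * Worked example (hypothetical values): a single-port board forced
 * to cfgctxts == 1 can still have first_user_ctxt == 2, so the old
 * unsigned subtraction wrapped around; the guard below returns 0.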
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - - dd->first_user_ctxt); + /* The calculation below deals with a special case where + * cfgctxts is set to 1 on a single-port board. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static ssize_t show_nfreectxts(struct device *device, @@ -646,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, int ret; if (!port_num || port_num > dd->num_pports) { - qib_dev_err(dd, "Skipping infiniband class with " - "invalid port %u\n", port_num); + qib_dev_err(dd, + "Skipping infiniband class with invalid port %u\n", + port_num); ret = -ENODEV; goto bail; } @@ -656,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, "linkcontrol"); if (ret) { - qib_dev_err(dd, "Skipping linkcontrol sysfs info, " - "(err %d) port %u\n", ret, port_num); + qib_dev_err(dd, + "Skipping linkcontrol sysfs info, (err %d) port %u\n", + ret, port_num); goto bail; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -665,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, "sl2vl"); if (ret) { - qib_dev_err(dd, "Skipping sl2vl sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_sl; + qib_dev_err(dd, + "Skipping sl2vl sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_link; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, "diag_counters"); if (ret) { - qib_dev_err(dd, "Skipping diag_counters sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_diagc; + qib_dev_err(dd, + "Skipping diag_counters sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_sl; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return 0; + + ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype, + kobj, "CCMgtA"); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_diagc; + } + + kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc; + } + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control table sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc_entry_bin; + } + + qib_devinfo(dd->pcidev, + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); + return 0; +bail_cc_entry_bin: + sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); +bail_cc: + kobject_put(&ppd->pport_cc_kobj); bail_diagc: - kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->diagc_kobj); bail_sl: + kobject_put(&ppd->sl2vl_kobj); +bail_link: kobject_put(&ppd->pport_kobj); bail: return ret; @@ -701,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd) for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { ret = device_create_file(&dev->dev, qib_attributes[i]); if (ret) - return ret; + goto bail; } return 0; +bail: + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) + device_remove_file(&dev->dev, qib_attributes[i]); + return ret; } /* @@ -717,7 +828,15 @@ 
void qib_verbs_unregister_sysfs(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; - kobject_put(&ppd->pport_kobj); + if (qib_cc_table_size && + ppd->congestion_entries_shadow) { + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + kobject_put(&ppd->pport_cc_kobj); + } kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->pport_kobj); } } diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index ddde72e11ed..647f7beb1b0 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, goto failed_write; ret = qib_twsi_wr(dd, addr, 0); if (ret) { - qib_dev_err(dd, "Failed to write interface" - " write addr %02X\n", addr); + qib_dev_err(dd, + "Failed to write interface write addr %02X\n", + addr); goto failed_write; } } diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 1bf626c4017..31d3561400a 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -295,6 +295,7 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, nbufs = last - first + 1; /* number in range to check */ if (dd->upd_pio_shadow) { +update_shadow: /* * Minor optimization. If we had no buffers on last call, * start out by doing the update; continue and do scan even @@ -304,37 +305,39 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, updated++; } i = first; -rescan: /* * While test_and_set_bit() is atomic, we do that and then the * change_bit(), and the pair is not. See if this is the cause * of the remaining armlaunch errors. */ spin_lock_irqsave(&dd->pioavail_lock, flags); + if (dd->last_pio >= first && dd->last_pio <= last) + i = dd->last_pio + 1; + if (!first) + /* adjust to min possible */ + nbufs = last - dd->min_kernel_pio + 1; for (j = 0; j < nbufs; j++, i++) { if (i > last) - i = first; + i = !first ? dd->min_kernel_pio : first; if (__test_and_set_bit((2 * i) + 1, shadow)) continue; /* flip generation bit */ __change_bit(2 * i, shadow); /* remember that the buffer can be written to now */ __set_bit(i, dd->pio_writing); + if (!first && first != last) /* first == last on VL15, avoid */ + dd->last_pio = i; break; } spin_unlock_irqrestore(&dd->pioavail_lock, flags); if (j == nbufs) { - if (!updated) { + if (!updated) /* * First time through; shadow exhausted, but may be * buffers available, try an update and then rescan. 
*/ - update_send_bufs(dd); - updated++; - i = first; - goto rescan; - } + goto update_shadow; no_send_bufs(dd); buf = NULL; } else { @@ -422,14 +425,20 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + start, dd->pioavailshadow); __set_bit(start, dd->pioavailkernel); + if ((start >> 1) < dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } else { __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, dd->pioavailshadow); __clear_bit(start, dd->pioavailkernel); + if ((start >> 1) > dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; } start += 2; } + if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1) + dd->last_pio = dd->min_kernel_pio - 1; spin_unlock_irqrestore(&dd->pioavail_lock, flags); dd->f_txchk_change(dd, ostart, len, avail, rcd); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 847e7afdfd9..aa3a8035bb6 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp) goto done; } - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -281,11 +281,7 @@ inv: set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -403,14 +399,9 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto rewind; wc.opcode = IB_WC_RECV; -last_imm: qib_copy_sge(&qp->r_sge, data, tlen, 0); - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); +last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; @@ -422,7 +413,6 @@ last_imm: wc.pkey_index = 0; wc.dlid_path_bits = 0; wc.port_num = 0; - wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. 
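The qib_tx.c changes above replace the always-restart-at-first buffer scan with a persistent cursor, dd->last_pio: each search resumes just past the previous success, so a mostly-full shadow is not rescanned from the start on every allocation. A user-space reduction of the circular scan-with-cursor idea (names and the missing locking are illustrative; the driver does all of this under pioavail_lock):

    #include <stdbool.h>

    #define NBUFS 128

    static bool busy[NBUFS];
    static unsigned int last_alloc;     /* cursor: index of last success */

    static int alloc_buf(void)
    {
        unsigned int i = (last_alloc + 1) % NBUFS;
        unsigned int n;

        for (n = 0; n < NBUFS; n++, i = (i + 1) % NBUFS) {
            if (!busy[i]) {
                busy[i] = true;
                last_alloc = i;
                return i;
            }
        }
        return -1;  /* shadow exhausted; caller updates and retries */
    }

Note the hunk deliberately skips the cursor update when first == last (the VL15 buffer case), so that fixed buffer is always found at its known location.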
*/ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -494,13 +484,7 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr-> - refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); if (ret < 0) @@ -510,6 +494,8 @@ rdma_last_imm: } wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -525,11 +511,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); break; default: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 828609fa4d2..aaf7039f8ed 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) struct qib_sge *sge; struct ib_wc wc; u32 length; + enum ib_qp_type sqptype, dqptype; qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; } - if (qp->ibqp.qp_type != sqp->ibqp.qp_type || + + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : sqp->ibqp.qp_type; + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : qp->ibqp.qp_type; + + if (dqptype != sqptype || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { ibp->n_pkt_drops++; goto drop; @@ -194,11 +201,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } length -= len; } - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -277,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* @@ -321,11 +324,11 @@ int qib_make_ud_req(struct qib_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; + ohdr = &qp->s_hdr->u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -333,7 +336,7 @@ int qib_make_ud_req(struct qib_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = QIB_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; + ohdr = &qp->s_hdr->u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -346,15 +349,15 @@ int qib_make_ud_req(struct qib_qp *qp) lrh0 |= 0xF000; /* Set VL (see ch. 
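The counter conversions in the qib_ud.c hunk above (ibp->n_multicast_xmit++ becoming this_cpu_inc(ibp->pmastats->n_multicast_xmit), and so on) move hot-path PMA statistics from shared per-port fields to per-CPU storage, so concurrent senders stop bouncing a cacheline. A sketch of the allocate/increment/sum shape, with an illustrative stats struct standing in for the driver's pmastats:

    #include <linux/percpu.h>
    #include <linux/types.h>

    struct pma_stats {                  /* illustrative layout */
        u64 n_unicast_xmit;
        u64 n_multicast_xmit;
    };

    struct port {
        struct pma_stats __percpu *pmastats;
    };

    static int port_init(struct port *p)
    {
        p->pmastats = alloc_percpu(struct pma_stats);
        return p->pmastats ? 0 : -ENOMEM;  /* free_percpu() on teardown */
    }

    /* hot path: a local-CPU increment, no shared cacheline traffic */
    static inline void count_ucast_xmit(struct port *p)
    {
        this_cpu_inc(p->pmastats->n_unicast_xmit);
    }

    /* slow path (e.g. a PMA query): fold all CPUs' counts together */
    static u64 read_ucast_xmit(struct port *p)
    {
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
            sum += per_cpu_ptr(p->pmastats, cpu)->n_unicast_xmit;
        return sum;
    }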
13.5.3.1) */ else lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); + qp->s_hdr->lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; + qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; @@ -556,11 +559,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 82442085cbe..d2806cae234 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,21 +52,48 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... */ + struct list_head list; /* list element */ + + u8 tiddma; /* if this is NEW tid-sdma */ + u8 largepkt; /* this is large pkt from kmalloc */ + u16 frag_size; /* frag size used by PSM */ + u16 index; /* last header index or push index */ + u16 naddr; /* dimension of addr (1..3) ... */ + u16 addrlimit; /* addr array size */ + u16 tidsmidx; /* current tidsm index */ + u16 tidsmcount; /* tidsm array item count */ + u16 payload_size; /* payload size so far for header */ + u32 bytes_togo; /* bytes for processing */ u32 counter; /* sdma pkts queued counter for this entry */ + struct qib_tid_session_member *tidsm; /* tid session member array */ + struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ u64 added; /* global descq number of entries */ struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ + u16 offset; /* offset for kvaddr, addr */ + u16 length; /* length in page */ + u16 first_desc; /* first desc */ + u16 last_desc; /* last desc */ + u16 put_page; /* should we put_page? */ + u16 dma_mapped; /* is page dma_mapped? */ + u16 dma_length; /* for dma_unmap_page() */ + u16 padding; struct page *page; /* may be NULL (coherent mem) */ void *kvaddr; /* FIXME: only for pio hack */ dma_addr_t addr; } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ }; struct qib_user_sdma_queue { @@ -77,6 +104,12 @@ struct qib_user_sdma_queue { */ struct list_head sent; + /* + * Because above list will be accessed by both process and + * signal handler, we need a spinlock for it. 
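The rb-tree root declared above (qib_user_sdma_rb_root, keyed by pid) is serviced by the search and insert helpers that follow; together with the refcount taken in qib_user_sdma_queue_create, the net effect is a lookup-or-create so every queue opened by one process shares a single node. The combined pattern, condensed into one helper (kernel rbtree API, illustrative names; the caller is assumed to serialize, as the driver does via its mutex):

    #include <linux/rbtree.h>
    #include <linux/slab.h>

    struct pid_node {
        struct rb_node node;
        int refcount;
        pid_t pid;
    };

    /* Find the node for @pid, or allocate and insert one. */
    static struct pid_node *pid_node_get(struct rb_root *root, pid_t pid)
    {
        struct rb_node **link = &root->rb_node, *parent = NULL;
        struct pid_node *n;

        while (*link) {
            n = rb_entry(*link, struct pid_node, node);
            parent = *link;
            if (pid < n->pid)
                link = &(*link)->rb_left;
            else if (pid > n->pid)
                link = &(*link)->rb_right;
            else {
                n->refcount++;      /* already tracked for this pid */
                return n;
            }
        }

        n = kmalloc(sizeof(*n), GFP_KERNEL);
        if (!n)
            return NULL;
        n->refcount = 1;
        n->pid = pid;
        rb_link_node(&n->node, parent, link);
        rb_insert_color(&n->node, root);
        return n;
    }

The matching put (refcount to zero, then rb_erase and kfree) appears later in this patch, in qib_user_sdma_queue_destroy.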
+ */ + spinlock_t sent_lock ____cacheline_aligned_in_smp; + /* headers with expected length are allocated from here... */ char header_cache_name[64]; struct dma_pool *header_cache; @@ -88,27 +121,83 @@ struct qib_user_sdma_queue { /* as packets go on the queued queue, they are counted... */ u32 counter; u32 sent_counter; + /* pending packets, not sending yet */ + u32 num_pending; + /* sending packets, not complete yet */ + u32 num_sending; + /* global descq number of entry of last sending packet */ + u64 added; /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; pq->counter = 0; pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; + pq->sdma_rb_node = NULL; + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); mutex_init(&pq->lock); snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), @@ -131,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -144,34 +255,310 @@ done: } static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) + int i, u16 offset, u16 len, + u16 first_desc, u16 last_desc, + u16 put_page, u16 dma_mapped, + struct page *page, void *kvaddr, + dma_addr_t dma_addr, u16 dma_length) { pkt->addr[i].offset = offset; pkt->addr[i].length = len; + pkt->addr[i].first_desc = first_desc; + pkt->addr[i].last_desc = last_desc; pkt->addr[i].put_page = put_page; pkt->addr[i].dma_mapped = 
dma_mapped; pkt->addr[i].page = page; pkt->addr[i].kvaddr = kvaddr; pkt->addr[i].addr = dma_addr; + pkt->addr[i].dma_length = dma_length; +} + +static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) +{ + void *hdr; + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + dma_addr); + else + hdr = NULL; + + if (!hdr) { + hdr = kmalloc(len, GFP_KERNEL); + if (!hdr) + return NULL; + + *dma_addr = 0; + } + + return hdr; } -static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) +static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + struct page *page, u16 put, + u16 offset, u16 len, void *kvaddr) { - pkt->naddr = 1; - pkt->counter = counter; - qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); + __le16 *pbc16; + void *pbcvaddr; + struct qib_message_header *hdr; + u16 newlen, pbclen, lastdesc, dma_mapped; + u32 vcto; + union qib_seqnum seqnum; + dma_addr_t pbcdaddr; + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + page, offset, len, DMA_TO_DEVICE); + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + /* + * dma mapping error, pkt has not managed + * this page yet, return the page here so + * the caller can ignore this page. + */ + if (put) { + put_page(page); + } else { + /* coalesce case */ + kunmap(page); + __free_page(page); + } + ret = -ENOMEM; + goto done; + } + offset = 0; + dma_mapped = 1; + + +next_fragment: + + /* + * In tid-sdma, the transfer length is restricted by + * receiver side current tid page length. + */ + if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length) + newlen = pkt->tidsm[pkt->tidsmidx].length; + else + newlen = len; + + /* + * Then the transfer length is restricted by MTU. + * the last descriptor flag is determined by: + * 1. the current packet is at frag size length. + * 2. the current tid page is done if tid-sdma. + * 3. there is no more byte togo if sdma. + */ + lastdesc = 0; + if ((pkt->payload_size + newlen) >= pkt->frag_size) { + newlen = pkt->frag_size - pkt->payload_size; + lastdesc = 1; + } else if (pkt->tiddma) { + if (newlen == pkt->tidsm[pkt->tidsmidx].length) + lastdesc = 1; + } else { + if (newlen == pkt->bytes_togo) + lastdesc = 1; + } + + /* fill the next fragment in this page */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + offset, newlen, /* offset, len */ + 0, lastdesc, /* first last desc */ + put, dma_mapped, /* put page, dma mapped */ + page, kvaddr, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->bytes_togo -= newlen; + pkt->payload_size += newlen; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* If there is no more byte togo. (lastdesc==1) */ + if (pkt->bytes_togo == 0) { + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + goto done; + } + + /* If tid-sdma, advance tid info. 
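The error path at the top of qib_user_sdma_page_to_frags above is careful about page ownership: on a mapping failure the page has not yet been attached to the packet, so the function itself must release it, via put_page() for a pinned user page or kunmap()/__free_page() for the coalesce page. A stripped-down sketch of that map-check-release discipline (kernel DMA API, illustrative function name):

    #include <linux/dma-mapping.h>
    #include <linux/highmem.h>
    #include <linux/mm.h>

    /* Map one page for device TX.  On failure the caller no longer owns
     * the page: release it here, matching however it was obtained. */
    static int map_tx_page(struct device *dev, struct page *page,
                           unsigned int offset, unsigned int len,
                           bool pinned_user_page, dma_addr_t *out)
    {
        dma_addr_t addr = dma_map_page(dev, page, offset, len,
                                       DMA_TO_DEVICE);

        if (dma_mapping_error(dev, addr)) {
            if (pinned_user_page) {
                put_page(page);         /* from get_user_pages */
            } else {
                kunmap(page);           /* coalesce buffer */
                __free_page(page);
            }
            return -ENOMEM;
        }
        *out = addr;
        return 0;
    }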
*/ + if (pkt->tiddma) { + pkt->tidsm[pkt->tidsmidx].length -= newlen; + if (pkt->tidsm[pkt->tidsmidx].length) { + pkt->tidsm[pkt->tidsmidx].offset += newlen; + } else { + pkt->tidsmidx++; + if (pkt->tidsmidx == pkt->tidsmcount) { + ret = -EFAULT; + goto done; + } + } + } + + /* + * If this is NOT the last descriptor. (newlen==len) + * the current packet is not done yet, but the current + * send side page is done. + */ + if (lastdesc == 0) + goto done; + + /* + * If running this driver under PSM with message size + * fitting into one transfer unit, it is not possible + * to pass this line. otherwise, it is a buggggg. + */ + + /* + * Since the current packet is done, and there are more + * bytes togo, we need to create a new sdma header, copying + * from previous sdma header and modify both. + */ + pbclen = pkt->addr[pkt->index].length; + pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr); + if (!pbcvaddr) { + ret = -ENOMEM; + goto done; + } + /* Copy the previous sdma header to new sdma header */ + pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr; + memcpy(pbcvaddr, pbc16, pbclen); + + /* Modify the previous sdma header */ + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* turn on the header suppression */ + hdr->iph.pkt_flags = + cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2); + /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */ + hdr->flags &= ~(0x04|0x20); + } else { + /* turn off extra bytes: 20-21 bits */ + hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF); + /* turn off ACK_REQ: 0x04 */ + hdr->flags &= ~(0x04); + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + /* Modify the new sdma header */ + pbc16 = (__le16 *)pbcvaddr; + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* Set new tid and offset for new sdma header */ + hdr->iph.ver_ctxt_tid_offset = cpu_to_le32( + (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) + + (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) + + (pkt->tidsm[pkt->tidsmidx].offset>>2)); + } else { + /* Middle protocol new packet offset */ + hdr->uwords[2] += pkt->payload_size; + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* Next sequence number in new sdma header */ + seqnum.val = be32_to_cpu(hdr->bth[2]); + if (pkt->tiddma) + seqnum.seq++; + else + seqnum.pkt++; + hdr->bth[2] = cpu_to_be32(seqnum.val); + + /* Init new sdma header. 
*/ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + 0, pbclen, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbcvaddr, /* struct page, virt addr */ + pbcdaddr, pbclen); /* dma addr, dma length */ + pkt->index = pkt->naddr; + pkt->payload_size = 0; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* Prepare for next fragment in this page */ + if (newlen != len) { + if (dma_mapped) { + put = 0; + dma_mapped = 0; + page = NULL; + kvaddr = NULL; + } + len -= newlen; + offset += newlen; + + goto next_fragment; + } + +done: + return ret; } /* we've too many pages in the iovec, coalesce to a single page */ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, const struct iovec *iov, unsigned long niov) @@ -182,7 +569,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, char *mpage; int i; int len = 0; - dma_addr_t dma_addr; if (!page) { ret = -ENOMEM; @@ -205,17 +591,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, len += iov[i].iov_len; } - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + page, 0, 0, len, mpage_save); goto done; free_unmap: @@ -238,16 +615,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* - * Truncate length to page boundary. - */ -static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; -} - static void qib_user_sdma_free_pkt_frag(struct device *dev, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, @@ -256,10 +623,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, const int i = frag; if (pkt->addr[i].page) { + /* only user data has page */ if (pkt->addr[i].dma_mapped) dma_unmap_page(dev, pkt->addr[i].addr, - pkt->addr[i].length, + pkt->addr[i].dma_length, DMA_TO_DEVICE); if (pkt->addr[i].kvaddr) @@ -269,55 +637,80 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, put_page(pkt->addr[i].page); else __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { + /* from kmalloc & dma mapped */ + dma_unmap_single(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + kfree(pkt->addr[i].kvaddr); + } else if (pkt->addr[i].addr) { + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, pkt->addr[i].kvaddr, pkt->addr[i].addr); + } else { + /* from kmalloc but not dma mapped */ + kfree(pkt->addr[i].kvaddr); + } + } } /* return number of pages pinned... 
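Stripped of the tid bookkeeping and header cloning, the next_fragment logic above reduces to carving a byte range into pieces that never let the running payload exceed frag_size, flagging the piece that fills a packet as its last descriptor and starting the accounting over for the next packet. A plain-C reduction of that arithmetic (illustrative; it ignores page boundaries and the tid-sdma length cap):

    #include <stdio.h>

    static void chunk(unsigned int len, unsigned int frag_size)
    {
        unsigned int payload = 0, off = 0;

        while (len) {
            unsigned int n = len;
            int last = 0;

            if (payload + n >= frag_size) {
                n = frag_size - payload;    /* fill the packet exactly */
                last = 1;
            }
            printf("frag off=%u len=%u last=%d\n", off, n, last);
            payload += n;
            off += n;
            len -= n;
            if (last)
                payload = 0;    /* next packet starts fresh */
        }
    }

In the driver, "starting fresh" is where the new PBC/header pair is cloned and patched (length, checksum, sequence number) before the loop continues.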
*/ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, unsigned long addr, int tlen, int npages) { - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages(current, current->mm, addr, - npages, 0, 1, pages, NULL); - - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } + struct page *pages[8]; + int i, j; + int ret = 0; - for (j = 0; j < npages; j++) { - /* map the pages... */ - const int flen = qib_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; + while (npages) { + if (npages > 8) + j = 8; + else + j = npages; - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = get_user_pages_fast(addr, j, 0, pages); + if (ret != j) { + i = 0; + j = ret; ret = -ENOMEM; - goto done; + goto free_pages; } - qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), dma_addr); + for (i = 0; i < j; i++) { + /* map the pages... */ + unsigned long fofs = addr & ~PAGE_MASK; + int flen = ((fofs + tlen) > PAGE_SIZE) ? + (PAGE_SIZE - fofs) : tlen; + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + pages[i], 1, fofs, flen, NULL); + if (ret < 0) { + /* current page has beed taken + * care of inside above call. + */ + i++; + goto free_pages; + } + + addr += flen; + tlen -= flen; + } - pkt->naddr++; - addr += flen; - tlen -= flen; + npages -= j; } + goto done; + + /* if error, return all pages not managed by pkt */ +free_pages: + while (i < j) + put_page(pages[i++]); + done: return ret; } @@ -335,7 +728,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, const int npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; - ret = qib_user_sdma_pin_pages(dd, pkt, addr, + ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, iov[idx].iov_len, npages); if (ret < 0) goto free_pkt; @@ -344,9 +737,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, goto done; free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) + /* we need to ignore the first entry here */ + for (idx = 1; idx < pkt->naddr; idx++) qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + /* need to dma unmap the first entry, this is to restore to + * the original state so that caller can free the memory in + * error condition. 
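qib_user_sdma_pin_pages now pins in batches of up to eight pages with get_user_pages_fast(), and on a short pin releases only the pages that were returned but not yet handed off. A condensed sketch of that batch-pin-with-partial-cleanup loop (kernel API of this era; the consume() callback is illustrative and, like the driver's page_to_frags call, owns the page on error; the sketch assumes a page-aligned range for the address arithmetic):

    #include <linux/mm.h>

    static int pin_range(unsigned long addr, int npages,
                         int (*consume)(struct page *))
    {
        struct page *pages[8];
        int i, j, ret;

        while (npages) {
            j = npages > 8 ? 8 : npages;
            ret = get_user_pages_fast(addr, j, 0, pages);
            if (ret != j) {
                /* short pin: release what we did get */
                for (i = 0; i < ret; i++)
                    put_page(pages[i]);
                return -ENOMEM;
            }
            for (i = 0; i < j; i++) {
                ret = consume(pages[i]);
                if (ret < 0) {
                    /* consume() released pages[i] itself;
                     * drop the rest of this batch */
                    for (i++; i < j; i++)
                        put_page(pages[i]);
                    return ret;
                }
            }
            addr += j * PAGE_SIZE;
            npages -= j;
        }
        return 0;
    }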
Caller does not know if dma mapped or not*/ + if (pkt->addr[0].dma_mapped) { + dma_unmap_single(&dd->pcidev->dev, + pkt->addr[0].addr, + pkt->addr[0].dma_length, + DMA_TO_DEVICE); + pkt->addr[0].addr = 0; + pkt->addr[0].dma_mapped = 0; + } + done: return ret; } @@ -359,8 +765,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd, { int ret = 0; - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = qib_user_sdma_coalesce(dd, pkt, iov, niov); + if (pkt->frag_size == pkt->bytes_togo && + npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov); else ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); @@ -380,7 +787,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, for (i = 0; i < pkt->naddr; i++) qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); - kmem_cache_free(pq->pkt_slab, pkt); + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); } INIT_LIST_HEAD(list); } @@ -393,63 +803,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, * as, if there is an error we clean it... */ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, - struct list_head *list, const struct iovec *iov, unsigned long niov, - int maxpkts) + struct list_head *list, + int *maxpkts, int *ndesc) { unsigned long idx = 0; int ret = 0; int npkts = 0; - struct page *page = NULL; __le32 *pbc; dma_addr_t dma_addr; struct qib_user_sdma_pkt *pkt = NULL; size_t len; size_t nw; u32 counter = pq->counter; - int dma_mapped = 0; + u16 frag_size; - while (idx < niov && npkts < maxpkts) { + while (idx < niov && npkts < *maxpkts) { const unsigned long addr = (unsigned long) iov[idx].iov_base; const unsigned long idx_save = idx; unsigned pktnw; unsigned pktnwc; int nfrags = 0; int npages = 0; + int bytes_togo = 0; + int tiddma = 0; int cfur; - dma_mapped = 0; len = iov[idx].iov_len; nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || len > PAGE_SIZE || len & 3 || addr & 3) { ret = -EINVAL; - goto free_pkt; + goto free_list; } - if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - + pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr); if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); + ret = -ENOMEM; + goto free_list; } cfur = copy_from_user(pbc, iov[idx].iov_base, len); @@ -474,8 +869,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, * we can verify that the packet is consistent with the * iovec lengths. 
*/ - pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + pktnw = le32_to_cpu(*pbc) & 0xFFFF; + if (pktnw < pktnwc) { ret = -EINVAL; goto free_pbc; } @@ -486,17 +881,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, const unsigned long faddr = (unsigned long) iov[idx].iov_base; - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { + if (slen & 3 || faddr & 3 || !slen) { ret = -EINVAL; goto free_pbc; } - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; + npages += qib_user_sdma_num_pages(&iov[idx]); + bytes_togo += slen; pktnwc += slen >> 2; idx++; nfrags++; @@ -507,48 +899,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, goto free_pbc; } - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF; + if (((frag_size ? frag_size : bytes_togo) + len) > + ppd->ibmaxlen) { + ret = -EINVAL; + goto free_pbc; + } + + if (frag_size) { + int pktsize, tidsmsize, n; + + n = npages*((2*PAGE_SIZE/frag_size)+1); + pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + + /* + * Determine if this is tid-sdma or just sdma. + */ + tiddma = (((le32_to_cpu(pbc[7])>> + QLOGIC_IB_I_TID_SHIFT)& + QLOGIC_IB_I_TID_MASK) != + QLOGIC_IB_I_TID_MASK); + + if (tiddma) + tidsmsize = iov[idx].iov_len; + else + tidsmsize = 0; + + pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (!pkt) { ret = -ENOMEM; goto free_pbc; } + pkt->largepkt = 1; + pkt->frag_size = frag_size; + pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + + if (tiddma) { + char *tidsm = (char *)pkt + pktsize; + cfur = copy_from_user(tidsm, + iov[idx].iov_base, tidsmsize); + if (cfur) { + ret = -EFAULT; + goto free_pkt; + } + pkt->tidsm = + (struct qib_tid_session_member *)tidsm; + pkt->tidsmcount = tidsmsize/ + sizeof(struct qib_tid_session_member); + pkt->tidsmidx = 0; + idx++; + } - dma_mapped = 1; + /* + * pbc 'fill1' field is borrowed to pass frag size, + * we need to clear it after picking frag size, the + * hardware requires this field to be zero. + */ + *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); + } else { + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 0; + pkt->frag_size = bytes_togo; + pkt->addrlimit = ARRAY_SIZE(pkt->addr); } - - qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); + pkt->bytes_togo = bytes_togo; + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ + 0, len, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbc, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->index = 0; + pkt->naddr = 1; if (nfrags) { ret = qib_user_sdma_init_payload(dd, pq, pkt, iov + idx_save + 1, nfrags, npages); if (ret < 0) - goto free_pbc_dma; + goto free_pkt; + } else { + /* since there is no payload, mark the + * header as the last desc. */ + pkt->addr[0].last_desc = 1; + + if (dma_addr == 0) { + /* + * the header is not dma mapped yet. + * it should be from kmalloc. 
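Two fields now ride in the first little-endian PBC word: the packet dword count in bits 15:0 (hence the old QIB_PBC_LENGTH_MASK check narrowing to 0xFFFF) and the PSM fragment size borrowed from the otherwise-zero 'fill1' field in bits 31:16, which hardware requires to be cleared again once read. The unpack/clear step in isolation, mirroring the masks used above:

    #include <linux/types.h>
    #include <asm/byteorder.h>

    static void parse_pbc0(__le32 *pbc, u16 *dwords, u16 *frag_size)
    {
        u32 v = le32_to_cpu(*pbc);

        *dwords    = v & 0xFFFF;            /* packet length in dwords */
        *frag_size = (v >> 16) & 0xFFFF;    /* PSM frag size, or 0 */

        /* hardware requires the borrowed field to be zero again */
        *pbc = cpu_to_le32(v & 0x0000FFFF);
    }

A zero frag_size selects the old single-packet path (kmem_cache allocation); a nonzero one selects the large tid-sdma packet sized for multiple MTU-spanning fragments.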
+ */ + dma_addr = dma_map_single(&dd->pcidev->dev, + pbc, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + dma_addr)) { + ret = -ENOMEM; + goto free_pkt; + } + pkt->addr[0].addr = dma_addr; + pkt->addr[0].dma_mapped = 1; + } } counter++; npkts++; + pkt->pq = pq; + pkt->index = 0; /* reset index for push on hw */ + *ndesc += pkt->naddr; list_add_tail(&pkt->list, list); } + *maxpkts = npkts; ret = idx; goto done; -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pkt: + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else + if (dma_addr) dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); + else + kfree(pbc); free_list: qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); done: @@ -569,10 +1052,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, struct list_head free_list; struct qib_user_sdma_pkt *pkt; struct qib_user_sdma_pkt *pkt_prev; + unsigned long flags; int ret = 0; + if (!pq->num_sending) + return 0; + INIT_LIST_HEAD(&free_list); + /* + * We need this spin lock here because interrupt handler + * might modify this list in qib_user_sdma_send_desc(), also + * we can not get interrupted, otherwise it is a deadlock. + */ + spin_lock_irqsave(&pq->sent_lock, flags); list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { s64 descd = ppd->sdma_descq_removed - pkt->added; @@ -583,7 +1076,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, /* one more packet cleaned */ ret++; + pq->num_sending--; } + spin_unlock_irqrestore(&pq->sent_lock, flags); if (!list_empty(&free_list)) { u32 counter; @@ -604,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -627,6 +1127,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq) { struct qib_devdata *dd = ppd->dd; + unsigned long flags; int i; if (!pq) @@ -634,7 +1135,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { + if (!pq->num_pending && !pq->num_sending) { mutex_unlock(&pq->lock); break; } @@ -644,29 +1145,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, msleep(10); } - if (!list_empty(&pq->sent)) { + if (pq->num_pending || pq->num_sending) { + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; struct list_head free_list; + mutex_lock(&pq->lock); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Since we hold sdma_lock, it is safe without sent_lock. 
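The drain loop above polls num_pending/num_sending under the queue mutex, sleeping 10 ms per pass for up to QIB_USER_SDMA_DRAIN_TIMEOUT (500) iterations, which is the "5secs" budget noted at the top of the file; only once that budget expires does the forced cleanup run. The shape in isolation (the queue_empty predicate is illustrative):

    #include <linux/delay.h>
    #include <linux/types.h>

    static bool drain_queue(bool (*queue_empty)(void))
    {
        int i;

        for (i = 0; i < 500; i++) {     /* 500 * 10 ms = the 5 s budget */
            if (queue_empty())          /* checked under the queue lock */
                return true;
            msleep(10);
        }
        return false;   /* timed out: caller force-frees what remains */
    }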
+ */ + if (pq->num_pending) { + list_for_each_entry_safe(pkt, pkt_prev, + &ppd->sdma_userpending, list) { + if (pkt->pq == pq) { + list_move_tail(&pkt->list, &pq->sent); + pq->num_pending--; + pq->num_sending++; + } + } + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); list_splice_init(&pq->sent, &free_list); + pq->num_sending = 0; qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); mutex_unlock(&pq->lock); } } -static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd, +static inline __le64 qib_sdma_make_desc0(u8 gen, u64 addr, u64 dwlen, u64 dwoffset) { - u8 tmpgen; - - tmpgen = ppd->sdma_generation; - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ ((addr & 0xfffffffcULL) << 32) | /* SDmaGeneration[1:0] */ - ((tmpgen & 3ULL) << 30) | + ((gen & 3ULL) << 30) | /* SDmaDwordCount[10:0] */ ((dwlen & 0x7ffULL) << 16) | /* SDmaBufOffset[12:2] */ @@ -692,7 +1208,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr) static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, struct qib_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) + unsigned ofs, u16 tail, u8 gen) { const u64 addr = (u64) pkt->addr[idx].addr + (u64) pkt->addr[idx].offset; @@ -702,105 +1218,159 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, descqp = &ppd->sdma_descq[tail].qw[0]; - descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs); - if (idx == 0) + descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs); + if (pkt->addr[idx].first_desc) descq0 = qib_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) + if (pkt->addr[idx].last_desc) { descq0 = qib_sdma_make_last_desc0(descq0); + if (ppd->sdma_intrequest) { + descq0 |= cpu_to_le64(1ULL << 15); + ppd->sdma_intrequest = 0; + } + } descqp[0] = descq0; descqp[1] = qib_sdma_make_desc1(addr); } -/* pq->lock must be held, get packets on the wire... 
*/ -static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, - struct qib_user_sdma_queue *pq, - struct list_head *pktlist) +void qib_user_sdma_send_desc(struct qib_pportdata *ppd, + struct list_head *pktlist) { struct qib_devdata *dd = ppd->dd; - int ret = 0; - unsigned long flags; - u16 tail; - u8 generation; - u64 descq_added; - - if (list_empty(pktlist)) - return 0; - - if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&ppd->sdma_lock, flags); + u16 nfree, nsent; + u16 tail, tail_c; + u8 gen, gen_c; - /* keep a copy for restoring purposes in case of problems */ - generation = ppd->sdma_generation; - descq_added = ppd->sdma_descq_added; - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; - } + nfree = qib_sdma_descq_freecnt(ppd); + if (!nfree) + return; - tail = ppd->sdma_descq_tail; +retry: + nsent = 0; + tail_c = tail = ppd->sdma_descq_tail; + gen_c = gen = ppd->sdma_generation; while (!list_empty(pktlist)) { struct qib_user_sdma_pkt *pkt = list_entry(pktlist->next, struct qib_user_sdma_pkt, list); - int i; + int i, j, c = 0; unsigned ofs = 0; u16 dtail = tail; - if (pkt->naddr > qib_sdma_descq_freecnt(ppd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail); + for (i = pkt->index; i < pkt->naddr && nfree; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen); ofs += pkt->addr[i].length >> 2; if (++tail == ppd->sdma_descq_cnt) { tail = 0; - ++ppd->sdma_generation; + ++gen; + ppd->sdma_intrequest = 1; + } else if (tail == (ppd->sdma_descq_cnt>>1)) { + ppd->sdma_intrequest = 1; } - } - - if ((ofs << 2) > ppd->ibmaxlen) { - ret = -EMSGSIZE; - goto unlock; - } + nfree--; + if (pkt->addr[i].last_desc == 0) + continue; - /* - * If the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. - */ - if (ofs > dd->piosize2kmax_dwords) { - for (i = 0; i < pkt->naddr; i++) { - ppd->sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == ppd->sdma_descq_cnt) - dtail = 0; + /* + * If the packet is >= 2KB mtu equivalent, we + * have to use the large buffers, and have to + * mark each descriptor as part of a large + * buffer packet. + */ + if (ofs > dd->piosize2kmax_dwords) { + for (j = pkt->index; j <= i; j++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } } + c += i + 1 - pkt->index; + pkt->index = i + 1; /* index for next first */ + tail_c = dtail = tail; + gen_c = gen; + ofs = 0; /* reset for next packet */ } - ppd->sdma_descq_added += pkt->naddr; - pkt->added = ppd->sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; + ppd->sdma_descq_added += c; + nsent += c; + if (pkt->index == pkt->naddr) { + pkt->added = ppd->sdma_descq_added; + pkt->pq->added = pkt->added; + pkt->pq->num_pending--; + spin_lock(&pkt->pq->sent_lock); + pkt->pq->num_sending++; + list_move_tail(&pkt->list, &pkt->pq->sent); + spin_unlock(&pkt->pq->sent_lock); + } + if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt) + break; } -unlock_check_tail: /* advance the tail on the chip if necessary */ - if (ppd->sdma_descq_tail != tail) - dd->f_sdma_update_tail(ppd, tail); + if (ppd->sdma_descq_tail != tail_c) { + ppd->sdma_generation = gen_c; + dd->f_sdma_update_tail(ppd, tail_c); + } + + if (nfree && !list_empty(pktlist)) + goto retry; + + return; +} + +/* pq->lock must be held, get packets on the wire... 
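qib_user_sdma_send_desc above builds descriptors with private copies of the ring tail and generation (tail, gen) and commits them (tail_c, gen_c, then f_sdma_update_tail) only at packet boundaries, so the chip never observes a partially written packet even when one packet spans many descriptors. The staged-commit shape in miniature (illustrative ring and callbacks; the real commit also republishes the generation):

    struct ring {
        unsigned int tail;      /* published position */
        unsigned int cnt;       /* ring size */
    };

    static void push_packet(struct ring *r, unsigned int ndesc,
                            void (*write_desc)(unsigned int slot),
                            void (*publish_tail)(unsigned int tail))
    {
        unsigned int tail = r->tail, i;

        for (i = 0; i < ndesc; i++) {
            write_desc(tail);           /* staged: not yet visible */
            if (++tail == r->cnt)
                tail = 0;
        }
        /* commit once: hardware never sees a partial packet */
        r->tail = tail;
        publish_tail(tail);
    }

The interrupt-request bit (1ULL << 15 in qib_user_sdma_send_frag) is set only on last descriptors, and only at ring wrap or the halfway mark, bounding completion-interrupt rate while still guaranteeing periodic cleanup.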
*/ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) +{ + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; -unlock: - if (unlikely(ret < 0)) { - ppd->sdma_generation = generation; - ppd->sdma_descq_added = descq_added; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + + pq->num_pending += count; + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); + + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -822,19 +1392,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, if (!qib_sdma_running(ppd)) goto done_unlock; - if (ppd->sdma_descq_added != ppd->sdma_descq_removed) { + /* if I have packets not complete yet */ + if (pq->added > ppd->sdma_descq_removed) qib_user_sdma_hwqueue_clean(ppd); + /* if I have complete packets to be freed */ + if (pq->num_sending) qib_user_sdma_queue_clean(ppd, pq); - } while (dim) { - const int mxp = 8; + int mxp = 1; + int ndesc = 0; - down_write(¤t->mm->mmap_sem); - ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - up_write(¤t->mm->mmap_sem); - - if (ret <= 0) + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, + iov, dim, &list, &mxp, &ndesc); + if (ret < 0) goto done_unlock; else { dim -= ret; @@ -844,24 +1415,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, /* force packets onto the sdma hw queue... */ if (!list_empty(&list)) { /* - * Lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). + * Lazily clean hw queue. */ - if (qib_sdma_descq_freecnt(ppd) < ret * 4) { + if (qib_sdma_descq_freecnt(ppd) < ndesc) { qib_user_sdma_hwqueue_clean(ppd); - qib_user_sdma_queue_clean(ppd, pq); + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); } - ret = qib_user_sdma_push_pkts(ppd, pq, &list); + ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp); if (ret < 0) goto done_unlock; else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; + npkts += mxp; + pq->counter += mxp; } } } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7b6c3bffa9d..9bcfbd84298 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. 
- * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -333,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) * @qp: the QP to post on * @wr: the work request to send */ -static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr) +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, + int *scheduled) { struct qib_swqe *wqe; u32 next; @@ -435,11 +436,17 @@ bail_inval_free: while (j) { struct qib_sge *sge = &wqe->sg_list[--j]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } bail_inval: ret = -EINVAL; bail: + if (!ret && !wr->next && + !qib_sdma_empty( + dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { + qib_schedule_send(qp); + *scheduled = 1; + } spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -457,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct qib_qp *qp = to_iqp(ibqp); int err = 0; + int scheduled = 0; for (; wr; wr = wr->next) { - err = qib_post_one_send(qp, wr); + err = qib_post_one_send(qp, wr, &scheduled); if (err) { *bad_wr = wr; goto bail; @@ -467,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } /* Try to do the send work in the caller's context. */ - qib_do_send(&qp->s_work); + if (!scheduled) + qib_do_send(&qp->s_work); bail: return err; @@ -636,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) } else goto drop; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - ibp->opstats[opcode & 0x7f].n_bytes += tlen; - ibp->opstats[opcode & 0x7f].n_packets++; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif /* Get the destination QP number. 
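Every atomic_dec(&...->refcount) in this file becomes qib_put_mr(), pairing with the completion added to struct qib_mregion in the qib_verbs.h hunk below: the final put signals the completion, so teardown can wait for outstanding references instead of freeing underneath them. A minimal sketch of that put/wait-for-zero discipline (illustrative region type; the driver's actual helpers live elsewhere in this series):

    #include <linux/atomic.h>
    #include <linux/completion.h>

    struct region {
        atomic_t refcount;
        struct completion comp;     /* completed at refcount == 0 */
    };

    static void region_init(struct region *r)
    {
        atomic_set(&r->refcount, 1);    /* creation reference */
        init_completion(&r->comp);
    }

    static void region_put(struct region *r)
    {
        if (atomic_dec_and_test(&r->refcount))
            complete(&r->comp);
    }

    static void region_teardown(struct region *r)
    {
        region_put(r);                  /* drop the creation reference */
        wait_for_completion(&r->comp);
        /* no CPU holds a reference now; safe to free */
    }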
*/ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; @@ -651,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -667,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) &rcd->lookaside_qp->refcount)) wake_up( &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcd->lookaside_qp = NULL; + } } if (!rcd->lookaside_qp) { qp = qib_lookup_qpn(ibp, qp_num); @@ -678,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) rcd->lookaside_qpn = qp_num; } else qp = rcd->lookaside_qp; - ibp->n_unicast_rcv++; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); } return; @@ -978,7 +989,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); if (tx->mr) { - atomic_dec(&tx->mr->refcount); + qib_put_mr(tx->mr); tx->mr = NULL; } if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { @@ -1336,7 +1347,7 @@ done: } qib_sendbuf_done(dd, pbufn); if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_wqe) { @@ -1845,6 +1856,23 @@ bail: return ret; } +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) +{ + struct ib_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct qib_qp *qp0; + + memset(&attr, 0, sizeof attr); + attr.dlid = dlid; + attr.port_num = ppd_from_ibp(ibp)->port; + rcu_read_lock(); + qp0 = rcu_dereference(ibp->qp0); + if (qp0) + ah = ib_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + /** * qib_destroy_ah - destroy an address handle * @ibah: the AH to destroy @@ -2060,13 +2088,15 @@ int qib_register_ib_device(struct qib_devdata *dd) spin_lock_init(&dev->lk_table.lock); dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct qib_mregion **) + dev->lk_table.table = (struct qib_mregion __rcu **) __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; } - memset(dev->lk_table.table, 0, lk_tab_size); + RCU_INIT_POINTER(dev->dma_mr, NULL); + for (i = 0; i < dev->lk_table.max; i++) + RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); dev->mmap_offset = PAGE_SIZE; @@ -2196,7 +2226,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dma_ops = &qib_dma_mapping_ops; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), - QIB_IDSTR " %s", init_utsname()->nodename); + "Intel Infiniband HCA %s", init_utsname()->nodename); ret = ib_register_device(ibdev, qib_create_port_files); if (ret) @@ -2206,7 +2236,8 @@ int qib_register_ib_device(struct qib_devdata *dd) if (ret) goto err_agents; - if (qib_verbs_register_sysfs(dd)) + ret = qib_verbs_register_sysfs(dd); + if (ret) goto err_class; goto bail; @@ -2289,3 +2320,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd) get_order(lk_tab_size)); kfree(dev->qp_table); } + +/* + * This must be called with s_lock held. 
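qib_create_qp0_ah above shows the canonical RCU read-side pattern now used for the QP pointers: sample the __rcu pointer once under rcu_read_lock() and finish every use of it before unlocking, so a concurrent removal cannot free the object mid-call. The pattern in isolation (illustrative object type):

    #include <linux/rcupdate.h>

    struct obj { int id; };
    static struct obj __rcu *current_obj;

    static int use_current_obj(void)
    {
        struct obj *o;
        int ret = -EINVAL;

        rcu_read_lock();
        o = rcu_dereference(current_obj);
        if (o)
            ret = o->id;    /* every use must stay inside the lock */
        rcu_read_unlock();
        return ret;
    }

The same discipline explains the RCU_INIT_POINTER() initialization of dma_mr and the lkey table slots in qib_register_ib_device above: slots are published and retired as RCU-protected pointers rather than plain memory.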
+ */ +void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + queue_work(ppd->qib_wq, &qp->s_work); + } +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 0c19ef0c412..bfc8948fdd3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,6 +41,8 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/kthread.h> +#include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -148,14 +150,14 @@ struct ib_reth { __be64 vaddr; __be32 rkey; __be32 length; -} __attribute__ ((packed)); +} __packed; struct ib_atomic_eth { __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; -} __attribute__ ((packed)); +} __packed; struct qib_other_headers { __be32 bth[3]; @@ -176,7 +178,7 @@ struct qib_other_headers { __be32 aeth; struct ib_atomic_eth atomic_eth; } u; -} __attribute__ ((packed)); +} __packed; /* * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes @@ -193,12 +195,12 @@ struct qib_ib_header { } l; struct qib_other_headers oth; } u; -} __attribute__ ((packed)); +} __packed; struct qib_pio_header { __le32 pbc[2]; struct qib_ib_header hdr; -} __attribute__ ((packed)); +} __packed; /* * There is one struct qib_mcast for each multicast GID. 
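The conversions from __attribute__ ((packed)) to the kernel's __packed macro (from <linux/compiler.h>) in the header below are purely cosmetic; either spelling suppresses struct padding so these headers match the wire format byte-for-byte. A tiny illustration of why the annotation matters for structs like ib_reth (field names here are illustrative):

    #include <linux/compiler.h>
    #include <linux/types.h>

    struct example_hdr {
        __be32 bth[3];  /* offset 0, 12 bytes */
        __be64 vaddr;   /* offset 12 when packed; without __packed the
                         * compiler would pad to offset 16 for u64
                         * alignment and break the wire layout */
    } __packed;
    /* sizeof(struct example_hdr) == 20, not 24 */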
@@ -266,7 +268,8 @@ struct qib_cq_wc { */ struct qib_cq { struct ib_cq ibcq; - struct work_struct comptask; + struct kthread_work comptask; + struct qib_devdata *dd; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; @@ -302,6 +305,9 @@ struct qib_mregion { u32 max_segs; /* number of qib_segs in all the arrays */ u32 mapsz; /* size of the map array */ u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + struct rcu_head list; atomic_t refcount; struct qib_segarray *map[0]; /* the segments */ }; @@ -367,9 +373,10 @@ struct qib_rwq { struct qib_rq { struct qib_rwq *wq; - spinlock_t lock; /* protect changes in this struct */ u32 size; /* size of RWQE array */ u8 max_sge; + spinlock_t lock /* protect changes in this struct */ + ____cacheline_aligned_in_smp; }; struct qib_srq { @@ -412,31 +419,75 @@ struct qib_ack_entry { */ struct qib_qp { struct ib_qp ibqp; - struct qib_qp *next; /* link list for QPN hash table */ - struct qib_qp *timer_next; /* link list for qib_ib_timer() */ - struct list_head iowait; /* link for wait PIO buf */ - struct list_head rspwait; /* link for waititing to respond */ + /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct timer_list s_timer; - struct work_struct s_work; + struct qib_qp __rcu *next; /* link list for QPN hash table */ + struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; + struct qib_ib_header *s_hdr; /* next packet header to send */ + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + + u8 state; /* QP state */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct qib_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waititing to respond */ + + struct qib_sge_state r_sge; /* 
+	struct qib_rq r_rq;		/* receive work queue */
+
+	spinlock_t s_lock ____cacheline_aligned_in_smp;
 	struct qib_sge_state *s_cur_sge;
+	u32 s_flags;
 	struct qib_verbs_txreq *s_tx;
-	struct qib_mregion *s_rdma_mr;
+	struct qib_swqe *s_wqe;
 	struct qib_sge_state s_sge;	/* current send request data */
-	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
-	struct qib_sge_state s_ack_rdma_sge;
-	struct qib_sge_state s_rdma_read_sge;
-	struct qib_sge_state r_sge;	/* current receive data */
-	spinlock_t r_lock;	/* used for APM */
-	spinlock_t s_lock;
+	struct qib_mregion *s_rdma_mr;
 	atomic_t s_dma_busy;
-	u32 s_flags;
 	u32 s_cur_size;		/* size of send packet in bytes */
 	u32 s_len;		/* total length of s_sge */
 	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
@@ -447,60 +498,34 @@
 	u32 s_psn;		/* current packet sequence number */
 	u32 s_ack_rdma_psn;	/* PSN for sending RDMA read responses */
 	u32 s_ack_psn;		/* PSN for acking sends and RDMA writes */
-	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
-	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
-	u64 r_wr_id;		/* ID for current receive WQE */
-	unsigned long r_aflags;
-	u32 r_len;		/* total length of r_sge */
-	u32 r_rcv_len;		/* receive data len processed */
-	u32 r_psn;		/* expected rcv packet sequence number */
-	u32 r_msn;		/* message sequence number */
+	u32 s_head;		/* new entries added here */
+	u32 s_tail;		/* next entry to process */
+	u32 s_cur;		/* current work queue entry */
+	u32 s_acked;		/* last un-ACK'ed entry */
+	u32 s_last;		/* last completed entry */
+	u32 s_ssn;		/* SSN of tail entry */
+	u32 s_lsn;		/* limit sequence number (credit) */
 	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */
 	u16 s_rdma_ack_cnt;
-	u8 state;		/* QP state */
 	u8 s_state;		/* opcode of last packet sent */
 	u8 s_ack_state;		/* opcode of packet to ACK */
 	u8 s_nak_state;		/* non-zero if NAK is pending */
-	u8 r_state;		/* opcode of last packet received */
 	u8 r_nak_state;		/* non-zero if NAK is pending */
-	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
-	u8 r_flags;
-	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
-	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
-	u8 qp_access_flags;
-	u8 s_max_sge;		/* size of s_wq->sg_list */
-	u8 s_retry_cnt;		/* number of times to retry */
-	u8 s_rnr_retry_cnt;
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
-	u8 s_pkey_index;	/* PKEY index to use */
-	u8 s_alt_pkey_index;	/* Alternate path PKEY index to use */
-	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
 	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
 	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
-	u8 s_srate;
-	u8 s_draining;
-	u8 s_mig_state;
-	u8 timeout;		/* Timeout for this QP */
-	u8 alt_timeout;		/* Alternate path timeout for this QP */
-	u8 port_num;
-	enum ib_mtu path_mtu;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 remote_qpn;
-	u32 qkey;		/* QKEY for this QP (for UD or RD) */
-	u32 s_size;		/* send work queue size */
-	u32 s_head;		/* new entries added here */
-	u32 s_tail;		/* next entry to process */
-	u32 s_cur;		/* current work queue entry */
-	u32 s_acked;		/* last un-ACK'ed entry */
-	u32 s_last;		/* last completed entry */
-	u32 s_ssn;		/* SSN of tail entry */
-	u32 s_lsn;		/* limit sequence number (credit) */
-	unsigned long timeout_jiffies; /* computed from timeout */
-	struct qib_swqe *s_wq;	/* send work queue */
-	struct qib_swqe *s_wqe;
-	struct qib_rq r_rq;	/* receive work queue */
-	struct qib_sge r_sg_list[0];	/* verified SGEs */
+
+	struct qib_sge_state s_ack_rdma_sge;
+	struct timer_list s_timer;
+	struct list_head iowait;	/* link for wait PIO buf */
+
+	struct work_struct s_work;
+
+	wait_queue_head_t wait_dma;
+
+	struct qib_sge r_sg_list[0]	/* verified SGEs */
+		____cacheline_aligned_in_smp;
 };
 
 /*
@@ -627,7 +652,7 @@ struct qib_lkey_table {
 	u32 next;		/* next unused index (speeds search) */
 	u32 gen;		/* generation count */
 	u32 max;		/* size of the table */
-	struct qib_mregion **table;
+	struct qib_mregion __rcu **table;
 };
 
 struct qib_opcode_stats {
@@ -635,9 +660,20 @@
 	u64 n_bytes;		/* total number of bytes */
 };
 
+struct qib_opcode_stats_perctx {
+	struct qib_opcode_stats stats[128];
+};
+
+struct qib_pma_counters {
+	u64 n_unicast_xmit;	/* total unicast packets sent */
+	u64 n_unicast_rcv;	/* total unicast packets received */
+	u64 n_multicast_xmit;	/* total multicast packets sent */
+	u64 n_multicast_rcv;	/* total multicast packets received */
+};
+
 struct qib_ibport {
-	struct qib_qp *qp0;
-	struct qib_qp *qp1;
+	struct qib_qp __rcu *qp0;
+	struct qib_qp __rcu *qp1;
 	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
 	struct qib_ah *sm_ah;
 	struct qib_ah *smi_ah;
@@ -651,10 +687,11 @@
 	__be64 mkey;
 	__be64 guids[QIB_GUIDS_PER_PORT - 1];	/* writable GUIDs */
 	u64 tid;		/* TID for traps */
-	u64 n_unicast_xmit;	/* total unicast packets sent */
-	u64 n_unicast_rcv;	/* total unicast packets received */
-	u64 n_multicast_xmit;	/* total multicast packets sent */
-	u64 n_multicast_rcv;	/* total multicast packets received */
+	struct qib_pma_counters __percpu *pmastats;
+	u64 z_unicast_xmit;	/* starting count for PMA */
+	u64 z_unicast_rcv;	/* starting count for PMA */
+	u64 z_multicast_xmit;	/* starting count for PMA */
+	u64 z_multicast_rcv;	/* starting count for PMA */
 	u64 z_symbol_error_counter;		/* starting count for PMA */
 	u64 z_link_error_recovery_counter;	/* starting count for PMA */
 	u64 z_link_downed_counter;		/* starting count for PMA */
@@ -701,15 +738,15 @@
 	u8 vl_high_limit;
 	u8 sl_to_vl[16];
 
-	struct qib_opcode_stats opstats[128];
 };
 
+
 struct qib_ibdev {
 	struct ib_device ibdev;
 	struct list_head pending_mmaps;
 	spinlock_t mmap_offset_lock; /* protect mmap_offset */
 	u32 mmap_offset;
-	struct qib_mregion *dma_mr;
+	struct qib_mregion __rcu *dma_mr;
 
 	/* QP numbers are shared by all IB ports */
 	struct qib_qpn_table qpn_table;
@@ -720,7 +757,7 @@
 	struct list_head memwait;	/* list for wait kernel memory */
 	struct list_head txreq_free;
 	struct timer_list mem_timer;
-	struct qib_qp **qp_table;
+	struct qib_qp __rcu **qp_table;
 	struct qib_pio_header *pio_hdrs;
 	dma_addr_t pio_hdrs_phys;
 	/* list of QPs waiting for RNR timer */
@@ -744,6 +781,10 @@
 	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
 	spinlock_t n_mcast_grps_lock;
+#ifdef CONFIG_DEBUG_FS
+	/* per HCA debugfs */
+	struct dentry *qib_ibdev_dbg;
+#endif
 };
 
 struct qib_verbs_counters {
@@ -808,16 +849,10 @@ static inline int qib_send_ok(struct qib_qp *qp)
 		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
 }
 
-extern struct workqueue_struct *qib_cq_wq;
-
 /*
 * This must be called with s_lock held.
 */
-static inline void qib_schedule_send(struct qib_qp *qp)
-{
-	if (qib_send_ok(qp))
-		queue_work(ib_wq, &qp->s_work);
-}
+void qib_schedule_send(struct qib_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
@@ -890,6 +925,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
 
 void qib_free_qpn_table(struct qib_qpn_table *qpt);
 
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter;
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
+
+int qib_qp_iter_next(struct qib_qp_iter *iter);
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
+
+#endif
+
 void qib_get_credit(struct qib_qp *qp, u32 aeth);
 
 unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -914,6 +961,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
+
 void qib_rc_rnr_retry(unsigned long arg);
 
 void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
@@ -925,9 +974,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
 
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
 
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+void qib_free_lkey(struct qib_mregion *mr);
 
 int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		struct qib_sge *isge, struct ib_sge *sge, int acc);
@@ -950,6 +999,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int qib_destroy_srq(struct ib_srq *ibsrq);
 
+int qib_cq_init(struct qib_devdata *dd);
+
+void qib_cq_exit(struct qib_devdata *dd);
+
 void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
 
 int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
@@ -995,6 +1048,29 @@ int qib_unmap_fmr(struct list_head *fmr_list);
 
 int qib_dealloc_fmr(struct ib_fmr *ibfmr);
 
+static inline void qib_get_mr(struct qib_mregion *mr)
+{
+	atomic_inc(&mr->refcount);
+}
+
+void mr_rcu_callback(struct rcu_head *list);
+
+static inline void qib_put_mr(struct qib_mregion *mr)
+{
+	if (unlikely(atomic_dec_and_test(&mr->refcount)))
+		call_rcu(&mr->list, mr_rcu_callback);
+}
+
+static inline void qib_put_ss(struct qib_sge_state *ss)
+{
+	while (ss->num_sge) {
+		qib_put_mr(ss->sge.mr);
+		if (--ss->num_sge)
+			ss->sge = *ss->sg_list++;
+	}
+}
+
+
 void qib_release_mmap_info(struct kref *ref);
 
 struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index 561b8bca406..1d7281c5a02 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
@@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd)
 		u64 atmp;
 		atmp = pioaddr & ~(piolen - 1);
 		if (atmp < addr || (atmp + piolen) > (addr + len)) {
-			qib_dev_err(dd, "No way to align address/size "
-				    "(%llx/%llx), no WC mtrr\n",
-				    (unsigned long long) atmp,
-				    (unsigned long long) piolen << 1);
+			qib_dev_err(dd,
+				"No way to align address/size (%llx/%llx), no WC mtrr\n",
+				(unsigned long long) atmp,
+				(unsigned long long) piolen << 1);
 			ret = -ENODEV;
 		} else {
 			pioaddr = atmp;
@@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd)
 		if (cookie < 0) {
 			{
 				qib_devinfo(dd->pcidev,
-					    "mtrr_add() WC for PIO bufs "
-					    "failed (%d)\n",
+					"mtrr_add() WC for PIO bufs failed (%d)\n",
 					cookie);
 				ret = -EINVAL;
 			}
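The qib_get_mr()/qib_put_mr() inlines added to qib_verbs.h above pair an atomic refcount with an RCU-deferred callback, and struct qib_mregion gains comp ("complete when refcount goes to zero") and an rcu_head to support it. A sketch of the intended lifecycle follows; the demo_* names are invented, and the dereg-side wait_for_completion() is an assumption drawn from the comp field's comment rather than code shown in this patch:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>

struct demo_mregion {
	atomic_t refcount;
	struct completion comp;	/* completed once refcount hits zero */
	struct rcu_head list;
};

static void demo_init_mr(struct demo_mregion *mr)
{
	atomic_set(&mr->refcount, 1);	/* the lkey table's reference */
	init_completion(&mr->comp);
}

/* invoked only after an RCU grace period, when no rcu_read_lock()
 * reader can still hold a pointer obtained from the lkey table
 */
static void demo_mr_rcu_callback(struct rcu_head *list)
{
	struct demo_mregion *mr = container_of(list, struct demo_mregion, list);

	complete(&mr->comp);
}

static void demo_put_mr(struct demo_mregion *mr)
{
	if (atomic_dec_and_test(&mr->refcount))
		call_rcu(&mr->list, demo_mr_rcu_callback);
}

/* dereg path: drop the table's reference, then block until every
 * in-flight user has dropped theirs and the grace period has elapsed
 */
static void demo_dereg_mr(struct demo_mregion *mr)
{
	demo_put_mr(mr);
	wait_for_completion(&mr->comp);
}

This is presumably also why qib_free_lkey() loses its int return in the hunk above: once teardown can wait for outstanding references to drain, unpublishing an lkey no longer needs a busy-failure path.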
