Diffstat (limited to 'drivers/infiniband/hw/qib')
-rw-r--r--  drivers/infiniband/hw/qib/Kconfig         |   14
-rw-r--r--  drivers/infiniband/hw/qib/Makefile        |    1
-rw-r--r--  drivers/infiniband/hw/qib/qib.h           |   80
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h    |   32
-rw-r--r--  drivers/infiniband/hw/qib/qib_cq.c        |   67
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.c   |  283
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.h   |   45
-rw-r--r--  drivers/infiniband/hw/qib/qib_diag.c      |   52
-rw-r--r--  drivers/infiniband/hw/qib/qib_dma.c       |   21
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c    |    6
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c  |  185
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c        |    8
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c   |   24
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c   |   22
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c   |  562
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c      |  247
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c      |    2
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c       |   46
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.h       |   17
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c        |   14
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c      |  191
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c        |  128
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c        |    2
-rw-r--r--  drivers/infiniband/hw/qib/qib_ruc.c       |    1
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c    |    2
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c      |   64
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c     |    6
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c        |   15
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c | 1025
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c     |   23
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h     |   59
31 files changed, 2559 insertions, 685 deletions
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 8349f9c5064..495be09781b 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -1,7 +1,15 @@
config INFINIBAND_QIB
- tristate "QLogic PCIe HCA support"
+ tristate "Intel PCIe HCA support"
depends on 64BIT
---help---
- This is a low-level driver for QLogic PCIe QLE InfiniBand host
- channel adapters. This driver does not support the QLogic
+ This is a low-level driver for Intel PCIe QLE InfiniBand host
+ channel adapters. This driver does not support the Intel
HyperTransport card (model QHT7140).
+
+config INFINIBAND_QIB_DCA
+ bool "QIB DCA support"
+ depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m)
+ default y
+ ---help---
+ Setting this enables DCA support on some Intel chip sets
+ with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index f12d7bb8b39..57f8103e51f 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
+ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4d11575c201..c00ae093b6f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
#ifndef _QIB_KERNEL_H
#define _QIB_KERNEL_H
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -51,6 +51,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/sched.h>
+#include <linux/kthread.h>
#include "qib_common.h"
#include "qib_verbs.h"
@@ -88,7 +89,6 @@ struct qlogic_ib_stats {
extern struct qlogic_ib_stats qib_stats;
extern const struct pci_error_handlers qib_pci_err_handler;
-extern struct pci_driver qib_driver;
#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
/*
@@ -114,6 +114,11 @@ struct qib_eep_log_mask {
/*
* Below contains all data related to a single context (formerly called port).
*/
+
+#ifdef CONFIG_DEBUG_FS
+struct qib_opcode_stats_perctx;
+#endif
+
struct qib_ctxtdata {
void **rcvegrbuf;
dma_addr_t *rcvegrbuf_phys;
@@ -154,6 +159,8 @@ struct qib_ctxtdata {
*/
/* instead of calculating it */
unsigned ctxt;
+ /* local node of context */
+ int node_id;
/* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
@@ -222,12 +229,15 @@ struct qib_ctxtdata {
u8 redirect_seq_cnt;
/* ctxt rcvhdrq head offset */
u32 head;
- u32 pkt_count;
/* lookaside fields */
struct qib_qp *lookaside_qp;
u32 lookaside_qpn;
/* QPs waiting for context processing */
struct list_head qp_wait_list;
+#ifdef CONFIG_DEBUG_FS
+ /* verbs stats per CTX */
+ struct qib_opcode_stats_perctx *opstats;
+#endif
};
struct qib_sge_state;
@@ -428,9 +438,19 @@ struct qib_verbs_txreq {
#define ACTIVITY_TIMER 5
#define MAX_NAME_SIZE 64
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify;
+#endif
+
struct qib_msix_entry {
struct msix_entry msix;
void *arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca;
+ int rcv;
+ struct qib_irq_notify *notifier;
+#endif
char name[MAX_NAME_SIZE];
cpumask_var_t mask;
};
@@ -555,11 +575,13 @@ struct qib_pportdata {
/* read/write using lock */
spinlock_t sdma_lock ____cacheline_aligned_in_smp;
struct list_head sdma_activelist;
+ struct list_head sdma_userpending;
u64 sdma_descq_added;
u64 sdma_descq_removed;
u16 sdma_descq_tail;
u16 sdma_descq_head;
u8 sdma_generation;
+ u8 sdma_intrequest;
struct tasklet_struct sdma_sw_clean_up_task
____cacheline_aligned_in_smp;
@@ -828,6 +850,9 @@ struct qib_devdata {
struct qib_ctxtdata *);
void (*f_writescratch)(struct qib_devdata *, u32);
int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
+#endif
char *boardname; /* human readable board info */
@@ -843,8 +868,10 @@ struct qib_devdata {
/* last buffer for user use */
u32 lastctxt_piobuf;
- /* saturating counter of (non-port-specific) device interrupts */
- u32 int_counter;
+ /* reset value */
+ u64 z_int_counter;
+ /* percpu intcounter */
+ u64 __percpu *int_counter;
/* pio bufs allocated per ctxt */
u32 pbufsctxt;
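
The hunk above swaps the single saturating int_counter for a per-CPU counter plus a z_int_counter baseline; later hunks show the reset paths capturing qib_int_counter(dd) into z_int_counter instead of zeroing anything. A minimal standalone C sketch of that scheme (a plain array stands in for the kernel's __percpu allocation; all names and values are illustrative):

/* Standalone sketch (not kernel code): each CPU bumps its own slot, readers
 * sum every slot, and "reset" records the current total as a baseline
 * (z_int_counter) rather than zeroing anything.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

static uint64_t int_counter[NR_CPUS];   /* models the __percpu counter */
static uint64_t z_int_counter;          /* baseline captured at reset  */

static void count_interrupt(int cpu)    /* models this_cpu_inc()       */
{
        int_counter[cpu]++;
}

static uint64_t total_ints(void)        /* models qib_int_counter(dd)  */
{
        uint64_t sum = 0;

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                sum += int_counter[cpu];
        return sum;
}

int main(void)
{
        count_interrupt(0);
        count_interrupt(2);
        z_int_counter = total_ints();   /* "reset": remember the baseline */
        count_interrupt(1);
        printf("interrupts since reset: %llu\n",
               (unsigned long long)(total_ints() - z_int_counter));
        return 0;
}
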
@@ -1075,6 +1102,10 @@ struct qib_devdata {
u16 psxmitwait_check_rate;
/* high volume overflow errors deferred to tasklet */
struct tasklet_struct error_tasklet;
+ /* per device cq worker */
+ struct kthread_worker *worker;
+
+ int assigned_node_id; /* NUMA node closest to HCA */
};
/* hol_state values */
@@ -1154,8 +1185,8 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
int qib_setup_eagerbufs(struct qib_ctxtdata *);
void qib_set_ctxtcnt(struct qib_devdata *);
int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1298,6 +1329,8 @@ int qib_setup_sdma(struct qib_pportdata *);
void qib_teardown_sdma(struct qib_pportdata *);
void __qib_sdma_intr(struct qib_pportdata *);
void qib_sdma_intr(struct qib_pportdata *);
+void qib_user_sdma_send_desc(struct qib_pportdata *dd,
+ struct list_head *pktlist);
int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
u32, struct qib_verbs_txreq *);
/* ppd->sdma_lock should be locked before calling this. */
@@ -1320,7 +1353,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
}
int qib_sdma_running(struct qib_pportdata *);
-
+void dump_sdma_state(struct qib_pportdata *ppd);
void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
@@ -1418,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *);
void qib_nomsix(struct qib_devdata *);
void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
/*
* dma_addr wrappers - all 0's invalid for hw
@@ -1445,6 +1482,7 @@ extern unsigned qib_n_krcv_queues;
extern unsigned qib_sdma_fetch_arb;
extern unsigned qib_compat_ddr_negotiate;
extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
extern struct mutex qib_mutex;
@@ -1474,27 +1512,23 @@ extern struct mutex qib_mutex;
* first to avoid possible serial port delays from printk.
*/
#define qib_early_err(dev, fmt, ...) \
- do { \
- dev_err(dev, fmt, ##__VA_ARGS__); \
- } while (0)
+ dev_err(dev, fmt, ##__VA_ARGS__)
#define qib_dev_err(dd, fmt, ...) \
- do { \
- dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \
- } while (0)
+ dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+
+#define qib_dev_warn(dd, fmt, ...) \
+ dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
#define qib_dev_porterr(dd, port, fmt, ...) \
- do { \
- dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
- qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
- ##__VA_ARGS__); \
- } while (0)
+ dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
+ qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
+ ##__VA_ARGS__)
#define qib_devinfo(pcidev, fmt, ...) \
- do { \
- dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \
- } while (0)
+ dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
/*
* this is used for formatting hw error messages...
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index d39e0183ff8..5670ace27c6 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define QIB_USER_SWMINOR 11
+#define QIB_USER_SWMINOR 13
#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
@@ -701,7 +701,37 @@ struct qib_message_header {
__be32 bth[3];
/* fields below this point are in host byte order */
struct qib_header iph;
+ /* fields below are simplified, but should match PSM */
+ /* some are accessed by the driver when packet splitting is needed */
__u8 sub_opcode;
+ __u8 flags;
+ __u16 commidx;
+ __u32 ack_seq_num;
+ __u8 flowid;
+ __u8 hdr_dlen;
+ __u16 mqhdr;
+ __u32 uwords[4];
+};
+
+/* sequence number bits for message */
+union qib_seqnum {
+ struct {
+ __u32 seq:11;
+ __u32 gen:8;
+ __u32 flow:5;
+ };
+ struct {
+ __u32 pkt:16;
+ __u32 msg:8;
+ };
+ __u32 val;
+};
+
+/* qib receiving-dma tid-session-member */
+struct qib_tid_session_member {
+ __u16 tid;
+ __u16 offset;
+ __u16 length;
};
/* IB - LRH header consts */
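
The new union qib_seqnum above overlays two bitfield views of the same 32-bit sequence word plus a raw value. A small standalone C model of the idea (bit placement is whatever the compiler's bitfield layout gives, which is the convention the driver and PSM share; the values below are made up):

/* Standalone model: two bitfield views and a raw 32-bit value share storage. */
#include <stdint.h>
#include <stdio.h>

union seqnum {
        struct {
                uint32_t seq:11;
                uint32_t gen:8;
                uint32_t flow:5;
        };
        struct {
                uint32_t pkt:16;
                uint32_t msg:8;
        };
        uint32_t val;
};

int main(void)
{
        union seqnum s = { .val = 0 };

        s.seq  = 0x123;         /* 11-bit packet sequence */
        s.gen  = 0x45;          /*  8-bit generation      */
        s.flow = 0x1f;          /*  5-bit flow id         */
        printf("raw word 0x%08x, pkt view 0x%04x, msg view 0x%02x\n",
               (unsigned)s.val, (unsigned)s.pkt, (unsigned)s.msg);
        return 0;
}
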
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index 5246aa486bb..ab4e11cfab1 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -34,8 +35,10 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/kthread.h>
#include "qib_verbs.h"
+#include "qib.h"
/**
* qib_cq_enter - add a new entry to the completion queue
@@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
- cq->notify = IB_CQ_NONE;
- cq->triggered++;
+ struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
- queue_work(qib_cq_wq, &cq->comptask);
+ smp_rmb();
+ worker = cq->dd->worker;
+ if (likely(worker)) {
+ cq->notify = IB_CQ_NONE;
+ cq->triggered++;
+ queue_kthread_work(worker, &cq->comptask);
+ }
}
spin_unlock_irqrestore(&cq->lock, flags);
@@ -163,7 +171,7 @@ bail:
return npolled;
}
-static void send_complete(struct work_struct *work)
+static void send_complete(struct kthread_work *work)
{
struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
@@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
* The number of entries should be >= the number requested or return
* an error.
*/
+ cq->dd = dd_from_dev(dev);
cq->ibcq.cqe = entries;
cq->notify = IB_CQ_NONE;
cq->triggered = 0;
spin_lock_init(&cq->lock);
- INIT_WORK(&cq->comptask, send_complete);
+ init_kthread_work(&cq->comptask, send_complete);
wc->head = 0;
wc->tail = 0;
cq->queue = wc;
@@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq)
struct qib_ibdev *dev = to_idev(ibcq->device);
struct qib_cq *cq = to_icq(ibcq);
- flush_work(&cq->comptask);
+ flush_kthread_work(&cq->comptask);
spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
spin_unlock(&dev->n_cqs_lock);
@@ -483,3 +492,49 @@ bail_free:
bail:
return ret;
}
+
+int qib_cq_init(struct qib_devdata *dd)
+{
+ int ret = 0;
+ int cpu;
+ struct task_struct *task;
+
+ if (dd->worker)
+ return 0;
+ dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
+ if (!dd->worker)
+ return -ENOMEM;
+ init_kthread_worker(dd->worker);
+ task = kthread_create_on_node(
+ kthread_worker_fn,
+ dd->worker,
+ dd->assigned_node_id,
+ "qib_cq%d", dd->unit);
+ if (IS_ERR(task))
+ goto task_fail;
+ cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+out:
+ return ret;
+task_fail:
+ ret = PTR_ERR(task);
+ kfree(dd->worker);
+ dd->worker = NULL;
+ goto out;
+}
+
+void qib_cq_exit(struct qib_devdata *dd)
+{
+ struct kthread_worker *worker;
+
+ worker = dd->worker;
+ if (!worker)
+ return;
+ /* blocks future queuing from send_complete() */
+ dd->worker = NULL;
+ smp_wmb();
+ flush_kthread_worker(worker);
+ kthread_stop(worker->task);
+ kfree(worker);
+}
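
In the qib_cq.c changes above, qib_cq_enter() re-reads dd->worker after an smp_rmb() and only queues work while the pointer is non-NULL, while qib_cq_exit() clears the pointer before flushing and stopping the worker, so no completion can be queued to a worker that is being torn down. A userspace model of that handshake using C11 atomics (the names here are stand-ins, not the driver API):

/* Userspace model: teardown clears the published pointer first, and the
 * enqueue path re-reads it and backs off when it is NULL.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct worker { int alive; };

static _Atomic(struct worker *) worker_ptr;

static void enqueue_completion(void)            /* models qib_cq_enter() */
{
        struct worker *w = atomic_load(&worker_ptr);

        if (w)                                  /* only queue while published */
                printf("queued to worker %p\n", (void *)w);
        else
                printf("worker gone, nothing queued\n");
}

static void teardown(void)                      /* models qib_cq_exit() */
{
        struct worker *w = atomic_exchange(&worker_ptr, NULL);

        if (!w)
                return;
        /* from here on, enqueue_completion() sees NULL and backs off */
        free(w);
}

int main(void)
{
        struct worker *w = calloc(1, sizeof(*w));

        atomic_store(&worker_ptr, w);
        enqueue_completion();
        teardown();
        enqueue_completion();
        return 0;
}
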
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 00000000000..799a0c3bffc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_verbs.h"
+#include "qib_debugfs.h"
+
+static struct dentry *qib_dbg_root;
+
+#define DEBUGFS_FILE(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
+}; \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
+} \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = seq_release \
+};
+
+#define DEBUGFS_FILE_CREATE(name) \
+do { \
+ struct dentry *ent; \
+ ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
+ ibd, &_##name##_file_ops); \
+ if (!ent) \
+ pr_warn("create of " #name " failed\n"); \
+} while (0)
+
+static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+
+static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _opcode_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_user_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu\n", i,
+ (unsigned long long) n_packets,
+ (unsigned long long) n_bytes);
+
+ return 0;
+}
+
+DEBUGFS_FILE(opcode_stats)
+
+static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (!*pos)
+ return SEQ_START_TOKEN;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN)
+ return pos;
+
+ ++*pos;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _ctx_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos;
+ loff_t i, j;
+ u64 n_packets = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(s, "Ctx:npkts\n");
+ return 0;
+ }
+
+ spos = v;
+ i = *spos;
+
+ if (!dd->rcd[i])
+ return SEQ_SKIP;
+
+ for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
+ n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
+
+ if (!n_packets)
+ return SEQ_SKIP;
+
+ seq_printf(s, " %llu:%llu\n", i, n_packets);
+ return 0;
+}
+
+DEBUGFS_FILE(ctx_stats)
+
+static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_qp_iter *iter;
+ loff_t n = *pos;
+
+ iter = qib_qp_iter_init(s->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ loff_t *pos)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ if (!iter)
+ return 0;
+
+ qib_qp_iter_print(s, iter);
+
+ return 0;
+}
+
+DEBUGFS_FILE(qp_stats)
+
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
+{
+ char name[10];
+
+ snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
+ ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
+ if (!ibd->qib_ibdev_dbg) {
+ pr_warn("create of %s failed\n", name);
+ return;
+ }
+ DEBUGFS_FILE_CREATE(opcode_stats);
+ DEBUGFS_FILE_CREATE(ctx_stats);
+ DEBUGFS_FILE_CREATE(qp_stats);
+ return;
+}
+
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
+{
+ if (!qib_dbg_root)
+ goto out;
+ debugfs_remove_recursive(ibd->qib_ibdev_dbg);
+out:
+ ibd->qib_ibdev_dbg = NULL;
+}
+
+void qib_dbg_init(void)
+{
+ qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
+ if (!qib_dbg_root)
+ pr_warn("init of debugfs failed\n");
+}
+
+void qib_dbg_exit(void)
+{
+ debugfs_remove_recursive(qib_dbg_root);
+ qib_dbg_root = NULL;
+}
+
+#endif
+
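The DEBUGFS_FILE() macro in qib_debugfs.c above stamps out a seq_operations table, an open routine that copies inode->i_private into seq->private, and a file_operations per name, so DEBUGFS_FILE(opcode_stats) yields _opcode_stats_seq_ops, _opcode_stats_open() and _opcode_stats_file_ops. A standalone illustration of the same ##-pasting pattern (simplified types, illustrative names):

/* One macro invocation generates a full set of per-name symbols. */
#include <stdio.h>

struct ops { int (*show)(void); };

#define DEFINE_STATS(name)                              \
static int _##name##_show(void)                         \
{                                                       \
        printf(#name " show\n");                        \
        return 0;                                       \
}                                                       \
static const struct ops _##name##_ops = {               \
        .show = _##name##_show,                         \
};

DEFINE_STATS(opcode_stats)
DEFINE_STATS(ctx_stats)

int main(void)
{
        _opcode_stats_ops.show();
        _ctx_stats_ops.show();
        return 0;
}
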
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 00000000000..7ae983a91b8
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
+#ifndef _QIB_DEBUGFS_H
+#define _QIB_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+struct qib_ibdev;
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
+void qib_dbg_init(void);
+void qib_dbg_exit(void);
+
+#endif
+
+#endif /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 1686fd4bda8..5dfda4c5cc9 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct qib_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct qib_devdata *dd;
@@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
ppd = &dd->pport[dp.port - 1];
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
- clen = dp.len >> 2;
-
- if ((plen + 4) > ppd->ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
qib_devinfo(dd->pcidev,
@@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
*/
if (dd->flags & QIB_PIO_FLUSH_WC) {
qib_flush_wc();
- qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
qib_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- qib_pio_copy(piobuf + 2, tmpbuf, clen);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen);
if (dd->flags & QIB_USE_SPCL_TRIG) {
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
const struct diag_observer *op)
{
struct diag_observer_list_elt *olp;
- int ret = -EINVAL;
+ unsigned long flags;
if (!dd || !op)
- goto bail;
- ret = -ENOMEM;
+ return -EINVAL;
olp = vmalloc(sizeof *olp);
if (!olp) {
pr_err("vmalloc for observer failed\n");
- goto bail;
+ return -ENOMEM;
}
- if (olp) {
- unsigned long flags;
- spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
- olp->op = op;
- olp->next = dd->diag_observer_list;
- dd->diag_observer_list = olp;
- spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
- ret = 0;
- }
-bail:
- return ret;
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp->op = op;
+ olp->next = dd->diag_observer_list;
+ dd->diag_observer_list = olp;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+ return 0;
}
/* Remove all registered observers when device is closed */
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
index 2920bb39a65..59fe092b4b0 100644
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ b/drivers/infiniband/hw/qib/qib_dma.c
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
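
The following hunks drop the per-driver dma_address/dma_len ops, so generic helpers read sg->dma_address and sg->dma_length directly; that is why qib_map_sg() above now fills those fields itself. A simplified standalone model of that responsibility (the struct is an illustrative stand-in, not the kernel's scatterlist):

/* The point is only that map_sg fills dma_address/dma_length so generic
 * readers keep working for this software-DMA device.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_sg {
        uint64_t page_addr;      /* kernel virtual address of the page */
        unsigned int offset;
        unsigned int length;
        uint64_t dma_address;    /* what generic helpers now read      */
        unsigned int dma_length;
};

static int map_sg(struct fake_sg *sgl, int nents)
{
        for (int i = 0; i < nents; i++) {
                struct fake_sg *sg = &sgl[i];

                if (!sg->page_addr)
                        return 0;                        /* mapping failed */
                sg->dma_address = sg->page_addr + sg->offset;
                sg->dma_length  = sg->length;
        }
        return nents;
}

int main(void)
{
        struct fake_sg sg = { .page_addr = 0x1000, .offset = 0x10, .length = 64 };

        printf("mapped %d entries, dma_address 0x%llx, dma_length %u\n",
               map_sg(&sg, 1), (unsigned long long)sg.dma_address, sg.dma_length);
        return 0;
}
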
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
.unmap_page = qib_dma_unmap_page,
.map_sg = qib_map_sg,
.unmap_sg = qib_unmap_sg,
- .dma_address = qib_sg_dma_address,
- .dma_len = qib_sg_dma_len,
.sync_single_for_cpu = qib_sync_single_for_cpu,
.sync_single_for_device = qib_sync_single_for_device,
.alloc_coherent = qib_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 5423edcab51..5bee08f16d7 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -63,8 +64,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
"Attempt pre-IBTA 1.2 DDR speed negotiation");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic IB driver");
+MODULE_AUTHOR("Intel <ibsupport@intel.com>");
+MODULE_DESCRIPTION("Intel IB driver");
MODULE_VERSION(QIB_DRIVER_VERSION);
/*
@@ -557,7 +558,6 @@ move_along:
}
rcd->head = l;
- rcd->pkt_count += i;
/*
* Iterate over all QPs waiting to respond.
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 4f7aa301b3b..b15e34eeef6 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -39,7 +39,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
@@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
return pollflag;
}
+static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
+{
+ struct qib_filedata *fd = fp->private_data;
+ const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+ int local_cpu;
+
+ /*
+ * If the process has NOT already set its affinity, select and
+ * reserve a processor for it on the local NUMA node.
+ */
+ if ((weight >= qib_cpulist_count) &&
+ (cpumask_weight(local_mask) <= qib_cpulist_count)) {
+ for_each_cpu(local_cpu, local_mask)
+ if (!test_and_set_bit(local_cpu, qib_cpulist)) {
+ fd->rec_cpu_num = local_cpu;
+ return;
+ }
+ }
+
+ /*
+ * If the process has NOT already set its affinity, select and
+ * reserve a processor for it, as a rendezvous for all
+ * users of the driver. If they don't actually later
+ * set affinity to this cpu, or set it to some other cpu,
+ * it just means that sooner or later we don't recommend
+ * a cpu, and let the scheduler do its best.
+ */
+ if (weight >= qib_cpulist_count) {
+ int cpu;
+ cpu = find_first_zero_bit(qib_cpulist,
+ qib_cpulist_count);
+ if (cpu == qib_cpulist_count)
+ qib_dev_err(dd,
+ "no cpus avail for affinity PID %u\n",
+ current->pid);
+ else {
+ __set_bit(cpu, qib_cpulist);
+ fd->rec_cpu_num = cpu;
+ }
+ }
+}
+
/*
* Check that userland and driver are compatible for subcontexts.
*/
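
assign_ctxt_affinity() above first tries CPUs on the device's local node and then any free CPU, claiming a slot in the shared qib_cpulist bitmap with test_and_set_bit() so concurrent opens never reserve the same CPU. A standalone sketch of that reservation idea (bitmap width and candidate masks are made up):

/* An atomic test-and-set on a shared bitmap, driven by a candidate mask. */
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 8

static atomic_ulong cpulist;                    /* models qib_cpulist */

static int reserve_cpu(unsigned long candidates)
{
        for (int cpu = 0; cpu < NCPUS; cpu++) {
                unsigned long bit = 1UL << cpu;

                if (!(candidates & bit))
                        continue;
                /* like test_and_set_bit(): only the first caller wins */
                if (!(atomic_fetch_or(&cpulist, bit) & bit))
                        return cpu;
        }
        return -1;
}

int main(void)
{
        unsigned long local_mask = 0x0f;        /* CPUs on the device's node */

        printf("first context  -> cpu %d\n", reserve_cpu(local_mask));
        printf("second context -> cpu %d\n", reserve_cpu(local_mask));
        return 0;
}
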
@@ -1177,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
return user_swminor == 3;
default:
/* >= 4 are compatible (or are expected to be) */
- return user_swminor >= 4;
+ return user_swminor <= QIB_USER_SWMINOR;
}
}
/* make no promises yet for future major versions */
@@ -1259,12 +1302,20 @@ bail:
static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
struct file *fp, const struct qib_user_info *uinfo)
{
+ struct qib_filedata *fd = fp->private_data;
struct qib_devdata *dd = ppd->dd;
struct qib_ctxtdata *rcd;
void *ptmp = NULL;
int ret;
+ int numa_id;
+
+ assign_ctxt_affinity(fp, dd);
+
+ numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
+ cpu_to_node(fd->rec_cpu_num) :
+ numa_node_id()) : dd->assigned_node_id;
- rcd = qib_create_ctxtdata(ppd, ctxt);
+ rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
/*
* Allocate memory for use in qib_tid_update() at open to
@@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
goto bail;
bailerr:
+ if (fd->rec_cpu_num != -1)
+ __clear_bit(fd->rec_cpu_num, qib_cpulist);
+
dd->rcd[ctxt] = NULL;
kfree(rcd);
kfree(ptmp);
@@ -1405,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
cused++;
else
cfree++;
- if (pusable && cfree && cused < inuse) {
+ if (cfree && cused < inuse) {
udd = dd;
inuse = cused;
}
@@ -1485,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp)
return fp->private_data ? 0 : -ENOMEM;
}
+static int find_hca(unsigned int cpu, int *unit)
+{
+ int ret = 0, devmax, npresent, nup, ndev;
+
+ *unit = -1;
+
+ devmax = qib_count_units(&npresent, &nup);
+ if (!npresent) {
+ ret = -ENXIO;
+ goto done;
+ }
+ if (!nup) {
+ ret = -ENETDOWN;
+ goto done;
+ }
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+ if (dd) {
+ if (pcibus_to_node(dd->pcidev->bus) < 0) {
+ ret = -EINVAL;
+ goto done;
+ }
+ if (cpu_to_node(cpu) ==
+ pcibus_to_node(dd->pcidev->bus)) {
+ *unit = ndev;
+ goto done;
+ }
+ }
+ }
+done:
+ return ret;
+}
+
+static int do_qib_user_sdma_queue_create(struct file *fp)
+{
+ struct qib_filedata *fd = fp->private_data;
+ struct qib_ctxtdata *rcd = fd->rcd;
+ struct qib_devdata *dd = rcd->dd;
+
+ if (dd->flags & QIB_HAS_SEND_DMA) {
+
+ fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+ dd->unit,
+ rcd->ctxt,
+ fd->subctxt);
+ if (!fd->pq)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/*
* Get ctxt early, so can set affinity prior to memory allocation.
*/
@@ -1517,61 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
if (qib_compatible_subctxts(swmajor, swminor) &&
uinfo->spu_subctxt_cnt) {
ret = find_shared_ctxt(fp, uinfo);
- if (ret) {
- if (ret > 0)
- ret = 0;
- goto done_chk_sdma;
+ if (ret > 0) {
+ ret = do_qib_user_sdma_queue_create(fp);
+ if (!ret)
+ assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+ goto done_ok;
}
}
i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
if (i_minor)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
- else
+ else {
+ int unit;
+ const unsigned int cpu = cpumask_first(&current->cpus_allowed);
+ const unsigned int weight =
+ cpumask_weight(&current->cpus_allowed);
+
+ if (weight == 1 && !test_bit(cpu, qib_cpulist))
+ if (!find_hca(cpu, &unit) && unit >= 0)
+ if (!find_free_ctxt(unit, fp, uinfo)) {
+ ret = 0;
+ goto done_chk_sdma;
+ }
ret = get_a_ctxt(fp, uinfo, alg);
-
-done_chk_sdma:
- if (!ret) {
- struct qib_filedata *fd = fp->private_data;
- const struct qib_ctxtdata *rcd = fd->rcd;
- const struct qib_devdata *dd = rcd->dd;
- unsigned int weight;
-
- if (dd->flags & QIB_HAS_SEND_DMA) {
- fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
- dd->unit,
- rcd->ctxt,
- fd->subctxt);
- if (!fd->pq)
- ret = -ENOMEM;
- }
-
- /*
- * If process has NOT already set it's affinity, select and
- * reserve a processor for it, as a rendezvous for all
- * users of the driver. If they don't actually later
- * set affinity to this cpu, or set it to some other cpu,
- * it just means that sooner or later we don't recommend
- * a cpu, and let the scheduler do it's best.
- */
- weight = cpumask_weight(tsk_cpus_allowed(current));
- if (!ret && weight >= qib_cpulist_count) {
- int cpu;
- cpu = find_first_zero_bit(qib_cpulist,
- qib_cpulist_count);
- if (cpu != qib_cpulist_count) {
- __set_bit(cpu, qib_cpulist);
- fd->rec_cpu_num = cpu;
- }
- } else if (weight == 1 &&
- test_bit(cpumask_first(tsk_cpus_allowed(current)),
- qib_cpulist))
- qib_devinfo(dd->pcidev,
- "%s PID %u affinity set to cpu %d; already allocated\n",
- current->comm, current->pid,
- cpumask_first(tsk_cpus_allowed(current)));
}
+done_chk_sdma:
+ if (!ret)
+ ret = do_qib_user_sdma_queue_create(fp);
+done_ok:
mutex_unlock(&qib_mutex);
done:
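
The reworked qib_assign_ctxt() above adds a fast path: when the opening process is pinned to exactly one still-unreserved CPU, find_hca() looks for a device whose PCI bus sits on that CPU's NUMA node, and a free context is tried there before the generic chooser runs. A standalone sketch of that selection logic (node and unit numbers are made up):

/* Try a NUMA-local unit only when the caller is pinned to a single CPU,
 * otherwise fall back to the generic chooser.
 */
#include <stdio.h>

#define NUNITS 2

static const int unit_node[NUNITS] = { 0, 1 };  /* NUMA node of each unit */

static int find_hca_on_node(int node)           /* models find_hca()      */
{
        for (int unit = 0; unit < NUNITS; unit++)
                if (unit_node[unit] == node)
                        return unit;
        return -1;
}

static int pick_unit(int pinned_to_one_cpu, int cpu_node)
{
        if (pinned_to_one_cpu) {
                int unit = find_hca_on_node(cpu_node);

                if (unit >= 0)
                        return unit;            /* NUMA-local device      */
        }
        return 0;                               /* fall back: any unit    */
}

int main(void)
{
        printf("pinned on node 1 -> unit %d\n", pick_unit(1, 1));
        printf("not pinned       -> unit %d\n", pick_unit(0, 1));
        return 0;
}
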
@@ -2208,7 +2289,7 @@ int qib_cdev_init(int minor, const char *name,
goto err_cdev;
}
- device = device_create(qib_class, NULL, dev, NULL, name);
+ device = device_create(qib_class, NULL, dev, NULL, "%s", name);
if (!IS_ERR(device))
goto done;
ret = PTR_ERR(device);
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f247fc6e618..cab610ccd50 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode,
static ssize_t driver_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ qib_stats.sps_ints = qib_sps_ints();
return simple_read_from_buffer(buf, count, ppos, &qib_stats,
sizeof qib_stats);
}
@@ -456,13 +457,13 @@ static int remove_file(struct dentry *parent, char *name)
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
simple_unlink(parent->d_inode, tmp);
} else {
spin_unlock(&tmp->d_lock);
}
+ dput(tmp);
ret = 0;
bail:
@@ -491,6 +492,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
+ mutex_lock(&dir->d_inode->i_mutex);
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -505,8 +507,10 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- d_delete(dir);
+ mutex_unlock(&dir->d_inode->i_mutex);
ret = simple_rmdir(root->d_inode, dir);
+ d_delete(dir);
+ dput(dir);
bail:
mutex_unlock(&root->d_inode->i_mutex);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a099ac171e2..d68266ac761 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
* All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64);
/*
* This file contains all the chip-specific register information and
- * access functions for the QLogic QLogic_IB PCI-Express chip.
+ * access functions for the Intel Intel_IB PCI-Express chip.
*
*/
@@ -1633,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1807,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler re-ordering around actual reset */
@@ -3265,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR;
ppd->link_width_enabled = IB_WIDTH_4X;
@@ -3463,6 +3465,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
return -ENXIO;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ return 0;
+}
+#endif
+
/* Dummy function, as 6120 boards never disable EEPROM Write */
static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
{
@@ -3538,6 +3547,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
dd->f_xgxs_reset = qib_6120_xgxs_reset;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_6120_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_6120_notify_dca;
+#endif
/*
* Do remaining pcie setup and save pcie values in dd.
* Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 64d0ecb90cd..7dec89fdc12 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
-
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
unlikely_7220_intr(dd, istat);
@@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler reordering around actual reset */
@@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
init_waitqueue_head(&cpspec->autoneg_wait);
INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
@@ -4513,6 +4513,13 @@ bail:
return ret;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ return 0;
+}
+#endif
+
/* Dummy function, as 7220 boards never disable EEPROM Write */
static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
{
@@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
dd->f_xgxs_reset = qib_7220_xgxs_reset;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_7220_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7220_notify_dca;
+#endif
/*
* Do remaining pcie setup and save pcie values in dd.
* Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 3f6b21e9dc1..a7eb32517a0 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -44,6 +44,9 @@
#include <linux/module.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
#include "qib.h"
#include "qib_7322_regs.h"
@@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
static void serdes_7322_los_enable(struct qib_pportdata *, int);
static int serdes_7322_init_old(struct qib_pportdata *);
static int serdes_7322_init_new(struct qib_pportdata *);
+static void dump_sdma_7322_state(struct qib_pportdata *);
#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
@@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = {
[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify {
+ int rcv;
+ void *arg;
+ struct irq_affinity_notify notify;
+};
+#endif
+
struct qib_chip_specific {
u64 __iomem *cregbase;
u64 *cntrs;
@@ -546,6 +558,12 @@ struct qib_chip_specific {
u32 lastbuf_for_pio;
u32 stay_in_freeze;
u32 recovery_ports_initted;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ u32 dca_ctrl;
+ int rhdr_cpu[18];
+ int sdma_cpu[2];
+ u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
+#endif
struct qib_msix_entry *msix_entries;
unsigned long *sendchkenable;
unsigned long *sendgrhchk;
@@ -573,7 +591,7 @@ struct vendor_txdds_ent {
static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
-#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */
+#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
@@ -635,6 +653,7 @@ struct qib_chippport_specific {
u8 ibmalfusesnap;
struct qib_qsfp_data qsfp_data;
char epmsgbuf[192]; /* for port error interrupt msg buffer */
+ char sdmamsgbuf[192]; /* for per-port sdma error messages */
};
static struct {
@@ -642,27 +661,75 @@ static struct {
irq_handler_t handler;
int lsb;
int port; /* 0 if not port-specific, else port # */
+ int dca;
} irq_table[] = {
- { "", qib_7322intr, -1, 0 },
+ { "", qib_7322intr, -1, 0, 0 },
{ " (buf avail)", qib_7322bufavail,
- SYM_LSB(IntStatus, SendBufAvail), 0 },
+ SYM_LSB(IntStatus, SendBufAvail), 0, 0},
{ " (sdma 0)", sdma_intr,
- SYM_LSB(IntStatus, SDmaInt_0), 1 },
+ SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
{ " (sdma 1)", sdma_intr,
- SYM_LSB(IntStatus, SDmaInt_1), 2 },
+ SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
{ " (sdmaI 0)", sdma_idle_intr,
- SYM_LSB(IntStatus, SDmaIdleInt_0), 1 },
+ SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
{ " (sdmaI 1)", sdma_idle_intr,
- SYM_LSB(IntStatus, SDmaIdleInt_1), 2 },
+ SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
{ " (sdmaP 0)", sdma_progress_intr,
- SYM_LSB(IntStatus, SDmaProgressInt_0), 1 },
+ SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
{ " (sdmaP 1)", sdma_progress_intr,
- SYM_LSB(IntStatus, SDmaProgressInt_1), 2 },
+ SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
{ " (sdmaC 0)", sdma_cleanup_intr,
- SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 },
+ SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
{ " (sdmaC 1)", sdma_cleanup_intr,
- SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 },
+ SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
+};
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static const struct dca_reg_map {
+ int shadow_inx;
+ int lsb;
+ u64 mask;
+ u16 regno;
+} dca_rcvhdr_reg_map[] = {
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
};
+#endif
/* ibcctrl bits */
#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
@@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *);
static void setup_7322_link_recovery(struct qib_pportdata *, u32);
static void check_7322_rxe_status(struct qib_pportdata *);
static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static void qib_setup_dca(struct qib_devdata *dd);
+static void setup_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+static void reset_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+#endif
/**
* qib_read_ureg32 - read 32-bit virtualized per-context register
@@ -1522,6 +1596,8 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
struct qib_devdata *dd = ppd->dd;
errs &= QIB_E_P_SDMAERRS;
+ err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf),
+ errs, qib_7322p_error_msgs);
if (errs & QIB_E_P_SDMAUNEXPDATA)
qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit,
@@ -1529,6 +1605,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (errs != QIB_E_P_SDMAHALT) {
+ /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
+ qib_dev_porterr(dd, ppd->port,
+ "SDMA %s 0x%016llx %s\n",
+ qib_sdma_state_names[ppd->sdma_state.current_state],
+ errs, ppd->cpspec->sdmamsgbuf);
+ dump_sdma_7322_state(ppd);
+ }
+
switch (ppd->sdma_state.current_state) {
case qib_sdma_state_s00_hw_down:
break;
@@ -2084,6 +2169,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs &
+ (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
+ int pidx = 0;
+ int err;
+ unsigned long flags;
+ struct qib_pportdata *ppd = dd->pport;
+ for (; pidx < dd->num_pports; ++pidx, ppd++) {
+ err = 0;
+ if (pidx == 0 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
+ err++;
+ if (pidx == 1 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
+ err++;
+ if (err) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ dump_sdma_7322_state(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
if (isfatal && !dd->diag_client) {
qib_dev_err(dd,
"Fatal Hardware Error, no longer usable, SN %.16s\n",
@@ -2287,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* ensure previous Tx parameters are not still forced */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
if (qib_compat_ddr_negotiate) {
ppd->cpspec->ibdeltainprog = 1;
ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
@@ -2558,6 +2671,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ switch (event) {
+ case DCA_PROVIDER_ADD:
+ if (dd->flags & QIB_DCA_ENABLED)
+ break;
+ if (!dca_add_requester(&dd->pcidev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+ break;
+ case DCA_PROVIDER_REMOVE:
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA),
+ dd->cspec->dca_ctrl);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
+ const struct dca_reg_map *rmp;
+
+ cspec->rhdr_cpu[rcd->ctxt] = cpu;
+ rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
+ qib_devinfo(dd->pcidev,
+ "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ qib_write_kreg(dd, rmp->regno,
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+ unsigned pidx = ppd->port - 1;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->sdma_cpu[pidx] != cpu) {
+ cspec->sdma_cpu[pidx] = cpu;
+ cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
+ SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
+ SYM_MASK(DCACtrlF, SendDma0DCAOPH));
+ cspec->dca_rcvhdr_ctrl[4] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
+ (ppd->hw_pidx ?
+ SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
+ SYM_LSB(DCACtrlF, SendDma0DCAOPH));
+ qib_devinfo(dd->pcidev,
+ "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[4]);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlF),
+ cspec->dca_rcvhdr_ctrl[4]);
+ cspec->dca_ctrl |= ppd->hw_pidx ?
+ SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
+ SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_setup_dca(struct qib_devdata *dd)
+{
+ struct qib_chip_specific *cspec = dd->cspec;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
+ cspec->rhdr_cpu[i] = -1;
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ cspec->sdma_cpu[i] = -1;
+ cspec->dca_rcvhdr_ctrl[0] =
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[1] =
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[2] =
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[3] =
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[4] =
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
+ cspec->dca_rcvhdr_ctrl[i]);
+ for (i = 0; i < cspec->num_msix_entries; i++)
+ setup_dca_notifier(dd, &cspec->msix_entries[i]);
+}
+
+static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct qib_irq_notify *n =
+ container_of(notify, struct qib_irq_notify, notify);
+ int cpu = cpumask_first(mask);
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ qib_update_rhdrq_dca(rcd, cpu);
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ qib_update_sdma_dca(ppd, cpu);
+ }
+}
+
+static void qib_irq_notifier_release(struct kref *ref)
+{
+ struct qib_irq_notify *n =
+ container_of(ref, struct qib_irq_notify, notify.kref);
+ struct qib_devdata *dd;
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ dd = rcd->dd;
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ dd = ppd->dd;
+ }
+ qib_devinfo(dd->pcidev,
+ "release on HCA notify 0x%p n 0x%p\n", ref, n);
+ kfree(n);
+}
+#endif
+
/*
* Disable MSIx interrupt if enabled, call generic MSIx code
* to cleanup, and clear pending MSIx interrupts.
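
The DCA update helpers in the hunk above (qib_update_rhdrq_dca, qib_update_sdma_dca) keep a shadow copy of each DCACtrl register and update one queue's field as clear-by-mask then or-in-new-tag before writing the shadow back. A standalone sketch of that read-modify-write pattern (field placement and tag value below are hypothetical):

/* Each queue owns a field described by a (keep_mask, lsb) pair; the real
 * driver writes the updated shadow back to the hardware register.
 */
#include <stdint.h>
#include <stdio.h>

struct dca_field {
        uint64_t keep_mask;     /* ~field mask: bits to preserve */
        int lsb;                /* field position                */
};

static uint64_t shadow;         /* models cspec->dca_rcvhdr_ctrl[i] */

static void set_dca_tag(const struct dca_field *f, uint64_t tag)
{
        shadow &= f->keep_mask;         /* clear this queue's field */
        shadow |= tag << f->lsb;        /* install the new DCA tag  */
}

int main(void)
{
        struct dca_field f = { .keep_mask = ~0xff00ULL, .lsb = 8 };

        set_dca_tag(&f, 0xab);
        printf("shadow = 0x%016llx\n", (unsigned long long)shadow);
        return 0;
}
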
@@ -2575,6 +2844,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
dd->cspec->num_msix_entries = 0;
for (i = 0; i < n; i++) {
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
+#endif
irq_set_affinity_hint(
dd->cspec->msix_entries[i].msix.vector, NULL);
free_cpumask_var(dd->cspec->msix_entries[i].mask);
@@ -2602,6 +2874,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
{
int i;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
+ }
+#endif
+
qib_7322_free_irq(dd);
kfree(dd->cspec->cntrs);
kfree(dd->cspec->sendchkenable);
@@ -2834,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* handle "errors" of various kinds first, device ahead of port */
if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -2905,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
@@ -2934,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -2967,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -2996,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3025,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3055,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3068,6 +3335,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
return IRQ_HANDLED;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ if (!m->dca)
+ return;
+ qib_devinfo(dd->pcidev,
+ "Disabling notifier on HCA %d irq %d\n",
+ dd->unit,
+ m->msix.vector);
+ irq_set_affinity_notifier(
+ m->msix.vector,
+ NULL);
+ m->notifier = NULL;
+}
+
+static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ struct qib_irq_notify *n;
+
+ if (!m->dca)
+ return;
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (n) {
+ int ret;
+
+ m->notifier = n;
+ n->notify.irq = m->msix.vector;
+ n->notify.notify = qib_irq_notifier_notify;
+ n->notify.release = qib_irq_notifier_release;
+ n->arg = m->arg;
+ n->rcv = m->rcv;
+ qib_devinfo(dd->pcidev,
+ "set notifier irq %d rcv %d notify %p\n",
+ n->notify.irq, n->rcv, &n->notify);
+ ret = irq_set_affinity_notifier(
+ n->notify.irq,
+ &n->notify);
+ if (ret) {
+ m->notifier = NULL;
+ kfree(n);
+ }
+ }
+}
+
+#endif
+
/*
* Set up our chip-specific interrupt handler.
* The interrupt type has already been setup, so
@@ -3149,6 +3463,9 @@ try_intx:
void *arg;
u64 val;
int lsb, reg, sh;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca = 0;
+#endif
dd->cspec->msix_entries[msixnum].
name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
@@ -3161,6 +3478,9 @@ try_intx:
arg = dd->pport + irq_table[i].port - 1;
} else
arg = dd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = irq_table[i].dca;
+#endif
lsb = irq_table[i].lsb;
handler = irq_table[i].handler;
snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3178,6 +3498,9 @@ try_intx:
continue;
if (qib_krcvq01_no_msi && ctxt < 2)
continue;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = 1;
+#endif
lsb = QIB_I_RCVAVAIL_LSB + ctxt;
handler = qib_7322pintr;
snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3203,6 +3526,11 @@ try_intx:
goto try_intx;
}
dd->cspec->msix_entries[msixnum].arg = arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->cspec->msix_entries[msixnum].dca = dca;
+ dd->cspec->msix_entries[msixnum].rcv =
+ handler == qib_7322pintr;
+#endif
if (lsb >= 0) {
reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -3381,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
dd->pport->cpspec->ibsymdelta = 0;
dd->pport->cpspec->iblnkerrdelta = 0;
dd->pport->cpspec->ibmalfdelta = 0;
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
/*
* Keep chip from being accessed until we are ready. Use
@@ -5853,21 +6182,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
- int ret;
-
+ char *n;
if (strlen(str) >= MAX_ATTEN_LEN) {
pr_info("txselect_values string too long\n");
return -ENOSPC;
}
- ret = kstrtoul(str, 0, &val);
- if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ val = simple_strtoul(str, &n, 0);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
pr_info("txselect_values must start with a number < %d\n",
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
- return ret ? ret : -EINVAL;
+ return -EINVAL;
}
-
strcpy(txselect_list, str);
+
list_for_each_entry(dd, &qib_dev_list, list)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
set_no_qsfp_atten(dd, 1);
@@ -6216,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
}
dd->num_pports++;
- qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ if (ret) {
+ dd->num_pports--;
+ goto bail;
+ }
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_width_enabled = IB_WIDTH_4X;
@@ -6452,6 +6784,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
}
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+ u64 reg, reg1, reg2;
+
+ reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmastatus: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_sendctrl);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sendctrl: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabase);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabase: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ /* get bufuse bits, clear them, and print them again if non-zero */
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use1, reg1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use2, reg2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmatail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmatail: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmahead);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmahead: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmalengen: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+ qib_dev_porterr(ppd->dd, ppd->port,
+		 "SDMA senddmaprioritythld: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+ dump_sdma_state(ppd);
+}
+
static struct sdma_set_state_action sdma_7322_action_table[] = {
[qib_sdma_state_s00_hw_down] = {
.go_s99_running_tofalse = 1,
@@ -6885,6 +7297,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
dd->f_sdma_init_early = qib_7322_sdma_init_early;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_7322_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7322_notify_dca;
+#endif
/*
* Do remaining PCIe setup and save PCIe values in dd.
* Any error printing is already done by the init code.
@@ -6921,7 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
actual_cnt -= dd->num_pports;
tabsize = actual_cnt;
- dd->cspec->msix_entries = kmalloc(tabsize *
+ dd->cspec->msix_entries = kzalloc(tabsize *
sizeof(struct qib_msix_entry), GFP_KERNEL);
if (!dd->cspec->msix_entries) {
qib_dev_err(dd, "No memory for MSIx table\n");
@@ -6941,7 +7356,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
/* clear diagctrl register, in case diags were running and crashed */
qib_write_kreg(dd, kr_hwdiagctrl, 0);
-
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (!dca_add_requester(&pdev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+#endif
goto bail;
bail_cleanup:
@@ -7156,15 +7577,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
{ 0, 0, 0, 1 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 2 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 2 }, /* QMH7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
{ 0, 0, 0, 3 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 4 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
};
static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
@@ -7173,15 +7599,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
{ 0, 0, 0, 7 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 8 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 8 }, /* QMH7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
{ 0, 0, 0, 9 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 10 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
};
static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
@@ -7190,15 +7621,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
{ 0, 1, 0, 5 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 6 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 8 }, /* QMH7342 backplane settings */
- { 0, 1, 12, 10 }, /* QME7342 backplane setting */
- { 0, 1, 12, 11 }, /* QME7342 backplane setting */
- { 0, 1, 12, 12 }, /* QME7342 backplane setting */
- { 0, 1, 12, 14 }, /* QME7342 backplane setting */
- { 0, 1, 12, 6 }, /* QME7342 backplane setting */
- { 0, 1, 12, 7 }, /* QME7342 backplane setting */
- { 0, 1, 12, 8 }, /* QME7342 backplane setting */
{ 0, 1, 0, 10 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 12 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
};
static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 50e33aa0b4e..8d3c78ddc90 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -39,10 +39,17 @@
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/printk.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
#include "qib.h"
#include "qib_common.h"
#include "qib_mad.h"
+#ifdef CONFIG_DEBUG_FS
+#include "qib_debugfs.h"
+#include "qib_verbs.h"
+#endif
#undef pr_fmt
#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
@@ -64,6 +71,11 @@ ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+ "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
+
/*
* If set, do not write to any regs if avoidable, hack to allow
* check for deranged default register values.
@@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
-struct workqueue_struct *qib_cq_wq;
-
static void verify_interrupt(unsigned long);
static struct idr qib_unit_table;
@@ -120,7 +130,11 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
int qib_create_ctxts(struct qib_devdata *dd)
{
unsigned i;
- int ret;
+ int local_node_id = pcibus_to_node(dd->pcidev->bus);
+
+ if (local_node_id < 0)
+ local_node_id = numa_node_id();
+ dd->assigned_node_id = local_node_id;
/*
* Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -130,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd)
if (!dd->rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata array, failing\n");
- ret = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* create (one or more) kctxt */
@@ -143,38 +156,51 @@ int qib_create_ctxts(struct qib_devdata *dd)
continue;
ppd = dd->pport + (i % dd->num_pports);
- rcd = qib_create_ctxtdata(ppd, i);
+
+ rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
if (!rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
- ret = -ENOMEM;
- goto done;
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+ return -ENOMEM;
}
rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
rcd->seq_cnt = 1;
}
- ret = 0;
-done:
- return ret;
+ return 0;
}
/*
* Common code for user and kernel context setup.
*/
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+ int node_id)
{
struct qib_devdata *dd = ppd->dd;
struct qib_ctxtdata *rcd;
- rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+ rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
if (rcd) {
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ rcd->node_id = node_id;
rcd->ppd = ppd;
rcd->dd = dd;
rcd->cnt = 1;
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
-
+#ifdef CONFIG_DEBUG_FS
+ if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, node_id);
+ if (!rcd->opstats) {
+ kfree(rcd);
+ qib_dev_err(dd,
+ "Unable to allocate per ctxt stats buffer\n");
+ return NULL;
+ }
+ }
+#endif
dd->f_init_ctxt(rcd);
/*
@@ -204,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
/*
* Common code for initializing the physical port structure.
*/
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
int size;
@@ -214,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->sdma_lock);
spin_lock_init(&ppd->lflags_lock);
+ spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
init_timer(&ppd->symerr_clear_timer);
@@ -221,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
ppd->symerr_clear_timer.data = (unsigned long)ppd;
ppd->qib_wq = NULL;
-
- spin_lock_init(&ppd->cc_shadow_lock);
+ ppd->ibport_data.pmastats =
+ alloc_percpu(struct qib_pma_counters);
+ if (!ppd->ibport_data.pmastats)
+ return -ENOMEM;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
goto bail;
@@ -270,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
goto bail_3;
}
- return;
+ return 0;
bail_3:
kfree(ppd->ccti_entries_shadow);
@@ -284,7 +313,7 @@ bail_1:
bail:
/* User is intentionally disabling the congestion control agent */
if (!qib_cc_table_size)
- return;
+ return 0;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
qib_cc_table_size = 0;
@@ -295,7 +324,7 @@ bail:
qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
port);
- return;
+ return 0;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd)
dd->intrchk_timer.function = verify_interrupt;
dd->intrchk_timer.data = (unsigned long) dd;
+ ret = qib_cq_init(dd);
done:
return ret;
}
@@ -495,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
static void verify_interrupt(unsigned long opaque)
{
struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ u64 int_counter;
if (!dd)
return; /* being torn down */
@@ -503,7 +534,8 @@ static void verify_interrupt(unsigned long opaque)
* If we don't have a lid or any interrupts, let the user know and
* don't bother checking again.
*/
- if (dd->int_counter == 0) {
+ int_counter = qib_int_counter(dd) - dd->z_int_counter;
+ if (int_counter == 0) {
if (!dd->f_intr_fallback(dd))
dev_err(&dd->pcidev->dev,
"No interrupts detected, not usable.\n");
@@ -603,6 +635,12 @@ wq_error:
return -ENOMEM;
}
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+ free_percpu(ppd->ibport_data.pmastats);
+ ppd->ibport_data.pmastats = NULL;
+}
+
/**
* qib_init - do the actual initialization sequence on the chip
* @dd: the qlogic_ib device
@@ -890,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd)
destroy_workqueue(ppd->qib_wq);
ppd->qib_wq = NULL;
}
+ qib_free_pportdata(ppd);
}
qib_update_eeprom_log(dd);
@@ -944,6 +983,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
+#ifdef CONFIG_DEBUG_FS
+ kfree(rcd->opstats);
+ rcd->opstats = NULL;
+#endif
kfree(rcd);
}
@@ -1033,7 +1076,6 @@ done:
dd->f_set_armlaunch(dd, 1);
}
-
void qib_free_devdata(struct qib_devdata *dd)
{
unsigned long flags;
@@ -1043,9 +1085,37 @@ void qib_free_devdata(struct qib_devdata *dd)
list_del(&dd->list);
spin_unlock_irqrestore(&qib_devs_lock, flags);
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_exit(&dd->verbs_dev);
+#endif
+ free_percpu(dd->int_counter);
ib_dealloc_device(&dd->verbs_dev.ibdev);
}
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+ int cpu;
+ u64 int_counter = 0;
+
+ for_each_possible_cpu(cpu)
+ int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+ return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ sps_ints += qib_int_counter(dd);
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return sps_ints;
+}
+
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
@@ -1061,10 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
int ret;
dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
+ if (!dd)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&dd->list);
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&qib_devs_lock, flags);
@@ -1081,8 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (ret < 0) {
qib_early_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
- dd = ERR_PTR(ret);
+ goto bail;
+ }
+ dd->int_counter = alloc_percpu(u64);
+ if (!dd->int_counter) {
+ ret = -ENOMEM;
+ qib_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
goto bail;
}
@@ -1096,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
qib_early_err(&pdev->dev,
"Could not alloc cpulist info, cpu affinity might be wrong\n");
}
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
return dd;
+bail:
+ if (!list_empty(&dd->list))
+ list_del_init(&dd->list);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+	return ERR_PTR(ret);
}
/*
@@ -1138,7 +1219,7 @@ void qib_disable_after_error(struct qib_devdata *dd)
static void qib_remove_one(struct pci_dev *);
static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
-#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
+#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "
static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
@@ -1150,7 +1231,7 @@ static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
-struct pci_driver qib_driver = {
+static struct pci_driver qib_driver = {
.name = QIB_DRV_NAME,
.probe = qib_init_one,
.remove = qib_remove_one,
@@ -1158,11 +1239,40 @@ struct pci_driver qib_driver = {
.err_handler = &qib_pci_err_handler,
};
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+ .notifier_call = qib_notify_dca,
+ .next = NULL,
+ .priority = 0
+};
+
+static int qib_notify_dca_device(struct device *device, void *data)
+{
+ struct qib_devdata *dd = dev_get_drvdata(device);
+ unsigned long event = *(unsigned long *)data;
+
+ return dd->f_notify_dca(dd, event);
+}
+
+static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
+ void *p)
+{
+ int rval;
+
+ rval = driver_for_each_device(&qib_driver.driver, NULL,
+ &event, qib_notify_dca_device);
+ return rval ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+#endif
+
/*
* Do all the generic driver unit- and chip-independent memory
* allocation and initialization.
*/
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
{
int ret;
@@ -1170,22 +1280,22 @@ static int __init qlogic_ib_init(void)
if (ret)
goto bail;
- qib_cq_wq = create_singlethread_workqueue("qib_cq");
- if (!qib_cq_wq) {
- ret = -ENOMEM;
- goto bail_dev;
- }
-
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
idr_init(&qib_unit_table);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_register_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_init();
+#endif
ret = pci_register_driver(&qib_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
- goto bail_unit;
+ goto bail_dev;
}
/* not fatal if it doesn't work */
@@ -1193,21 +1303,25 @@ static int __init qlogic_ib_init(void)
pr_err("Unable to register ipathfs\n");
goto bail; /* all OK */
-bail_unit:
- idr_destroy(&qib_unit_table);
- destroy_workqueue(qib_cq_wq);
bail_dev:
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
+ idr_destroy(&qib_unit_table);
qib_dev_cleanup();
bail:
return ret;
}
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
/*
* Do the non-unit driver cleanup, memory free, etc. at unload.
*/
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
{
int ret;
@@ -1217,9 +1331,13 @@ static void __exit qlogic_ib_cleanup(void)
"Unable to cleanup counter filesystem: error %d\n",
-ret);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
pci_unregister_driver(&qib_driver);
-
- destroy_workqueue(qib_cq_wq);
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
qib_cpulist_count = 0;
kfree(qib_cpulist);
@@ -1228,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
qib_dev_cleanup();
}
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
@@ -1270,7 +1388,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
if (dd->pageshadow) {
struct page **tmpp = dd->pageshadow;
dma_addr_t *tmpd = dd->physshadow;
- int i, cnt = 0;
+ int i;
for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
int ctxt_tidbase = ctxt * dd->rcvtidcnt;
@@ -1283,13 +1401,13 @@ static void cleanup_device_data(struct qib_devdata *dd)
PAGE_SIZE, PCI_DMA_FROMDEVICE);
qib_release_user_pages(&tmpp[i], 1);
tmpp[i] = NULL;
- cnt++;
}
}
- tmpp = dd->pageshadow;
dd->pageshadow = NULL;
vfree(tmpp);
+ dd->physshadow = NULL;
+ vfree(tmpd);
}
/*
@@ -1311,6 +1429,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
}
kfree(tmp);
kfree(dd->boardname);
+ qib_cq_exit(dd);
}
/*
@@ -1355,7 +1474,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd = qib_init_iba6120_funcs(pdev, ent);
#else
qib_early_err(&pdev->dev,
- "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
+ "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
ent->device);
dd = ERR_PTR(-ENODEV);
#endif
@@ -1371,7 +1490,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
default:
qib_early_err(&pdev->dev,
- "Failing on unknown QLogic deviceid 0x%x\n",
+ "Failing on unknown Intel deviceid 0x%x\n",
ent->device);
ret = -ENODEV;
}
@@ -1483,6 +1602,7 @@ static void qib_remove_one(struct pci_dev *pdev)
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
unsigned amt;
+ int old_node_id;
if (!rcd->rcvhdrq) {
dma_addr_t phys_hdrqtail;
@@ -1492,9 +1612,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
sizeof(u32), PAGE_SIZE);
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
gfp_flags | __GFP_COMP);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrq) {
qib_dev_err(dd,
@@ -1510,9 +1634,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
}
if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1556,6 +1682,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
gfp_t gfp_flags;
+ int old_node_id;
/*
* GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1574,25 +1701,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
size = rcd->rcvegrbuf_size;
if (!rcd->rcvegrbuf) {
rcd->rcvegrbuf =
- kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
- GFP_KERNEL);
+ kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf)
goto bail;
}
if (!rcd->rcvegrbuf_phys) {
rcd->rcvegrbuf_phys =
- kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
- GFP_KERNEL);
+ kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf_phys)
goto bail_rcvegrbuf;
}
for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
if (rcd->rcvegrbuf[e])
continue;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvegrbuf[e] =
dma_alloc_coherent(&dd->pcidev->dev, size,
&rcd->rcvegrbuf_phys[e],
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvegrbuf[e])
goto bail_rcvegrbuf_phys;
}
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 81c7b73695d..3b9afccaaad 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -61,7 +61,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
if (dma_region) {
struct qib_mregion *tmr;
- tmr = rcu_dereference(dev->dma_mr);
+ tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
qib_get_mr(mr);
rcu_assign_pointer(dev->dma_mr, mr);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ccb119143d2..22c720e5740 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
event.event = IB_EVENT_PKEY_CHANGE;
event.device = &dd->verbs_dev.ibdev;
- event.element.port_num = 1;
+ event.element.port_num = port;
ib_dispatch_event(&event);
}
return 0;
@@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
return reply((struct ib_smp *)pmp);
}
+static void qib_snapshot_pmacounters(
+ struct qib_ibport *ibp,
+ struct qib_pma_counters *pmacounters)
+{
+ struct qib_pma_counters *p;
+ int cpu;
+
+ memset(pmacounters, 0, sizeof(*pmacounters));
+ for_each_possible_cpu(cpu) {
+ p = per_cpu_ptr(ibp->pmastats, cpu);
+ pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+ pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+ pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+ pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+ }
+}
+
static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
@@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
u8 port_select = p->port_select;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
p->port_rcv_data = cpu_to_be64(rwords);
p->port_xmit_packets = cpu_to_be64(spkts);
p->port_rcv_packets = cpu_to_be64(rpkts);
- p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
- p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
- p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
- p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+ - ibp->z_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+ - ibp->z_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+ - ibp->z_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+ - ibp->z_multicast_rcv);
bail:
return reply((struct ib_smp *) pmp);
@@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
@@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
ibp->z_port_rcv_packets = rpkts;
+ qib_snapshot_pmacounters(ibp, &pma);
+
if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
- ibp->n_unicast_xmit = 0;
+ ibp->z_unicast_xmit = pma.n_unicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
- ibp->n_unicast_rcv = 0;
+ ibp->z_unicast_rcv = pma.n_unicast_rcv;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
- ibp->n_multicast_xmit = 0;
+ ibp->z_multicast_xmit = pma.n_multicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
- ibp->n_multicast_rcv = 0;
+ ibp->z_multicast_rcv = pma.n_multicast_rcv;
return pma_get_portcounters_ext(pmp, ibdev, port);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 57bd3fa016b..941d4d50d8e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_notice_attr {
u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
__be16 reserved;
__be16 lid; /* where violation happened */
u8 port_num; /* where violation happened */
- } __attribute__ ((packed)) ntc_129_131;
+ } __packed ntc_129_131;
struct {
__be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
__be32 new_cap_mask; /* new capability mask */
u8 reserved3;
u8 change_flags; /* low 3 bits only */
- } __attribute__ ((packed)) ntc_144;
+ } __packed ntc_144;
struct {
__be16 reserved;
__be16 lid; /* lid where sys guid changed */
__be16 reserved2;
__be64 new_sys_guid;
- } __attribute__ ((packed)) ntc_145;
+ } __packed ntc_145;
struct {
__be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
u8 reserved3;
u8 dr_trunc_hop;
u8 dr_rtn_path[30];
- } __attribute__ ((packed)) ntc_256;
+ } __packed ntc_256;
struct {
__be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
__be32 qp2; /* high 8 bits reserved */
union ib_gid gid1;
union ib_gid gid2;
- } __attribute__ ((packed)) ntc_257_258;
+ } __packed ntc_257_258;
} details;
};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
@@ -415,7 +415,6 @@ struct cc_table_shadow {
struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
} __packed;
-#endif /* _QIB_MAD_H */
/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
@@ -428,3 +427,5 @@ struct cc_table_shadow {
COUNTER_MASK(1, 2) | \
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
+
+#endif /* _QIB_MAD_H */
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index e6687ded821..9bbb55347cc 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct qib_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ struct scatterlist *sg;
+ int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
@@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
+ n = umem->nmap;
mr = alloc_mr(n, pd);
if (IS_ERR(mr)) {
@@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
- vaddr = page_address(sg_page(&chunk->page_list[i]));
+ vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
@@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m++;
n = 0;
}
- }
}
ret = &mr->ibmr;
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index c574ec7c85e..61a0046efb7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -51,8 +51,8 @@
* file calls, even though this violates some
* expectations of harmlessness.
*/
-static int qib_tune_pcie_caps(struct qib_devdata *);
-static int qib_tune_pcie_coalesce(struct qib_devdata *);
+static void qib_tune_pcie_caps(struct qib_devdata *);
+static void qib_tune_pcie_coalesce(struct qib_devdata *);
/*
* Do all the common PCIe setup and initialization.
@@ -197,46 +197,47 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
struct qib_msix_entry *qib_msix_entry)
{
int ret;
- u32 tabsize = 0;
- u16 msix_flags;
+ int nvec = *msixcnt;
struct msix_entry *msix_entry;
int i;
+ ret = pci_msix_vec_count(dd->pcidev);
+ if (ret < 0)
+ goto do_intx;
+
+ nvec = min(nvec, ret);
+
/* We can't pass qib_msix_entry array to qib_msix_setup
* so use a dummy msix_entry array and copy the allocated
* irq back to the qib_msix_entry array. */
- msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
+ msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL);
+ if (!msix_entry)
goto do_intx;
- }
- for (i = 0; i < *msixcnt; i++)
+
+ for (i = 0; i < nvec; i++)
msix_entry[i] = qib_msix_entry[i].msix;
- pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
- tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
- if (tabsize > *msixcnt)
- tabsize = *msixcnt;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- if (ret > 0) {
- tabsize = ret;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- }
-do_intx:
- if (ret) {
- qib_dev_err(dd,
- "pci_enable_msix %d vectors failed: %d, falling back to INTx\n",
- tabsize, ret);
- tabsize = 0;
- }
- for (i = 0; i < tabsize; i++)
+ ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
+ if (ret < 0)
+ goto free_msix_entry;
+ else
+ nvec = ret;
+
+ for (i = 0; i < nvec; i++)
qib_msix_entry[i].msix = msix_entry[i];
+
kfree(msix_entry);
- *msixcnt = tabsize;
+ *msixcnt = nvec;
+ return;
- if (ret)
- qib_enable_intx(dd->pcidev);
+free_msix_entry:
+ kfree(msix_entry);
+do_intx:
+	qib_dev_err(dd,
+		"pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", nvec, ret);
+ *msixcnt = 0;
+ qib_enable_intx(dd->pcidev);
}
/**
@@ -283,12 +284,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
goto bail;
}
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX);
+ pos = dd->pcidev->msix_cap;
if (nent && *nent && pos) {
qib_msix_setup(dd, pos, nent, entry);
ret = 0; /* did it, either MSIx or INTx */
} else {
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (pos)
ret = qib_msi_setup(dd, pos);
else
@@ -357,7 +358,7 @@ int qib_reinit_intr(struct qib_devdata *dd)
if (!dd->msi_lo)
goto bail;
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (!pos) {
qib_dev_err(dd,
"Can't find MSI capability, can't restore MSI settings\n");
@@ -426,7 +427,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, PCI_COMMAND, new);
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pos = pdev->msi_cap;
if (pos) {
/* then turn off MSI */
pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
@@ -434,7 +435,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
}
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ pos = pdev->msix_cap;
if (pos) {
/* then turn off MSIx */
pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw);
@@ -476,30 +477,6 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
"pci_enable_device failed after reset: %d\n", r);
}
-/* code to adjust PCIe capabilities. */
-
-static int fld2val(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- wd &= mask;
- lsbmask = mask ^ (mask & (mask - 1));
- wd /= lsbmask;
- return wd;
-}
-
-static int val2fld(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- lsbmask = mask ^ (mask & (mask - 1));
- wd *= lsbmask;
- return wd;
-}
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
@@ -511,7 +488,7 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
* of these chipsets, with some BIOS settings, and enabling it on those
* systems may result in the system crashing, and/or data corruption.
*/
-static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
+static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
{
int r;
struct pci_dev *parent;
@@ -519,18 +496,18 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
u32 mask, bits, val;
if (!qib_pcie_coalesce)
- return 0;
+ return;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
if (parent->bus->parent) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- return 1;
+ return;
}
if (!pci_is_pcie(parent))
- return 1;
+ return;
if (parent->vendor != 0x8086)
- return 1;
+ return;
/*
* - bit 12: Max_rdcmp_Imt_EN: need to set to 1
@@ -563,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
mask = (3U << 24) | (7U << 10);
} else {
/* not one of the chipsets that we know about */
- return 1;
+ return;
}
pci_read_config_dword(parent, 0x48, &val);
val &= ~mask;
val |= bits;
r = pci_write_config_dword(parent, 0x48, val);
- return 0;
}
/*
@@ -580,55 +556,44 @@ static int qib_pcie_caps;
module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-static int qib_tune_pcie_caps(struct qib_devdata *dd)
+static void qib_tune_pcie_caps(struct qib_devdata *dd)
{
- int ret = 1; /* Assume the worst */
struct pci_dev *parent;
- u16 pcaps, pctl, ecaps, ectl;
- int rc_sup, ep_sup;
- int rc_cur, ep_cur;
+ u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
+ u16 rc_mrrs, ep_mrrs, max_mrrs;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
- if (parent->bus->parent) {
+ if (!pci_is_root_bus(parent->bus)) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- goto bail;
+ return;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
- goto bail;
- pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps);
- pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl);
+ return;
+
+ rc_mpss = parent->pcie_mpss;
+ rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ ep_mpss = dd->pcidev->pcie_mpss;
+ ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
- ret = 0;
/* Find max payload supported by root, endpoint */
- rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
- ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD);
- if (rc_sup > ep_sup)
- rc_sup = ep_sup;
-
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
+ if (rc_mpss > ep_mpss)
+ rc_mpss = ep_mpss;
/* If Supported greater than limit in module param, limit it */
- if (rc_sup > (qib_pcie_caps & 7))
- rc_sup = qib_pcie_caps & 7;
+ if (rc_mpss > (qib_pcie_caps & 7))
+ rc_mpss = qib_pcie_caps & 7;
/* If less than (allowed, supported), bump root payload */
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ if (rc_mpss > rc_mps) {
+ rc_mps = rc_mpss;
+ pcie_set_mps(parent, 128 << rc_mps);
}
/* If less than (allowed, supported), bump endpoint payload */
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (rc_mpss > ep_mps) {
+ ep_mps = rc_mpss;
+ pcie_set_mps(dd->pcidev, 128 << ep_mps);
}
/*
@@ -636,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
* No field for max supported, but PCIe spec limits it to 4096,
* which is code '5' (log2(4096) - 7)
*/
- rc_sup = 5;
- if (rc_sup > ((qib_pcie_caps >> 4) & 7))
- rc_sup = (qib_pcie_caps >> 4) & 7;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ max_mrrs = 5;
+ if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
+ max_mrrs = (qib_pcie_caps >> 4) & 7;
+
+ max_mrrs = 128 << max_mrrs;
+ rc_mrrs = pcie_get_readrq(parent);
+ ep_mrrs = pcie_get_readrq(dd->pcidev);
+
+ if (max_mrrs > rc_mrrs) {
+ rc_mrrs = max_mrrs;
+ pcie_set_readrq(parent, rc_mrrs);
}
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (max_mrrs > ep_mrrs) {
+ ep_mrrs = max_mrrs;
+ pcie_set_readrq(dd->pcidev, ep_mrrs);
}
-bail:
- return ret;
}
/* End of PCIe capability tuning */
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index a6a2cc2ba26..7fcc150d603 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -35,6 +35,9 @@
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/jhash.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/seq_file.h>
+#endif
#include "qib.h"
@@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
unsigned long flags;
unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
- spin_lock_irqsave(&dev->qpt_lock, flags);
atomic_inc(&qp->refcount);
+ spin_lock_irqsave(&dev->qpt_lock, flags);
if (qp->ibqp.qp_num == 0)
rcu_assign_pointer(ibp->qp0, qp);
@@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
}
spin_unlock_irqrestore(&dev->qpt_lock, flags);
- synchronize_rcu();
}
/*
@@ -247,36 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
unsigned long flags;
+ int removed = 1;
spin_lock_irqsave(&dev->qpt_lock, flags);
if (rcu_dereference_protected(ibp->qp0,
lockdep_is_held(&dev->qpt_lock)) == qp) {
- atomic_dec(&qp->refcount);
rcu_assign_pointer(ibp->qp0, NULL);
} else if (rcu_dereference_protected(ibp->qp1,
lockdep_is_held(&dev->qpt_lock)) == qp) {
- atomic_dec(&qp->refcount);
rcu_assign_pointer(ibp->qp1, NULL);
} else {
struct qib_qp *q;
struct qib_qp __rcu **qpp;
+ removed = 0;
qpp = &dev->qp_table[n];
for (; (q = rcu_dereference_protected(*qpp,
lockdep_is_held(&dev->qpt_lock))) != NULL;
qpp = &q->next)
if (q == qp) {
- atomic_dec(&qp->refcount);
rcu_assign_pointer(*qpp,
rcu_dereference_protected(qp->next,
lockdep_is_held(&dev->qpt_lock)));
+ removed = 1;
break;
}
}
spin_unlock_irqrestore(&dev->qpt_lock, flags);
- synchronize_rcu();
+ if (removed) {
+ synchronize_rcu();
+ atomic_dec(&qp->refcount);
+ }
}
/**
@@ -334,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
{
struct qib_qp *qp = NULL;
+ rcu_read_lock();
if (unlikely(qpn <= 1)) {
- rcu_read_lock();
if (qpn == 0)
qp = rcu_dereference(ibp->qp0);
else
qp = rcu_dereference(ibp->qp1);
+ if (qp)
+ atomic_inc(&qp->refcount);
} else {
struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
unsigned n = qpn_hash(dev, qpn);
- rcu_read_lock();
for (qp = rcu_dereference(dev->qp_table[n]); qp;
qp = rcu_dereference(qp->next))
- if (qp->ibqp.qp_num == qpn)
+ if (qp->ibqp.qp_num == qpn) {
+ atomic_inc(&qp->refcount);
break;
+ }
}
- if (qp)
- if (unlikely(!atomic_inc_not_zero(&qp->refcount)))
- qp = NULL;
-
rcu_read_unlock();
return qp;
}
@@ -581,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -981,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
@@ -1286,3 +1291,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
}
}
}
+
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter {
+ struct qib_ibdev *dev;
+ struct qib_qp *qp;
+ int n;
+};
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
+{
+ struct qib_qp_iter *iter;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->dev = dev;
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+int qib_qp_iter_next(struct qib_qp_iter *iter)
+{
+ struct qib_ibdev *dev = iter->dev;
+ int n = iter->n;
+ int ret = 1;
+ struct qib_qp *pqp = iter->qp;
+ struct qib_qp *qp;
+
+ rcu_read_lock();
+ for (; n < dev->qp_table_size; n++) {
+ if (pqp)
+ qp = rcu_dereference(pqp->next);
+ else
+ qp = rcu_dereference(dev->qp_table[n]);
+ pqp = qp;
+ if (qp) {
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ atomic_inc(&qp->refcount);
+ rcu_read_unlock();
+ iter->qp = qp;
+ iter->n = n;
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ return ret;
+}
+
+static const char * const qp_type_str[] = {
+ "SMI", "GSI", "RC", "UC", "UD",
+};
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
+{
+ struct qib_swqe *wqe;
+ struct qib_qp *qp = iter->qp;
+
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ seq_printf(s,
+ "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
+ iter->n,
+ qp->ibqp.qp_num,
+ qp_type_str[qp->ibqp.qp_type],
+ qp->state,
+ wqe->wr.opcode,
+ qp->s_hdrwords,
+ qp->s_flags,
+ atomic_read(&qp->s_dma_busy),
+ !list_empty(&qp->iowait),
+ qp->timeout,
+ wqe->ssn,
+ qp->s_lsn,
+ qp->s_last_psn,
+ qp->s_psn, qp->s_next_psn,
+ qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->s_last, qp->s_acked, qp->s_cur,
+ qp->s_tail, qp->s_head, qp->s_size,
+ qp->remote_qpn,
+ qp->remote_ah_attr.dlid);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 3ab341320ea..2f2501890c4 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
qib_flush_wc();
qib_sendbuf_done(dd, pbufn);
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
goto done;
queue_ack:
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 357b6cfcd46..4c07a8b34ff 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 50a8a0d4fe6..911205d3d5a 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc51443121..c6d6a54d2e1 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -423,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd)
void __qib_sdma_intr(struct qib_pportdata *ppd)
{
- if (__qib_sdma_running(ppd))
+ if (__qib_sdma_running(ppd)) {
qib_sdma_make_progress(ppd);
+ if (!list_empty(&ppd->sdma_userpending))
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ }
}
int qib_setup_sdma(struct qib_pportdata *ppd)
@@ -452,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd)
ppd->sdma_descq_removed = 0;
ppd->sdma_descq_added = 0;
+ ppd->sdma_intrequest = 0;
+ INIT_LIST_HEAD(&ppd->sdma_userpending);
+
INIT_LIST_HEAD(&ppd->sdma_activelist);
tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
@@ -708,6 +714,62 @@ unlock:
return ret;
}
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+void dump_sdma_state(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_desc *descq;
+ struct qib_sdma_txreq *txp, *txpnext;
+ __le64 *descqp;
+ u64 desc[2];
+ u64 addr;
+ u16 gen, dwlen, dwoffset;
+ u16 head, tail, cnt;
+
+ head = ppd->sdma_descq_head;
+ tail = ppd->sdma_descq_tail;
+ cnt = qib_sdma_descq_freecnt(ppd);
+ descq = ppd->sdma_descq;
+
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_head: %u\n", head);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_tail: %u\n", tail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdma_descq_freecnt: %u\n", cnt);
+
+ /* print info for each entry in the descriptor queue */
+ while (head != tail) {
+ char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
+
+ descqp = &descq[head].qw[0];
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+ flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
+ flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
+ flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
+ flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
+ flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
+ addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+ gen = (desc[0] >> 30) & 3ULL;
+ dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+ dwoffset = (desc[0] & 0x7ffULL) << 2;
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+ head, flags, addr, gen, dwlen, dwoffset);
+ if (++head == ppd->sdma_descq_cnt)
+ head = 0;
+ }
+
+ /* print dma descriptor indices from the TX requests */
+ list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+ list)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+ txp->start_idx, txp->next_descq_idx);
+}
+
void qib_sdma_process_event(struct qib_pportdata *ppd,
enum qib_sdma_events event)
{
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 034cc821de5..3c8e4e3caca 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -808,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd)
for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
ret = device_create_file(&dev->dev, qib_attributes[i]);
if (ret)
- return ret;
+ goto bail;
}
return 0;
+bail:
+ for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
+ device_remove_file(&dev->dev, qib_attributes[i]);
+ return ret;
}
/*
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d6c7fe7f88d..aaf7039f8ed 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
struct qib_sge *sge;
struct ib_wc wc;
u32 length;
+ enum ib_qp_type sqptype, dqptype;
qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
}
- if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+
+ sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : sqp->ibqp.qp_type;
+ dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : qp->ibqp.qp_type;
+
+ if (dqptype != sqptype ||
!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;
@@ -273,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
- ibp->n_multicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
/*
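The transmit counters above move from plain u64 fields in qib_ibport to the per-CPU struct qib_pma_counters added in qib_verbs.h below, so the send path only pays for a this_cpu_inc(). Reading them back for PMA queries then means summing across CPUs; the aggregation code lives elsewhere in the driver and is not part of this section, but a minimal read-side sketch under that assumption (the helper name is made up) would be:

/* sketch: sum one per-CPU counter for reporting; not the driver's actual helper */
static u64 qib_sum_unicast_xmit(struct qib_ibport *ibp)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu_ptr(ibp->pmastats, cpu)->n_unicast_xmit;

	return total;
}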
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 82442085cbe..d2806cae234 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -52,21 +52,48 @@
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+/*
+ * Track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+ struct rb_node node;
+ int refcount;
+ pid_t pid;
+};
+
struct qib_user_sdma_pkt {
- u8 naddr; /* dimension of addr (1..3) ... */
+ struct list_head list; /* list element */
+
+ u8 tiddma; /* if this is NEW tid-sdma */
+ u8 largepkt; /* this is large pkt from kmalloc */
+ u16 frag_size; /* frag size used by PSM */
+ u16 index; /* last header index or push index */
+ u16 naddr; /* dimension of addr (1..3) ... */
+ u16 addrlimit; /* addr array size */
+ u16 tidsmidx; /* current tidsm index */
+ u16 tidsmcount; /* tidsm array item count */
+ u16 payload_size; /* payload size so far for header */
+ u32 bytes_togo; /* bytes for processing */
u32 counter; /* sdma pkts queued counter for this entry */
+ struct qib_tid_session_member *tidsm; /* tid session member array */
+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
u64 added; /* global descq number of entries */
struct {
- u32 offset; /* offset for kvaddr, addr */
- u32 length; /* length in page */
- u8 put_page; /* should we put_page? */
- u8 dma_mapped; /* is page dma_mapped? */
+ u16 offset; /* offset for kvaddr, addr */
+ u16 length; /* length in page */
+ u16 first_desc; /* first desc */
+ u16 last_desc; /* last desc */
+ u16 put_page; /* should we put_page? */
+ u16 dma_mapped; /* is page dma_mapped? */
+ u16 dma_length; /* for dma_unmap_page() */
+ u16 padding;
struct page *page; /* may be NULL (coherent mem) */
void *kvaddr; /* FIXME: only for pio hack */
dma_addr_t addr;
} addr[4]; /* max pages, any more and we coalesce */
- struct list_head list; /* list element */
};
struct qib_user_sdma_queue {
@@ -77,6 +104,12 @@ struct qib_user_sdma_queue {
*/
struct list_head sent;
+ /*
+ * Because the above list is accessed by both process context and
+ * the interrupt handler, we need a spinlock for it.
+ */
+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
+
/* headers with expected length are allocated from here... */
char header_cache_name[64];
struct dma_pool *header_cache;
@@ -88,27 +121,83 @@ struct qib_user_sdma_queue {
/* as packets go on the queued queue, they are counted... */
u32 counter;
u32 sent_counter;
+ /* pending packets, not sending yet */
+ u32 num_pending;
+ /* sending packets, not complete yet */
+ u32 num_sending;
+ /* global descq entry number of the last packet queued to the hardware */
+ u64 added;
/* dma page table */
struct rb_root dma_pages_root;
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
/* protect everything above... */
struct mutex lock;
};
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ sdma_rb_node = container_of(node,
+ struct qib_user_sdma_rb_node, node);
+ if (pid < sdma_rb_node->pid)
+ node = node->rb_left;
+ else if (pid > sdma_rb_node->pid)
+ node = node->rb_right;
+ else
+ return sdma_rb_node;
+ }
+ return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct qib_user_sdma_rb_node *got;
+
+ while (*node) {
+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ parent = *node;
+ if (new->pid < got->pid)
+ node = &((*node)->rb_left);
+ else if (new->pid > got->pid)
+ node = &((*node)->rb_right);
+ else
+ return 0;
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, root);
+ return 1;
+}
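These two helpers are the usual rbtree search/insert pair keyed by pid; qib_user_sdma_queue_create() below uses them to refcount how many SDMA queues a process has open, and that refcount later selects between the blocking and non-blocking paths in qib_user_sdma_push_pkts(). Condensed, the get-or-create usage is (error handling omitted):

/* sketch of the usage in queue_create(): bump or create the per-pid node */
node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, current->pid);
if (node) {
	node->refcount++;
} else {
	node = kmalloc(sizeof(*node), GFP_KERNEL);  /* error handling omitted */
	node->refcount = 1;
	node->pid = current->pid;
	qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, node);
}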
+
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
struct qib_user_sdma_queue *pq =
kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+ struct qib_user_sdma_rb_node *sdma_rb_node;
if (!pq)
goto done;
pq->counter = 0;
pq->sent_counter = 0;
- INIT_LIST_HEAD(&pq->sent);
+ pq->num_pending = 0;
+ pq->num_sending = 0;
+ pq->added = 0;
+ pq->sdma_rb_node = NULL;
+ INIT_LIST_HEAD(&pq->sent);
+ spin_lock_init(&pq->sent_lock);
mutex_init(&pq->lock);
snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
@@ -131,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->dma_pages_root = RB_ROOT;
+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+ current->pid);
+ if (sdma_rb_node) {
+ sdma_rb_node->refcount++;
+ } else {
+ int ret;
+ sdma_rb_node = kmalloc(sizeof(
+ struct qib_user_sdma_rb_node), GFP_KERNEL);
+ if (!sdma_rb_node)
+ goto err_rb;
+
+ sdma_rb_node->refcount = 1;
+ sdma_rb_node->pid = current->pid;
+
+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+ sdma_rb_node);
+ BUG_ON(ret == 0);
+ }
+ pq->sdma_rb_node = sdma_rb_node;
+
goto done;
+err_rb:
+ dma_pool_destroy(pq->header_cache);
err_slab:
kmem_cache_destroy(pq->pkt_slab);
err_kfree:
@@ -144,34 +255,310 @@ done:
}
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- int i, size_t offset, size_t len,
- int put_page, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+ int i, u16 offset, u16 len,
+ u16 first_desc, u16 last_desc,
+ u16 put_page, u16 dma_mapped,
+ struct page *page, void *kvaddr,
+ dma_addr_t dma_addr, u16 dma_length)
{
pkt->addr[i].offset = offset;
pkt->addr[i].length = len;
+ pkt->addr[i].first_desc = first_desc;
+ pkt->addr[i].last_desc = last_desc;
pkt->addr[i].put_page = put_page;
pkt->addr[i].dma_mapped = dma_mapped;
pkt->addr[i].page = page;
pkt->addr[i].kvaddr = kvaddr;
pkt->addr[i].addr = dma_addr;
+ pkt->addr[i].dma_length = dma_length;
+}
+
+static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
+{
+ void *hdr;
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ dma_addr);
+ else
+ hdr = NULL;
+
+ if (!hdr) {
+ hdr = kmalloc(len, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = 0;
+ }
+
+ return hdr;
}
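qib_user_sdma_alloc_header() reports which allocator it used through *dma_addr: non-zero means the header came from the dma_pool and is already mapped, zero means it came from kmalloc and will be dma_map_single()'d once the packet is complete. The matching release logic is the qib_user_sdma_free_pkt_frag() hunk further down; condensed here for orientation, not as standalone code:

/* sketch: how a header from qib_user_sdma_alloc_header() is released */
if (pkt->addr[i].dma_mapped) {
	/* kmalloc'ed header that was dma_map_single()'d later */
	dma_unmap_single(dev, pkt->addr[i].addr,
			 pkt->addr[i].dma_length, DMA_TO_DEVICE);
	kfree(pkt->addr[i].kvaddr);
} else if (pkt->addr[i].addr) {
	/* header from the dma_pool; addr was set at allocation time */
	dma_pool_free(pq->header_cache, pkt->addr[i].kvaddr, pkt->addr[i].addr);
} else {
	/* kmalloc'ed header that was never mapped */
	kfree(pkt->addr[i].kvaddr);
}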
-static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
- u32 counter, size_t offset,
- size_t len, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ struct page *page, u16 put,
+ u16 offset, u16 len, void *kvaddr)
{
- pkt->naddr = 1;
- pkt->counter = counter;
- qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
- kvaddr, dma_addr);
+ __le16 *pbc16;
+ void *pbcvaddr;
+ struct qib_message_header *hdr;
+ u16 newlen, pbclen, lastdesc, dma_mapped;
+ u32 vcto;
+ union qib_seqnum seqnum;
+ dma_addr_t pbcdaddr;
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ page, offset, len, DMA_TO_DEVICE);
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ /*
+ * DMA mapping error; the pkt has not taken
+ * ownership of this page yet, so release it
+ * here so the caller can ignore this page.
+ */
+ if (put) {
+ put_page(page);
+ } else {
+ /* coalesce case */
+ kunmap(page);
+ __free_page(page);
+ }
+ ret = -ENOMEM;
+ goto done;
+ }
+ offset = 0;
+ dma_mapped = 1;
+
+
+next_fragment:
+
+ /*
+ * In tid-sdma, the transfer length is restricted by
+ * the receiver side's current tid page length.
+ */
+ if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
+ newlen = pkt->tidsm[pkt->tidsmidx].length;
+ else
+ newlen = len;
+
+ /*
+ * Then the transfer length is restricted by the MTU.
+ * The last-descriptor flag is set when any of these hold:
+ * 1. the current packet has reached frag_size length;
+ * 2. the current tid page is done, if tid-sdma;
+ * 3. there are no more bytes to go, if plain sdma.
+ */
+ lastdesc = 0;
+ if ((pkt->payload_size + newlen) >= pkt->frag_size) {
+ newlen = pkt->frag_size - pkt->payload_size;
+ lastdesc = 1;
+ } else if (pkt->tiddma) {
+ if (newlen == pkt->tidsm[pkt->tidsmidx].length)
+ lastdesc = 1;
+ } else {
+ if (newlen == pkt->bytes_togo)
+ lastdesc = 1;
+ }
+
+ /* fill the next fragment in this page */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ offset, newlen, /* offset, len */
+ 0, lastdesc, /* first last desc */
+ put, dma_mapped, /* put page, dma mapped */
+ page, kvaddr, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->bytes_togo -= newlen;
+ pkt->payload_size += newlen;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* If there are no more bytes to go (lastdesc==1). */
+ if (pkt->bytes_togo == 0) {
+ /* The packet is done but the header is not DMA mapped yet;
+ * it must have come from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ goto done;
+ }
+
+ /* If tid-sdma, advance tid info. */
+ if (pkt->tiddma) {
+ pkt->tidsm[pkt->tidsmidx].length -= newlen;
+ if (pkt->tidsm[pkt->tidsmidx].length) {
+ pkt->tidsm[pkt->tidsmidx].offset += newlen;
+ } else {
+ pkt->tidsmidx++;
+ if (pkt->tidsmidx == pkt->tidsmcount) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * If this is NOT the last descriptor (newlen==len),
+ * the current packet is not done yet, but the current
+ * send-side page is done.
+ */
+ if (lastdesc == 0)
+ goto done;
+
+ /*
+ * If this driver is run under PSM with a message size that
+ * fits into one transfer unit, it is not possible to reach
+ * this point; otherwise it is a bug.
+ */
+
+ /*
+ * Since the current packet is done and there are more
+ * bytes to go, we need to create a new sdma header, copying
+ * it from the previous sdma header, and then modify both.
+ */
+ pbclen = pkt->addr[pkt->index].length;
+ pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
+ if (!pbcvaddr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Copy the previous sdma header to new sdma header */
+ pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
+ memcpy(pbcvaddr, pbc16, pbclen);
+
+ /* Modify the previous sdma header */
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* turn on the header suppression */
+ hdr->iph.pkt_flags =
+ cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
+ /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
+ hdr->flags &= ~(0x04|0x20);
+ } else {
+ /* turn off extra bytes: 20-21 bits */
+ hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
+ /* turn off ACK_REQ: 0x04 */
+ hdr->flags &= ~(0x04);
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* The packet is done but the header is not DMA mapped yet;
+ * it must have come from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ /* Modify the new sdma header */
+ pbc16 = (__le16 *)pbcvaddr;
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* Set new tid and offset for new sdma header */
+ hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
+ (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
+ (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
+ (pkt->tidsm[pkt->tidsmidx].offset>>2));
+ } else {
+ /* Middle protocol new packet offset */
+ hdr->uwords[2] += pkt->payload_size;
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* Next sequence number in new sdma header */
+ seqnum.val = be32_to_cpu(hdr->bth[2]);
+ if (pkt->tiddma)
+ seqnum.seq++;
+ else
+ seqnum.pkt++;
+ hdr->bth[2] = cpu_to_be32(seqnum.val);
+
+ /* Init new sdma header. */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ 0, pbclen, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbcvaddr, /* struct page, virt addr */
+ pbcdaddr, pbclen); /* dma addr, dma length */
+ pkt->index = pkt->naddr;
+ pkt->payload_size = 0;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* Prepare for next fragment in this page */
+ if (newlen != len) {
+ if (dma_mapped) {
+ put = 0;
+ dma_mapped = 0;
+ page = NULL;
+ kvaddr = NULL;
+ }
+ len -= newlen;
+ offset += newlen;
+
+ goto next_fragment;
+ }
+
+done:
+ return ret;
}
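To make the splitting above concrete, consider a non-tiddma packet with frag_size = 2048, bytes_togo = 8192 and payload_size = 0 when a 4096-byte pinned chunk arrives (a worked example, not driver code):

/*
 * pass 1: newlen = 2048, lastdesc = 1 -> a payload frag is added, a new header
 *         is copied from the previous one and patched (length, sequence
 *         number), payload_size resets to 0, then len/offset advance and the
 *         code loops via next_fragment.
 * pass 2: newlen = 2048, lastdesc = 1 -> a second payload frag and another
 *         patched header; newlen == len, so this chunk is done.
 *
 * One 4 KB user chunk thus produces two frag_size-sized wire packets, each
 * closed by lastdesc and followed by a freshly spliced header for the next
 * packet.
 */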
/* we've too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
@@ -182,7 +569,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
char *mpage;
int i;
int len = 0;
- dma_addr_t dma_addr;
if (!page) {
ret = -ENOMEM;
@@ -205,17 +591,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
len += iov[i].iov_len;
}
- dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
- ret = -ENOMEM;
- goto free_unmap;
- }
-
- qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
- dma_addr);
- pkt->naddr = 2;
-
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ page, 0, 0, len, mpage_save);
goto done;
free_unmap:
@@ -238,16 +615,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/*
- * Truncate length to page boundary.
- */
-static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
- const unsigned long offset = addr & ~PAGE_MASK;
-
- return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
static void qib_user_sdma_free_pkt_frag(struct device *dev,
struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
@@ -256,10 +623,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
const int i = frag;
if (pkt->addr[i].page) {
+ /* only user data has page */
if (pkt->addr[i].dma_mapped)
dma_unmap_page(dev,
pkt->addr[i].addr,
- pkt->addr[i].length,
+ pkt->addr[i].dma_length,
DMA_TO_DEVICE);
if (pkt->addr[i].kvaddr)
@@ -269,55 +637,80 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
put_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
- } else if (pkt->addr[i].kvaddr)
- /* free coherent mem from cache... */
- dma_pool_free(pq->header_cache,
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+ /* from kmalloc & dma mapped */
+ dma_unmap_single(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+ kfree(pkt->addr[i].kvaddr);
+ } else if (pkt->addr[i].addr) {
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
pkt->addr[i].kvaddr, pkt->addr[i].addr);
+ } else {
+ /* from kmalloc but not dma mapped */
+ kfree(pkt->addr[i].kvaddr);
+ }
+ }
}
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
unsigned long addr, int tlen, int npages)
{
- struct page *pages[2];
- int j;
- int ret;
-
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
- if (ret != npages) {
- int i;
-
- for (i = 0; i < ret; i++)
- put_page(pages[i]);
-
- ret = -ENOMEM;
- goto done;
- }
+ struct page *pages[8];
+ int i, j;
+ int ret = 0;
- for (j = 0; j < npages; j++) {
- /* map the pages... */
- const int flen = qib_user_sdma_page_length(addr, tlen);
- dma_addr_t dma_addr =
- dma_map_page(&dd->pcidev->dev,
- pages[j], 0, flen, DMA_TO_DEVICE);
- unsigned long fofs = addr & ~PAGE_MASK;
+ while (npages) {
+ if (npages > 8)
+ j = 8;
+ else
+ j = npages;
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = get_user_pages_fast(addr, j, 0, pages);
+ if (ret != j) {
+ i = 0;
+ j = ret;
ret = -ENOMEM;
- goto done;
+ goto free_pages;
}
- qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
- pages[j], kmap(pages[j]), dma_addr);
+ for (i = 0; i < j; i++) {
+ /* map the pages... */
+ unsigned long fofs = addr & ~PAGE_MASK;
+ int flen = ((fofs + tlen) > PAGE_SIZE) ?
+ (PAGE_SIZE - fofs) : tlen;
+
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ pages[i], 1, fofs, flen, NULL);
+ if (ret < 0) {
+ /* the current page has been taken
+ * care of inside the above call.
+ */
+ i++;
+ goto free_pages;
+ }
+
+ addr += flen;
+ tlen -= flen;
+ }
- pkt->naddr++;
- addr += flen;
- tlen -= flen;
+ npages -= j;
}
+ goto done;
+
+ /* if error, return all pages not managed by pkt */
+free_pages:
+ while (i < j)
+ put_page(pages[i++]);
+
done:
return ret;
}
@@ -335,7 +728,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
const int npages = qib_user_sdma_num_pages(iov + idx);
const unsigned long addr = (unsigned long) iov[idx].iov_base;
- ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+ ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
iov[idx].iov_len, npages);
if (ret < 0)
goto free_pkt;
@@ -344,9 +737,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
goto done;
free_pkt:
- for (idx = 0; idx < pkt->naddr; idx++)
+ /* skip the first entry here (the header frag set up by the caller) */
+ for (idx = 1; idx < pkt->naddr; idx++)
qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+ /* DMA unmap the first entry to restore it to its original
+ * state so that the caller can free the memory on the error
+ * path; the caller does not know whether it was DMA mapped. */
+ if (pkt->addr[0].dma_mapped) {
+ dma_unmap_single(&dd->pcidev->dev,
+ pkt->addr[0].addr,
+ pkt->addr[0].dma_length,
+ DMA_TO_DEVICE);
+ pkt->addr[0].addr = 0;
+ pkt->addr[0].dma_mapped = 0;
+ }
+
done:
return ret;
}
@@ -359,8 +765,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
{
int ret = 0;
- if (npages >= ARRAY_SIZE(pkt->addr))
- ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+ if (pkt->frag_size == pkt->bytes_togo &&
+ npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
else
ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
@@ -380,7 +787,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
for (i = 0; i < pkt->naddr; i++)
qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
- kmem_cache_free(pq->pkt_slab, pkt);
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
}
INIT_LIST_HEAD(list);
}
@@ -393,63 +803,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
* as, if there is an error we clean it...
*/
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
- struct list_head *list,
const struct iovec *iov,
unsigned long niov,
- int maxpkts)
+ struct list_head *list,
+ int *maxpkts, int *ndesc)
{
unsigned long idx = 0;
int ret = 0;
int npkts = 0;
- struct page *page = NULL;
__le32 *pbc;
dma_addr_t dma_addr;
struct qib_user_sdma_pkt *pkt = NULL;
size_t len;
size_t nw;
u32 counter = pq->counter;
- int dma_mapped = 0;
+ u16 frag_size;
- while (idx < niov && npkts < maxpkts) {
+ while (idx < niov && npkts < *maxpkts) {
const unsigned long addr = (unsigned long) iov[idx].iov_base;
const unsigned long idx_save = idx;
unsigned pktnw;
unsigned pktnwc;
int nfrags = 0;
int npages = 0;
+ int bytes_togo = 0;
+ int tiddma = 0;
int cfur;
- dma_mapped = 0;
len = iov[idx].iov_len;
nw = len >> 2;
- page = NULL;
-
- pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
- if (!pkt) {
- ret = -ENOMEM;
- goto free_list;
- }
if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
len > PAGE_SIZE || len & 3 || addr & 3) {
ret = -EINVAL;
- goto free_pkt;
+ goto free_list;
}
- if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
- pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
- &dma_addr);
- else
- pbc = NULL;
-
+ pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
if (!pbc) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto free_pkt;
- }
- pbc = kmap(page);
+ ret = -ENOMEM;
+ goto free_list;
}
cfur = copy_from_user(pbc, iov[idx].iov_base, len);
@@ -474,8 +869,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
* we can verify that the packet is consistent with the
* iovec lengths.
*/
- pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
- if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ pktnw = le32_to_cpu(*pbc) & 0xFFFF;
+ if (pktnw < pktnwc) {
ret = -EINVAL;
goto free_pbc;
}
@@ -486,17 +881,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
const unsigned long faddr =
(unsigned long) iov[idx].iov_base;
- if (slen & 3 || faddr & 3 || !slen ||
- slen > PAGE_SIZE) {
+ if (slen & 3 || faddr & 3 || !slen) {
ret = -EINVAL;
goto free_pbc;
}
- npages++;
- if ((faddr & PAGE_MASK) !=
- ((faddr + slen - 1) & PAGE_MASK))
- npages++;
+ npages += qib_user_sdma_num_pages(&iov[idx]);
+ bytes_togo += slen;
pktnwc += slen >> 2;
idx++;
nfrags++;
@@ -507,48 +899,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
goto free_pbc;
}
- if (page) {
- dma_addr = dma_map_page(&dd->pcidev->dev,
- page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
+ if (((frag_size ? frag_size : bytes_togo) + len) >
+ ppd->ibmaxlen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (frag_size) {
+ int pktsize, tidsmsize, n;
+
+ n = npages*((2*PAGE_SIZE/frag_size)+1);
+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+
+ /*
+ * Determine if this is tid-sdma or just sdma.
+ */
+ tiddma = (((le32_to_cpu(pbc[7])>>
+ QLOGIC_IB_I_TID_SHIFT)&
+ QLOGIC_IB_I_TID_MASK) !=
+ QLOGIC_IB_I_TID_MASK);
+
+ if (tiddma)
+ tidsmsize = iov[idx].iov_len;
+ else
+ tidsmsize = 0;
+
+ pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+ if (!pkt) {
ret = -ENOMEM;
goto free_pbc;
}
+ pkt->largepkt = 1;
+ pkt->frag_size = frag_size;
+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
+
+ if (tiddma) {
+ char *tidsm = (char *)pkt + pktsize;
+ cfur = copy_from_user(tidsm,
+ iov[idx].iov_base, tidsmsize);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pkt;
+ }
+ pkt->tidsm =
+ (struct qib_tid_session_member *)tidsm;
+ pkt->tidsmcount = tidsmsize/
+ sizeof(struct qib_tid_session_member);
+ pkt->tidsmidx = 0;
+ idx++;
+ }
- dma_mapped = 1;
+ /*
+ * The pbc 'fill1' field is borrowed to pass the frag size;
+ * we need to clear it after reading the frag size because
+ * the hardware requires this field to be zero.
+ */
+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
+ } else {
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 0;
+ pkt->frag_size = bytes_togo;
+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
}
-
- qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
- page, pbc, dma_addr);
+ pkt->bytes_togo = bytes_togo;
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
+
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+ 0, len, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbc, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->index = 0;
+ pkt->naddr = 1;
if (nfrags) {
ret = qib_user_sdma_init_payload(dd, pq, pkt,
iov + idx_save + 1,
nfrags, npages);
if (ret < 0)
- goto free_pbc_dma;
+ goto free_pkt;
+ } else {
+ /* since there is no payload, mark the
+ * header as the last desc. */
+ pkt->addr[0].last_desc = 1;
+
+ if (dma_addr == 0) {
+ /*
+ * The header is not DMA mapped yet;
+ * it must have come from kmalloc.
+ */
+ dma_addr = dma_map_single(&dd->pcidev->dev,
+ pbc, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pkt->addr[0].addr = dma_addr;
+ pkt->addr[0].dma_mapped = 1;
+ }
}
counter++;
npkts++;
+ pkt->pq = pq;
+ pkt->index = 0; /* reset index for push on hw */
+ *ndesc += pkt->naddr;
list_add_tail(&pkt->list, list);
}
+ *maxpkts = npkts;
ret = idx;
goto done;
-free_pbc_dma:
- if (dma_mapped)
- dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pkt:
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
- if (page) {
- kunmap(page);
- __free_page(page);
- } else
+ if (dma_addr)
dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
- kmem_cache_free(pq->pkt_slab, pkt);
+ else
+ kfree(pbc);
free_list:
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
@@ -569,10 +1052,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
struct list_head free_list;
struct qib_user_sdma_pkt *pkt;
struct qib_user_sdma_pkt *pkt_prev;
+ unsigned long flags;
int ret = 0;
+ if (!pq->num_sending)
+ return 0;
+
INIT_LIST_HEAD(&free_list);
+ /*
+ * We need this spinlock here because the interrupt handler
+ * might modify this list in qib_user_sdma_send_desc(); also,
+ * we must not be interrupted while holding it, otherwise we deadlock.
+ */
+ spin_lock_irqsave(&pq->sent_lock, flags);
list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
s64 descd = ppd->sdma_descq_removed - pkt->added;
@@ -583,7 +1076,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
/* one more packet cleaned */
ret++;
+ pq->num_sending--;
}
+ spin_unlock_irqrestore(&pq->sent_lock, flags);
if (!list_empty(&free_list)) {
u32 counter;
@@ -604,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
if (!pq)
return;
- kmem_cache_destroy(pq->pkt_slab);
+ pq->sdma_rb_node->refcount--;
+ if (pq->sdma_rb_node->refcount == 0) {
+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+ kfree(pq->sdma_rb_node);
+ }
dma_pool_destroy(pq->header_cache);
+ kmem_cache_destroy(pq->pkt_slab);
kfree(pq);
}
@@ -627,6 +1127,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq)
{
struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
int i;
if (!pq)
@@ -634,7 +1135,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
mutex_lock(&pq->lock);
- if (list_empty(&pq->sent)) {
+ if (!pq->num_pending && !pq->num_sending) {
mutex_unlock(&pq->lock);
break;
}
@@ -644,29 +1145,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
msleep(10);
}
- if (!list_empty(&pq->sent)) {
+ if (pq->num_pending || pq->num_sending) {
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
struct list_head free_list;
+ mutex_lock(&pq->lock);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Since we hold sdma_lock, it is safe without sent_lock.
+ */
+ if (pq->num_pending) {
+ list_for_each_entry_safe(pkt, pkt_prev,
+ &ppd->sdma_userpending, list) {
+ if (pkt->pq == pq) {
+ list_move_tail(&pkt->list, &pq->sent);
+ pq->num_pending--;
+ pq->num_sending++;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
INIT_LIST_HEAD(&free_list);
- mutex_lock(&pq->lock);
list_splice_init(&pq->sent, &free_list);
+ pq->num_sending = 0;
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
mutex_unlock(&pq->lock);
}
}
-static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+static inline __le64 qib_sdma_make_desc0(u8 gen,
u64 addr, u64 dwlen, u64 dwoffset)
{
- u8 tmpgen;
-
- tmpgen = ppd->sdma_generation;
-
return cpu_to_le64(/* SDmaPhyAddr[31:0] */
((addr & 0xfffffffcULL) << 32) |
/* SDmaGeneration[1:0] */
- ((tmpgen & 3ULL) << 30) |
+ ((gen & 3ULL) << 30) |
/* SDmaDwordCount[10:0] */
((dwlen & 0x7ffULL) << 16) |
/* SDmaBufOffset[12:2] */
@@ -692,7 +1208,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr)
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
struct qib_user_sdma_pkt *pkt, int idx,
- unsigned ofs, u16 tail)
+ unsigned ofs, u16 tail, u8 gen)
{
const u64 addr = (u64) pkt->addr[idx].addr +
(u64) pkt->addr[idx].offset;
@@ -702,105 +1218,159 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
descqp = &ppd->sdma_descq[tail].qw[0];
- descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
- if (idx == 0)
+ descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
+ if (pkt->addr[idx].first_desc)
descq0 = qib_sdma_make_first_desc0(descq0);
- if (idx == pkt->naddr - 1)
+ if (pkt->addr[idx].last_desc) {
descq0 = qib_sdma_make_last_desc0(descq0);
+ if (ppd->sdma_intrequest) {
+ descq0 |= cpu_to_le64(1ULL << 15);
+ ppd->sdma_intrequest = 0;
+ }
+ }
descqp[0] = descq0;
descqp[1] = qib_sdma_make_desc1(addr);
}
-/* pq->lock must be held, get packets on the wire... */
-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
- struct qib_user_sdma_queue *pq,
- struct list_head *pktlist)
+void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
+ struct list_head *pktlist)
{
struct qib_devdata *dd = ppd->dd;
- int ret = 0;
- unsigned long flags;
- u16 tail;
- u8 generation;
- u64 descq_added;
-
- if (list_empty(pktlist))
- return 0;
-
- if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
- return -ECOMM;
-
- spin_lock_irqsave(&ppd->sdma_lock, flags);
+ u16 nfree, nsent;
+ u16 tail, tail_c;
+ u8 gen, gen_c;
- /* keep a copy for restoring purposes in case of problems */
- generation = ppd->sdma_generation;
- descq_added = ppd->sdma_descq_added;
-
- if (unlikely(!__qib_sdma_running(ppd))) {
- ret = -ECOMM;
- goto unlock;
- }
+ nfree = qib_sdma_descq_freecnt(ppd);
+ if (!nfree)
+ return;
- tail = ppd->sdma_descq_tail;
+retry:
+ nsent = 0;
+ tail_c = tail = ppd->sdma_descq_tail;
+ gen_c = gen = ppd->sdma_generation;
while (!list_empty(pktlist)) {
struct qib_user_sdma_pkt *pkt =
list_entry(pktlist->next, struct qib_user_sdma_pkt,
list);
- int i;
+ int i, j, c = 0;
unsigned ofs = 0;
u16 dtail = tail;
- if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
- goto unlock_check_tail;
-
- for (i = 0; i < pkt->naddr; i++) {
- qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
+ for (i = pkt->index; i < pkt->naddr && nfree; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
ofs += pkt->addr[i].length >> 2;
if (++tail == ppd->sdma_descq_cnt) {
tail = 0;
- ++ppd->sdma_generation;
+ ++gen;
+ ppd->sdma_intrequest = 1;
+ } else if (tail == (ppd->sdma_descq_cnt>>1)) {
+ ppd->sdma_intrequest = 1;
}
- }
-
- if ((ofs << 2) > ppd->ibmaxlen) {
- ret = -EMSGSIZE;
- goto unlock;
- }
+ nfree--;
+ if (pkt->addr[i].last_desc == 0)
+ continue;
- /*
- * If the packet is >= 2KB mtu equivalent, we have to use
- * the large buffers, and have to mark each descriptor as
- * part of a large buffer packet.
- */
- if (ofs > dd->piosize2kmax_dwords) {
- for (i = 0; i < pkt->naddr; i++) {
- ppd->sdma_descq[dtail].qw[0] |=
- cpu_to_le64(1ULL << 14);
- if (++dtail == ppd->sdma_descq_cnt)
- dtail = 0;
+ /*
+ * If the packet is >= 2KB mtu equivalent, we
+ * have to use the large buffers, and have to
+ * mark each descriptor as part of a large
+ * buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (j = pkt->index; j <= i; j++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
}
+ c += i + 1 - pkt->index;
+ pkt->index = i + 1; /* index for next first */
+ tail_c = dtail = tail;
+ gen_c = gen;
+ ofs = 0; /* reset for next packet */
}
- ppd->sdma_descq_added += pkt->naddr;
- pkt->added = ppd->sdma_descq_added;
- list_move_tail(&pkt->list, &pq->sent);
- ret++;
+ ppd->sdma_descq_added += c;
+ nsent += c;
+ if (pkt->index == pkt->naddr) {
+ pkt->added = ppd->sdma_descq_added;
+ pkt->pq->added = pkt->added;
+ pkt->pq->num_pending--;
+ spin_lock(&pkt->pq->sent_lock);
+ pkt->pq->num_sending++;
+ list_move_tail(&pkt->list, &pkt->pq->sent);
+ spin_unlock(&pkt->pq->sent_lock);
+ }
+ if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
+ break;
}
-unlock_check_tail:
/* advance the tail on the chip if necessary */
- if (ppd->sdma_descq_tail != tail)
- dd->f_sdma_update_tail(ppd, tail);
+ if (ppd->sdma_descq_tail != tail_c) {
+ ppd->sdma_generation = gen_c;
+ dd->f_sdma_update_tail(ppd, tail_c);
+ }
+
+ if (nfree && !list_empty(pktlist))
+ goto retry;
+
+ return;
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist, int count)
+{
+ unsigned long flags;
+
+ if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+ return -ECOMM;
-unlock:
- if (unlikely(ret < 0)) {
- ppd->sdma_generation = generation;
- ppd->sdma_descq_added = descq_added;
+ /* non-blocking mode */
+ if (pq->sdma_rb_node->refcount > 1) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return 0;
}
- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
- return ret;
+ /* In this case, descriptors from this process are not
+ * linked to the ppd pending queue, and the interrupt handler
+ * will not touch this process's packets, so it is OK to
+ * modify them directly without the sdma lock.
+ */
+
+
+ pq->num_pending += count;
+ /*
+ * Blocking mode for a single-rail process: we must
+ * release and re-acquire sdma_lock to give other processes
+ * a chance to make progress. This is important for
+ * performance.
+ */
+ do {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ qib_user_sdma_send_desc(ppd, pktlist);
+ if (!list_empty(pktlist))
+ qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ } while (!list_empty(pktlist));
+
+ return 0;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -822,19 +1392,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
if (!qib_sdma_running(ppd))
goto done_unlock;
- if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+ /* if this queue has packets not yet complete */
+ if (pq->added > ppd->sdma_descq_removed)
qib_user_sdma_hwqueue_clean(ppd);
+ /* if this queue has completed packets to be freed */
+ if (pq->num_sending)
qib_user_sdma_queue_clean(ppd, pq);
- }
while (dim) {
- const int mxp = 8;
+ int mxp = 1;
+ int ndesc = 0;
- down_write(&current->mm->mmap_sem);
- ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
- if (ret <= 0)
+ ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
+ iov, dim, &list, &mxp, &ndesc);
+ if (ret < 0)
goto done_unlock;
else {
dim -= ret;
@@ -844,24 +1415,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
/* force packets onto the sdma hw queue... */
if (!list_empty(&list)) {
/*
- * Lazily clean hw queue. the 4 is a guess of about
- * how many sdma descriptors a packet will take (it
- * doesn't have to be perfect).
+ * Lazily clean hw queue.
*/
- if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+ if (qib_sdma_descq_freecnt(ppd) < ndesc) {
qib_user_sdma_hwqueue_clean(ppd);
- qib_user_sdma_queue_clean(ppd, pq);
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
}
- ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
if (ret < 0)
goto done_unlock;
else {
- npkts += ret;
- pq->counter += ret;
-
- if (!list_empty(&list))
- goto done_unlock;
+ npkts += mxp;
+ pq->counter += mxp;
}
}
}
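For orientation, the iovec layout qib_user_sdma_writev() expects per packet: iov[0] carries the PBC plus headers (4-byte aligned, at least QIB_USER_SDMA_MIN_HEADER_LENGTH and at most PAGE_SIZE), an optional tid-session-member array follows when the packet is tid-sdma, and the remaining iovecs are payload whose dword counts must account for the PBC length word. A rough user-space sketch of a single-packet submission; the device setup and the qib_fd handle are assumptions (the real producer is PSM) and are omitted:

#include <sys/uio.h>

/* sketch: qib_fd is an already-initialized qib context fd (assumption) */
struct iovec iov[2];

iov[0].iov_base = hdr;          /* PBC + IB headers, multiple of 4 bytes */
iov[0].iov_len  = hdr_len;
iov[1].iov_base = payload;      /* dword count must match the PBC length word */
iov[1].iov_len  = payload_len;

if (writev(qib_fd, iov, 2) < 0)
	perror("qib sdma writev");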
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index ba51a4715a1..9bcfbd84298 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
} else
goto drop;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- ibp->opstats[opcode & 0x7f].n_bytes += tlen;
- ibp->opstats[opcode & 0x7f].n_packets++;
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
+#ifdef CONFIG_DEBUG_FS
+ rcd->opstats->stats[opcode].n_bytes += tlen;
+ rcd->opstats->stats[opcode].n_packets++;
+#endif
/* Get the destination QP number. */
qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
@@ -660,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
if (mcast == NULL)
goto drop;
- ibp->n_multicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_multicast_rcv);
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
/*
@@ -676,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
&rcd->lookaside_qp->refcount))
wake_up(
&rcd->lookaside_qp->wait);
- rcd->lookaside_qp = NULL;
- }
+ rcd->lookaside_qp = NULL;
+ }
}
if (!rcd->lookaside_qp) {
qp = qib_lookup_qpn(ibp, qp_num);
@@ -687,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
rcd->lookaside_qpn = qp_num;
} else
qp = rcd->lookaside_qp;
- ibp->n_unicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_unicast_rcv);
qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
}
return;
@@ -2224,7 +2226,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->dma_ops = &qib_dma_mapping_ops;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
- "QLogic Infiniband HCA %s", init_utsname()->nodename);
+ "Intel Infiniband HCA %s", init_utsname()->nodename);
ret = ib_register_device(ibdev, qib_create_port_files);
if (ret)
@@ -2234,7 +2236,8 @@ int qib_register_ib_device(struct qib_devdata *dd)
if (ret)
goto err_agents;
- if (qib_verbs_register_sysfs(dd))
+ ret = qib_verbs_register_sysfs(dd);
+ if (ret)
goto err_class;
goto bail;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index aff8b2c1788..bfc8948fdd3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -41,6 +41,7 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
@@ -149,14 +150,14 @@ struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
-} __attribute__ ((packed));
+} __packed;
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
struct qib_other_headers {
__be32 bth[3];
@@ -177,7 +178,7 @@ struct qib_other_headers {
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
-} __attribute__ ((packed));
+} __packed;
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -194,12 +195,12 @@ struct qib_ib_header {
} l;
struct qib_other_headers oth;
} u;
-} __attribute__ ((packed));
+} __packed;
struct qib_pio_header {
__le32 pbc[2];
struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
/*
* There is one struct qib_mcast for each multicast GID.
@@ -267,7 +268,8 @@ struct qib_cq_wc {
*/
struct qib_cq {
struct ib_cq ibcq;
- struct work_struct comptask;
+ struct kthread_work comptask;
+ struct qib_devdata *dd;
spinlock_t lock; /* protect changes in this struct */
u8 notify;
u8 triggered;
@@ -658,6 +660,17 @@ struct qib_opcode_stats {
u64 n_bytes; /* total number of bytes */
};
+struct qib_opcode_stats_perctx {
+ struct qib_opcode_stats stats[128];
+};
+
+struct qib_pma_counters {
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+};
+
struct qib_ibport {
struct qib_qp __rcu *qp0;
struct qib_qp __rcu *qp1;
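struct qib_pma_counters holds the per-CPU replacements for the four per-port counters removed from qib_ibport just below, while the new z_* fields keep the PMA baselines. Allocation of the percpu area is not part of this section, but the lifecycle it implies is roughly the following sketch (call sites assumed, not taken from this diff):

/* sketch: lifecycle of the percpu counters (allocation site is an assumption) */
ibp->pmastats = alloc_percpu(struct qib_pma_counters);
if (!ibp->pmastats)
	return -ENOMEM;

this_cpu_inc(ibp->pmastats->n_unicast_xmit);    /* hot path, no locking */

free_percpu(ibp->pmastats);                     /* on port teardown */
ibp->pmastats = NULL;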
@@ -674,10 +687,11 @@ struct qib_ibport {
__be64 mkey;
__be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
u64 tid; /* TID for traps */
- u64 n_unicast_xmit; /* total unicast packets sent */
- u64 n_unicast_rcv; /* total unicast packets received */
- u64 n_multicast_xmit; /* total multicast packets sent */
- u64 n_multicast_rcv; /* total multicast packets received */
+ struct qib_pma_counters __percpu *pmastats;
+ u64 z_unicast_xmit; /* starting count for PMA */
+ u64 z_unicast_rcv; /* starting count for PMA */
+ u64 z_multicast_xmit; /* starting count for PMA */
+ u64 z_multicast_rcv; /* starting count for PMA */
u64 z_symbol_error_counter; /* starting count for PMA */
u64 z_link_error_recovery_counter; /* starting count for PMA */
u64 z_link_downed_counter; /* starting count for PMA */
@@ -724,7 +738,6 @@ struct qib_ibport {
u8 vl_high_limit;
u8 sl_to_vl[16];
- struct qib_opcode_stats opstats[128];
};
@@ -768,6 +781,10 @@ struct qib_ibdev {
spinlock_t n_srqs_lock;
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
+#ifdef CONFIG_DEBUG_FS
+ /* per HCA debugfs */
+ struct dentry *qib_ibdev_dbg;
+#endif
};
struct qib_verbs_counters {
@@ -832,8 +849,6 @@ static inline int qib_send_ok(struct qib_qp *qp)
!(qp->s_flags & QIB_S_ANY_WAIT_SEND));
}
-extern struct workqueue_struct *qib_cq_wq;
-
/*
* This must be called with s_lock held.
*/
@@ -910,6 +925,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
void qib_free_qpn_table(struct qib_qpn_table *qpt);
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter;
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
+
+int qib_qp_iter_next(struct qib_qp_iter *iter);
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
+
+#endif
+
void qib_get_credit(struct qib_qp *qp, u32 aeth);
unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -972,6 +999,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int qib_destroy_srq(struct ib_srq *ibsrq);
+int qib_cq_init(struct qib_devdata *dd);
+
+void qib_cq_exit(struct qib_devdata *dd);
+
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
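The qib_cq changes in this header (comptask switching from work_struct to kthread_work, the new dd back-pointer, the removal of qib_cq_wq, and the qib_cq_init()/qib_cq_exit() prototypes) move completion callbacks onto a per-device kernel thread. The implementation is in qib_cq.c and not shown here; a minimal sketch of the kthread_worker pattern it implies, using the kthread API names of this kernel generation and hypothetical field/function names (cq_worker, cq_kthread, send_complete):

/* sketch only; field names and the callback name are assumptions */
init_kthread_worker(&dd->cq_worker);
dd->cq_kthread = kthread_run(kthread_worker_fn, &dd->cq_worker,
			     "qib_cq%d", dd->unit);
if (IS_ERR(dd->cq_kthread))
	return PTR_ERR(dd->cq_kthread);

init_kthread_work(&cq->comptask, send_complete);    /* per CQ, at create time */
queue_kthread_work(&dd->cq_worker, &cq->comptask);   /* when a CQ needs a callback */

flush_kthread_worker(&dd->cq_worker);                /* on device teardown */
kthread_stop(dd->cq_kthread);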