aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/qib
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/qib')
-rw-r--r--drivers/infiniband/hw/qib/Kconfig16
-rw-r--r--drivers/infiniband/hw/qib/Makefile1
-rw-r--r--drivers/infiniband/hw/qib/qib.h192
-rw-r--r--drivers/infiniband/hw/qib/qib_7220.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h46
-rw-r--r--drivers/infiniband/hw/qib/qib_cq.c70
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.c283
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.h45
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c66
-rw-r--r--drivers/infiniband/hw/qib/qib_dma.c21
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c194
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c257
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c130
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c171
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c1394
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c551
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c215
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c578
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h356
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c267
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c255
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c285
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c42
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h7
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c104
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c34
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c76
-rw-r--r--drivers/infiniband/hw/qib/qib_srq.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c274
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c26
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c59
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c104
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1026
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c163
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h265
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_x86_64.c14
42 files changed, 5641 insertions, 2124 deletions
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 7c03a70c55a..495be09781b 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -1,7 +1,15 @@
config INFINIBAND_QIB
- tristate "QLogic PCIe HCA support"
- depends on 64BIT && NET
+ tristate "Intel PCIe HCA support"
+ depends on 64BIT
---help---
- This is a low-level driver for QLogic PCIe QLE InfiniBand host
- channel adapters. This driver does not support the QLogic
+ This is a low-level driver for Intel PCIe QLE InfiniBand host
+ channel adapters. This driver does not support the Intel
HyperTransport card (model QHT7140).
+
+config INFINIBAND_QIB_DCA
+ bool "QIB DCA support"
+ depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m)
+ default y
+ ---help---
+ Setting this enables DCA support on some Intel chip sets
+ with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index f12d7bb8b39..57f8103e51f 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
+ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 64c9e7d02d4..c00ae093b6f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,8 +1,8 @@
#ifndef _QIB_KERNEL_H
#define _QIB_KERNEL_H
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -51,6 +51,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/sched.h>
+#include <linux/kthread.h>
#include "qib_common.h"
#include "qib_verbs.h"
@@ -87,8 +88,7 @@ struct qlogic_ib_stats {
};
extern struct qlogic_ib_stats qib_stats;
-extern struct pci_error_handlers qib_pci_err_handler;
-extern struct pci_driver qib_driver;
+extern const struct pci_error_handlers qib_pci_err_handler;
#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
/*
@@ -114,6 +114,11 @@ struct qib_eep_log_mask {
/*
* Below contains all data related to a single context (formerly called port).
*/
+
+#ifdef CONFIG_DEBUG_FS
+struct qib_opcode_stats_perctx;
+#endif
+
struct qib_ctxtdata {
void **rcvegrbuf;
dma_addr_t *rcvegrbuf_phys;
@@ -154,6 +159,8 @@ struct qib_ctxtdata {
*/
/* instead of calculating it */
unsigned ctxt;
+ /* local node of context */
+ int node_id;
/* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
@@ -171,7 +178,9 @@ struct qib_ctxtdata {
/* how many alloc_pages() chunks in rcvegrbuf_pages */
u32 rcvegrbuf_chunks;
/* how many egrbufs per chunk */
- u32 rcvegrbufs_perchunk;
+ u16 rcvegrbufs_perchunk;
+ /* ilog2 of above */
+ u16 rcvegrbufs_perchunk_shift;
/* order for rcvegrbuf_pages */
size_t rcvegrbuf_size;
/* rcvhdrq size (for freeing) */
@@ -220,9 +229,15 @@ struct qib_ctxtdata {
u8 redirect_seq_cnt;
/* ctxt rcvhdrq head offset */
u32 head;
- u32 pkt_count;
+ /* lookaside fields */
+ struct qib_qp *lookaside_qp;
+ u32 lookaside_qpn;
/* QPs waiting for context processing */
struct list_head qp_wait_list;
+#ifdef CONFIG_DEBUG_FS
+ /* verbs stats per CTX */
+ struct qib_opcode_stats_perctx *opstats;
+#endif
};
struct qib_sge_state;
@@ -422,6 +437,24 @@ struct qib_verbs_txreq {
/* how often we check for packet activity for "power on hours (in seconds) */
#define ACTIVITY_TIMER 5
+#define MAX_NAME_SIZE 64
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify;
+#endif
+
+struct qib_msix_entry {
+ struct msix_entry msix;
+ void *arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca;
+ int rcv;
+ struct qib_irq_notify *notifier;
+#endif
+ char name[MAX_NAME_SIZE];
+ cpumask_var_t mask;
+};
+
/* Below is an opaque struct. Each chip (device) can maintain
* private data needed for its operation, but not germane to the
* rest of the driver. For convenience, we define another that
@@ -506,6 +539,7 @@ struct qib_pportdata {
struct qib_devdata *dd;
struct qib_chippport_specific *cpspec; /* chip-specific per-port */
struct kobject pport_kobj;
+ struct kobject pport_cc_kobj;
struct kobject sl2vl_kobj;
struct kobject diagc_kobj;
@@ -517,8 +551,6 @@ struct qib_pportdata {
/* qib_lflags driver is waiting for */
u32 state_wanted;
spinlock_t lflags_lock;
- /* number of (port-specific) interrupts for this port -- saturates... */
- u32 int_counter;
/* ref count for each pkey */
atomic_t pkeyrefs[4];
@@ -530,24 +562,29 @@ struct qib_pportdata {
u64 *statusp;
/* SendDMA related entries */
- spinlock_t sdma_lock;
- struct qib_sdma_state sdma_state;
- unsigned long sdma_buf_jiffies;
+
+ /* read mostly */
struct qib_sdma_desc *sdma_descq;
+ struct workqueue_struct *qib_wq;
+ struct qib_sdma_state sdma_state;
+ dma_addr_t sdma_descq_phys;
+ volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
+ dma_addr_t sdma_head_phys;
+ u16 sdma_descq_cnt;
+
+ /* read/write using lock */
+ spinlock_t sdma_lock ____cacheline_aligned_in_smp;
+ struct list_head sdma_activelist;
+ struct list_head sdma_userpending;
u64 sdma_descq_added;
u64 sdma_descq_removed;
- u16 sdma_descq_cnt;
u16 sdma_descq_tail;
u16 sdma_descq_head;
- u16 sdma_next_intr;
- u16 sdma_reset_wait;
u8 sdma_generation;
- struct tasklet_struct sdma_sw_clean_up_task;
- struct list_head sdma_activelist;
+ u8 sdma_intrequest;
- dma_addr_t sdma_descq_phys;
- volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
- dma_addr_t sdma_head_phys;
+ struct tasklet_struct sdma_sw_clean_up_task
+ ____cacheline_aligned_in_smp;
wait_queue_head_t state_wait; /* for state_wanted */
@@ -624,6 +661,39 @@ struct qib_pportdata {
struct timer_list led_override_timer;
struct xmit_wait cong_stats;
struct timer_list symerr_clear_timer;
+
+ /* Synchronize access between driver writes and sysfs reads */
+ spinlock_t cc_shadow_lock
+ ____cacheline_aligned_in_smp;
+
+ /* Shadow copy of the congestion control table */
+ struct cc_table_shadow *ccti_entries_shadow;
+
+ /* Shadow copy of the congestion control entries */
+ struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+ /* List of congestion control table entries */
+ struct ib_cc_table_entry_shadow *ccti_entries;
+
+ /* 16 congestion entries with each entry corresponding to a SL */
+ struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+ /* Maximum number of congestion control entries that the agent expects
+ * the manager to send.
+ */
+ u16 cc_supported_table_entries;
+
+ /* Total number of congestion control table entries */
+ u16 total_cct_entry;
+
+ /* Bit map identifying service level */
+ u16 cc_sl_control_map;
+
+ /* maximum congestion control table index */
+ u16 ccti_limit;
+
+ /* CA's max number of 64 entry units in the congestion control table */
+ u8 cc_max_table_entries;
};
/* Observers. Not to be taken lightly, possibly not to ship. */
@@ -653,7 +723,7 @@ struct diag_observer_list_elt;
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a qib_pportdata struct,
- * described above) while fields only used by a particualr chip-type are in
+ * described above) while fields only used by a particular chip-type are in
* a qib_chipdata struct, whose contents are opaque to this file.
*/
struct qib_devdata {
@@ -766,7 +836,7 @@ struct qib_devdata {
void (*f_sdma_hw_start_up)(struct qib_pportdata *);
void (*f_sdma_init_early)(struct qib_pportdata *);
void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32);
- void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32);
+ void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32);
u32 (*f_hdrqempty)(struct qib_ctxtdata *);
u64 (*f_portcntr)(struct qib_pportdata *, u32);
u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **,
@@ -780,6 +850,9 @@ struct qib_devdata {
struct qib_ctxtdata *);
void (*f_writescratch)(struct qib_devdata *, u32);
int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
+#endif
char *boardname; /* human readable board info */
@@ -795,8 +868,10 @@ struct qib_devdata {
/* last buffer for user use */
u32 lastctxt_piobuf;
- /* saturating counter of (non-port-specific) device interrupts */
- u32 int_counter;
+ /* reset value */
+ u64 z_int_counter;
+ /* percpu intcounter */
+ u64 __percpu *int_counter;
/* pio bufs allocated per ctxt */
u32 pbufsctxt;
@@ -807,6 +882,10 @@ struct qib_devdata {
* supports, less gives more pio bufs/ctxt, etc.
*/
u32 cfgctxts;
+ /*
+ * number of ctxts available for PSM open
+ */
+ u32 freectxts;
/*
* hint that we should update pioavailshadow before
@@ -856,7 +935,14 @@ struct qib_devdata {
* pio_writing.
*/
spinlock_t pioavail_lock;
-
+ /*
+ * index of last buffer to optimize search for next
+ */
+ u32 last_pio;
+ /*
+ * min kernel pio buffer to optimize search
+ */
+ u32 min_kernel_pio;
/*
* Shadow copies of registers; size indicates read access size.
* Most of them are readonly, but some are write-only register,
@@ -936,7 +1022,9 @@ struct qib_devdata {
/* chip address space used by 4k pio buffers */
u32 align4k;
/* size of each rcvegrbuffer */
- u32 rcvegrbufsize;
+ u16 rcvegrbufsize;
+ /* log2 of above */
+ u16 rcvegrbufsize_shift;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1012,6 +1100,12 @@ struct qib_devdata {
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
+ /* high volume overflow errors defered to tasklet */
+ struct tasklet_struct error_tasklet;
+ /* per device cq worker */
+ struct kthread_worker *worker;
+
+ int assigned_node_id; /* NUMA node closest to HCA */
};
/* hol_state values */
@@ -1049,6 +1143,7 @@ extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
int init_chip_wc_pat(struct qib_devdata *dd, u32);
int qib_enable_wc(struct qib_devdata *dd);
@@ -1090,8 +1185,8 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
int qib_setup_eagerbufs(struct qib_ctxtdata *);
void qib_set_ctxtcnt(struct qib_devdata *);
int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1234,11 +1329,18 @@ int qib_setup_sdma(struct qib_pportdata *);
void qib_teardown_sdma(struct qib_pportdata *);
void __qib_sdma_intr(struct qib_pportdata *);
void qib_sdma_intr(struct qib_pportdata *);
+void qib_user_sdma_send_desc(struct qib_pportdata *dd,
+ struct list_head *pktlist);
int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
u32, struct qib_verbs_txreq *);
/* ppd->sdma_lock should be locked before calling this. */
int qib_sdma_make_progress(struct qib_pportdata *dd);
+static inline int qib_sdma_empty(const struct qib_pportdata *ppd)
+{
+ return ppd->sdma_descq_added == ppd->sdma_descq_removed;
+}
+
/* must be called under qib_sdma_lock */
static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd)
{
@@ -1251,7 +1353,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
}
int qib_sdma_running(struct qib_pportdata *);
-
+void dump_sdma_state(struct qib_pportdata *ppd);
void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
@@ -1342,13 +1444,17 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
const struct pci_device_id *);
void qib_pcie_ddcleanup(struct qib_devdata *);
-int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *);
+int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
int qib_reinit_intr(struct qib_devdata *);
void qib_enable_intx(struct pci_dev *);
void qib_nomsi(struct qib_devdata *);
void qib_nomsix(struct qib_devdata *);
void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
/*
* dma_addr wrappers - all 0's invalid for hw
@@ -1376,6 +1482,7 @@ extern unsigned qib_n_krcv_queues;
extern unsigned qib_sdma_fetch_arb;
extern unsigned qib_compat_ddr_negotiate;
extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
extern struct mutex qib_mutex;
@@ -1405,27 +1512,23 @@ extern struct mutex qib_mutex;
* first to avoid possible serial port delays from printk.
*/
#define qib_early_err(dev, fmt, ...) \
- do { \
- dev_err(dev, fmt, ##__VA_ARGS__); \
- } while (0)
+ dev_err(dev, fmt, ##__VA_ARGS__)
#define qib_dev_err(dd, fmt, ...) \
- do { \
- dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \
- } while (0)
+ dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+
+#define qib_dev_warn(dd, fmt, ...) \
+ dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
#define qib_dev_porterr(dd, port, fmt, ...) \
- do { \
- dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
- qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
- ##__VA_ARGS__); \
- } while (0)
+ dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
+ qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
+ ##__VA_ARGS__)
#define qib_devinfo(pcidev, fmt, ...) \
- do { \
- dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \
- } while (0)
+ dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
/*
* this is used for formatting hw error messages...
@@ -1433,6 +1536,7 @@ extern struct mutex qib_mutex;
struct qib_hwerror_msgs {
u64 mask;
const char *msg;
+ size_t sz;
};
#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b }
diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h
index 21f374aa063..a5356cb4252 100644
--- a/drivers/infiniband/hw/qib/qib_7220.h
+++ b/drivers/infiniband/hw/qib/qib_7220.h
@@ -97,7 +97,7 @@ struct qib_chippport_specific {
u64 iblnkerrsnap;
u64 ibcctrl; /* kr_ibcctrl shadow */
u64 ibcddrctrl; /* kr_ibcddrctrl shadow */
- u64 chase_end;
+ unsigned long chase_end;
u32 last_delay_mult;
};
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 145da404088..5670ace27c6 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,13 +279,12 @@ struct qib_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define QIB_USER_SWMINOR 11
+#define QIB_USER_SWMINOR 13
#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
#ifndef QIB_KERN_TYPE
#define QIB_KERN_TYPE 0
-#define QIB_IDSTR "QLogic kernel.org driver"
#endif
/*
@@ -302,6 +301,19 @@ struct qib_base_info {
#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
/*
+ * Define the driver version number. This is something that refers only
+ * to the driver itself, not the software interfaces it supports.
+ */
+#define QIB_DRIVER_VERSION_BASE "1.11"
+
+/* create the final driver version string */
+#ifdef QIB_IDSTR
+#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
+#else
+#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
+#endif
+
+/*
* If the unit is specified via open, HCA choice is fixed. If port is
* specified, it's also fixed. Otherwise we try to spread contexts
* across ports and HCAs, using different algorithims. WITHIN is
@@ -689,7 +701,37 @@ struct qib_message_header {
__be32 bth[3];
/* fields below this point are in host byte order */
struct qib_header iph;
+ /* fields below are simplified, but should match PSM */
+ /* some are accessed by driver when packet spliting is needed */
__u8 sub_opcode;
+ __u8 flags;
+ __u16 commidx;
+ __u32 ack_seq_num;
+ __u8 flowid;
+ __u8 hdr_dlen;
+ __u16 mqhdr;
+ __u32 uwords[4];
+};
+
+/* sequence number bits for message */
+union qib_seqnum {
+ struct {
+ __u32 seq:11;
+ __u32 gen:8;
+ __u32 flow:5;
+ };
+ struct {
+ __u32 pkt:16;
+ __u32 msg:8;
+ };
+ __u32 val;
+};
+
+/* qib receiving-dma tid-session-member */
+struct qib_tid_session_member {
+ __u16 tid;
+ __u16 offset;
+ __u16 length;
};
/* IB - LRH header consts */
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index a86cbf880f9..ab4e11cfab1 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -34,8 +35,10 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/kthread.h>
#include "qib_verbs.h"
+#include "qib.h"
/**
* qib_cq_enter - add a new entry to the completion queue
@@ -100,14 +103,20 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
- (cq->notify == IB_CQ_SOLICITED && solicited)) {
- cq->notify = IB_CQ_NONE;
- cq->triggered++;
+ (cq->notify == IB_CQ_SOLICITED &&
+ (solicited || entry->status != IB_WC_SUCCESS))) {
+ struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
- queue_work(qib_cq_wq, &cq->comptask);
+ smp_rmb();
+ worker = cq->dd->worker;
+ if (likely(worker)) {
+ cq->notify = IB_CQ_NONE;
+ cq->triggered++;
+ queue_kthread_work(worker, &cq->comptask);
+ }
}
spin_unlock_irqrestore(&cq->lock, flags);
@@ -162,7 +171,7 @@ bail:
return npolled;
}
-static void send_complete(struct work_struct *work)
+static void send_complete(struct kthread_work *work)
{
struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
@@ -286,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
* The number of entries should be >= the number requested or return
* an error.
*/
+ cq->dd = dd_from_dev(dev);
cq->ibcq.cqe = entries;
cq->notify = IB_CQ_NONE;
cq->triggered = 0;
spin_lock_init(&cq->lock);
- INIT_WORK(&cq->comptask, send_complete);
+ init_kthread_work(&cq->comptask, send_complete);
wc->head = 0;
wc->tail = 0;
cq->queue = wc;
@@ -322,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq)
struct qib_ibdev *dev = to_idev(ibcq->device);
struct qib_cq *cq = to_icq(ibcq);
- flush_work(&cq->comptask);
+ flush_kthread_work(&cq->comptask);
spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
spin_unlock(&dev->n_cqs_lock);
@@ -482,3 +492,49 @@ bail_free:
bail:
return ret;
}
+
+int qib_cq_init(struct qib_devdata *dd)
+{
+ int ret = 0;
+ int cpu;
+ struct task_struct *task;
+
+ if (dd->worker)
+ return 0;
+ dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
+ if (!dd->worker)
+ return -ENOMEM;
+ init_kthread_worker(dd->worker);
+ task = kthread_create_on_node(
+ kthread_worker_fn,
+ dd->worker,
+ dd->assigned_node_id,
+ "qib_cq%d", dd->unit);
+ if (IS_ERR(task))
+ goto task_fail;
+ cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+out:
+ return ret;
+task_fail:
+ ret = PTR_ERR(task);
+ kfree(dd->worker);
+ dd->worker = NULL;
+ goto out;
+}
+
+void qib_cq_exit(struct qib_devdata *dd)
+{
+ struct kthread_worker *worker;
+
+ worker = dd->worker;
+ if (!worker)
+ return;
+ /* blocks future queuing from send_complete() */
+ dd->worker = NULL;
+ smp_wmb();
+ flush_kthread_worker(worker);
+ kthread_stop(worker->task);
+ kfree(worker);
+}
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 00000000000..799a0c3bffc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_verbs.h"
+#include "qib_debugfs.h"
+
+static struct dentry *qib_dbg_root;
+
+#define DEBUGFS_FILE(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
+}; \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
+} \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = seq_release \
+};
+
+#define DEBUGFS_FILE_CREATE(name) \
+do { \
+ struct dentry *ent; \
+ ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
+ ibd, &_##name##_file_ops); \
+ if (!ent) \
+ pr_warn("create of " #name " failed\n"); \
+} while (0)
+
+static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+
+static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _opcode_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_user_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu\n", i,
+ (unsigned long long) n_packets,
+ (unsigned long long) n_bytes);
+
+ return 0;
+}
+
+DEBUGFS_FILE(opcode_stats)
+
+static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (!*pos)
+ return SEQ_START_TOKEN;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN)
+ return pos;
+
+ ++*pos;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _ctx_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos;
+ loff_t i, j;
+ u64 n_packets = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(s, "Ctx:npkts\n");
+ return 0;
+ }
+
+ spos = v;
+ i = *spos;
+
+ if (!dd->rcd[i])
+ return SEQ_SKIP;
+
+ for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
+ n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
+
+ if (!n_packets)
+ return SEQ_SKIP;
+
+ seq_printf(s, " %llu:%llu\n", i, n_packets);
+ return 0;
+}
+
+DEBUGFS_FILE(ctx_stats)
+
+static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_qp_iter *iter;
+ loff_t n = *pos;
+
+ iter = qib_qp_iter_init(s->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ loff_t *pos)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ if (!iter)
+ return 0;
+
+ qib_qp_iter_print(s, iter);
+
+ return 0;
+}
+
+DEBUGFS_FILE(qp_stats)
+
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
+{
+ char name[10];
+
+ snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
+ ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
+ if (!ibd->qib_ibdev_dbg) {
+ pr_warn("create of %s failed\n", name);
+ return;
+ }
+ DEBUGFS_FILE_CREATE(opcode_stats);
+ DEBUGFS_FILE_CREATE(ctx_stats);
+ DEBUGFS_FILE_CREATE(qp_stats);
+ return;
+}
+
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
+{
+ if (!qib_dbg_root)
+ goto out;
+ debugfs_remove_recursive(ibd->qib_ibdev_dbg);
+out:
+ ibd->qib_ibdev_dbg = NULL;
+}
+
+void qib_dbg_init(void)
+{
+ qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
+ if (!qib_dbg_root)
+ pr_warn("init of debugfs failed\n");
+}
+
+void qib_dbg_exit(void)
+{
+ debugfs_remove_recursive(qib_dbg_root);
+ qib_dbg_root = NULL;
+}
+
+#endif
+
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 00000000000..7ae983a91b8
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
+#ifndef _QIB_DEBUGFS_H
+#define _QIB_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+struct qib_ibdev;
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
+void qib_dbg_init(void);
+void qib_dbg_exit(void);
+
+#endif
+
+#endif /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 204c4dd9dce..5dfda4c5cc9 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -46,12 +46,16 @@
#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/vmalloc.h>
+#include <linux/export.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include "qib.h"
#include "qib_common.h"
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
/*
* Each client that opens the diag device must read then write
* offset 0, to prevent lossage from random cat or od. diag_state
@@ -542,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct qib_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct qib_devdata *dd;
@@ -586,19 +590,24 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
ppd = &dd->pport[dp.port - 1];
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
- clen = dp.len >> 2;
-
- if ((plen + 4) > ppd->ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
- qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, "
- "failing\n");
+ qib_devinfo(dd->pcidev,
+ "Unable to allocate tmp buffer, failing\n");
ret = -ENOMEM;
goto bail;
}
@@ -634,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
*/
if (dd->flags & QIB_PIO_FLUSH_WC) {
qib_flush_wc();
- qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
qib_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- qib_pio_copy(piobuf + 2, tmpbuf, clen);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen);
if (dd->flags & QIB_USE_SPCL_TRIG) {
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -685,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
const struct diag_observer *op)
{
struct diag_observer_list_elt *olp;
- int ret = -EINVAL;
+ unsigned long flags;
if (!dd || !op)
- goto bail;
- ret = -ENOMEM;
+ return -EINVAL;
olp = vmalloc(sizeof *olp);
if (!olp) {
- printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n");
- goto bail;
+ pr_err("vmalloc for observer failed\n");
+ return -ENOMEM;
}
- if (olp) {
- unsigned long flags;
- spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
- olp->op = op;
- olp->next = dd->diag_observer_list;
- dd->diag_observer_list = olp;
- spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
- ret = 0;
- }
-bail:
- return ret;
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp->op = op;
+ olp->next = dd->diag_observer_list;
+ dd->diag_observer_list = olp;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+ return 0;
}
/* Remove all registered observers when device is closed */
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
index 2920bb39a65..59fe092b4b0 100644
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ b/drivers/infiniband/hw/qib/qib_dma.c
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
.unmap_page = qib_dma_unmap_page,
.map_sg = qib_map_sg,
.unmap_sg = qib_unmap_sg,
- .dma_address = qib_sg_dma_address,
- .dma_len = qib_sg_dma_len,
.sync_single_for_cpu = qib_sync_single_for_cpu,
.sync_single_for_device = qib_sync_single_for_device,
.alloc_coherent = qib_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 9cd193603fb..5bee08f16d7 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -37,6 +38,8 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/prefetch.h>
#include "qib.h"
@@ -44,7 +47,7 @@
* The size has to be longer than this string, so we can append
* board/chip information to it in the init code.
*/
-const char ib_qib_version[] = QIB_IDSTR "\n";
+const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
DEFINE_SPINLOCK(qib_devs_lock);
LIST_HEAD(qib_dev_list);
@@ -61,8 +64,9 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
"Attempt pre-IBTA 1.2 DDR speed negotiation");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic IB driver");
+MODULE_AUTHOR("Intel <ibsupport@intel.com>");
+MODULE_DESCRIPTION("Intel IB driver");
+MODULE_VERSION(QIB_DRIVER_VERSION);
/*
* QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
@@ -71,6 +75,11 @@ MODULE_DESCRIPTION("QLogic IB driver");
*/
#define QIB_PIO_MAXIBHDR 128
+/*
+ * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt.
+ */
+#define QIB_MAX_PKT_RECV 64
+
struct qlogic_ib_stats qib_stats;
const char *qib_get_unit_name(int unit)
@@ -274,24 +283,151 @@ bail:
*/
static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
{
- const u32 chunk = etail / rcd->rcvegrbufs_perchunk;
- const u32 idx = etail % rcd->rcvegrbufs_perchunk;
+ const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
+ const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
- return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize;
+ return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
}
/*
* Returns 1 if error was a CRC, else 0.
* Needed for some chip's synthesized error counters.
*/
-static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt,
- u32 eflags, u32 l, u32 etail, __le32 *rhf_addr,
- struct qib_message_header *hdr)
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+ u32 ctxt, u32 eflags, u32 l, u32 etail,
+ __le32 *rhf_addr, struct qib_message_header *rhdr)
{
u32 ret = 0;
if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
ret = 1;
+ else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+ /* For TIDERR and RC QPs premptively schedule a NAK */
+ struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+ struct qib_other_headers *ohdr = NULL;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct qib_qp *qp = NULL;
+ u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+ u16 lid = be16_to_cpu(hdr->lrh[1]);
+ int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ u32 qp_num;
+ u32 opcode;
+ u32 psn;
+ int diff;
+
+ /* Sanity check packet */
+ if (tlen < 24)
+ goto drop;
+
+ if (lid < QIB_MULTICAST_LID_BASE) {
+ lid &= ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid != ppd->lid))
+ goto drop;
+ }
+
+ /* Check for GRH */
+ if (lnh == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == QIB_LRH_GRH) {
+ u32 vtf;
+
+ ohdr = &hdr->u.l.oth;
+ if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else
+ goto drop;
+
+ /* Get opcode and PSN from packet */
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ opcode >>= 24;
+ psn = be32_to_cpu(ohdr->bth[2]);
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ if (qp_num != QIB_MULTICAST_QPN) {
+ int ruc_res;
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+
+ /*
+ * Handle only RC QPs - for other QP types drop error
+ * packet.
+ */
+ spin_lock(&qp->r_lock);
+
+ /* Check for valid receive state. */
+ if (!(ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto unlock;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ ruc_res =
+ qib_ruc_check_hdr(
+ ibp, hdr,
+ lnh == QIB_LRH_GRH,
+ qp,
+ be32_to_cpu(ohdr->bth[0]));
+ if (ruc_res)
+ goto unlock;
+
+ /* Only deal with RDMA Writes for now */
+ if (opcode <
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+ diff = qib_cmp24(psn, qp->r_psn);
+ if (!qp->r_nak_state && diff >= 0) {
+ ibp->n_rc_seqnak++;
+ qp->r_nak_state =
+ IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Wait to send the sequence
+ * NAK until all packets
+ * in the receive queue have
+ * been processed.
+ * Otherwise, we end up
+ * propagating congestion.
+ */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |=
+ QIB_R_RSP_NAK;
+ atomic_inc(
+ &qp->refcount);
+ list_add_tail(
+ &qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ } /* Out of sequence NAK */
+ } /* QP Request NAKs */
+ break;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ case IB_QPT_UC:
+ default:
+ /* For now don't handle any other QP types */
+ break;
+ }
+
+unlock:
+ spin_unlock(&qp->r_lock);
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for us to finish.
+ */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } /* Unicast QP */
+ } /* Valid packet with TIDErr */
+
+drop:
return ret;
}
@@ -335,7 +471,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
}
- for (last = 0, i = 1; !last && i <= 64; i += !last) {
+ for (last = 0, i = 1; !last; i += !last) {
hdr = dd->f_get_msgheader(dd, rhf_addr);
eflags = qib_hdrget_err_flags(rhf_addr);
etype = qib_hdrget_rcv_type(rhf_addr);
@@ -348,8 +484,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
etail = qib_hdrget_index(rhf_addr);
updegr = 1;
if (tlen > sizeof(*hdr) ||
- etype >= RCVHQ_RCV_TYPE_NON_KD)
+ etype >= RCVHQ_RCV_TYPE_NON_KD) {
ebuf = qib_get_egrbuf(rcd, etail);
+ prefetch_range(ebuf, tlen - sizeof(*hdr));
+ }
}
if (!eflags) {
u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
@@ -371,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
* packets; only qibhdrerr should be set.
*/
if (unlikely(eflags))
- crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l,
+ crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
etail, rhf_addr, hdr);
else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
qib_ib_rcv(rcd, hdr, ebuf, tlen);
@@ -384,6 +522,9 @@ move_along:
l += rsize;
if (l >= maxcnt)
l = 0;
+ if (i == QIB_MAX_PKT_RECV)
+ last = 1;
+
rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
if (dd->flags & QIB_NODMA_RTAIL) {
u32 seq = qib_hdrget_seq(rhf_addr);
@@ -402,13 +543,21 @@ move_along:
*/
lval = l;
if (!last && !(i & 0xf)) {
- dd->f_update_usrhead(rcd, lval, updegr, etail);
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
updegr = 0;
}
}
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for lookaside_qp to finish.
+ */
+ if (rcd->lookaside_qp) {
+ if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
+ wake_up(&rcd->lookaside_qp->wait);
+ rcd->lookaside_qp = NULL;
+ }
rcd->head = l;
- rcd->pkt_count += i;
/*
* Iterate over all QPs waiting to respond.
@@ -444,7 +593,7 @@ bail:
* if no packets were processed.
*/
lval = (u64)rcd->head | dd->rhdrhead_intr_off;
- dd->f_update_usrhead(rcd, lval, updegr, etail);
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
return crcs;
}
@@ -616,8 +765,9 @@ int qib_reset_device(int unit)
qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
- qib_devinfo(dd->pcidev, "Invalid unit number %u or "
- "not initialized or not present\n", unit);
+ qib_devinfo(dd->pcidev,
+ "Invalid unit number %u or not initialized or not present\n",
+ unit);
ret = -ENXIO;
goto bail;
}
@@ -654,11 +804,13 @@ int qib_reset_device(int unit)
else
ret = -EAGAIN;
if (ret)
- qib_dev_err(dd, "Reinitialize unit %u after "
- "reset failed with %d\n", unit, ret);
+ qib_dev_err(dd,
+ "Reinitialize unit %u after reset failed with %d\n",
+ unit, ret);
else
- qib_devinfo(dd->pcidev, "Reinitialized unit %u after "
- "resetting\n", unit);
+ qib_devinfo(dd->pcidev,
+ "Reinitialized unit %u after resetting\n",
+ unit);
bail:
return ret;
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 92d9cfe98a6..4d5d71aaa2b 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
if (oguid > bguid[7]) {
if (bguid[6] == 0xff) {
if (bguid[5] == 0xff) {
- qib_dev_err(dd, "Can't set %s GUID"
- " from base, wraps to"
- " OUI!\n",
- qib_get_unit_name(t));
+ qib_dev_err(dd,
+ "Can't set %s GUID from base, wraps to OUI!\n",
+ qib_get_unit_name(t));
dd->base_guid = 0;
goto bail;
}
@@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
len = sizeof(struct qib_flash);
buf = vmalloc(len);
if (!buf) {
- qib_dev_err(dd, "Couldn't allocate memory to read %u "
- "bytes from eeprom for GUID\n", len);
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
+ len);
goto bail;
}
@@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
csum = flash_csum(ifp, 0);
if (csum != ifp->if_csum) {
- qib_devinfo(dd->pcidev, "Bad I2C flash checksum: "
- "0x%x, not 0x%x\n", csum, ifp->if_csum);
+ qib_devinfo(dd->pcidev,
+ "Bad I2C flash checksum: 0x%x, not 0x%x\n",
+ csum, ifp->if_csum);
goto done;
}
if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
*(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
- qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n",
- *(unsigned long long *) ifp->if_guid);
+ qib_dev_err(dd,
+ "Invalid GUID %llx from flash; ignoring\n",
+ *(unsigned long long *) ifp->if_guid);
/* don't allow GUID if all 0 or all 1's */
goto done;
}
/* complain, but allow it */
if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
- qib_devinfo(dd->pcidev, "Warning, GUID %llx is "
- "default, probably not correct!\n",
- *(unsigned long long *) ifp->if_guid);
+ qib_devinfo(dd->pcidev,
+ "Warning, GUID %llx is default, probably not correct!\n",
+ *(unsigned long long *) ifp->if_guid);
bguid = ifp->if_guid;
if (!bguid[0] && !bguid[1] && !bguid[2]) {
@@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
memcpy(dd->serial, ifp->if_serial,
sizeof ifp->if_serial);
if (!strstr(ifp->if_comment, "Tested successfully"))
- qib_dev_err(dd, "Board SN %s did not pass functional "
- "test: %s\n", dd->serial, ifp->if_comment);
+ qib_dev_err(dd,
+ "Board SN %s did not pass functional test: %s\n",
+ dd->serial, ifp->if_comment);
memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT);
/*
@@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd)
buf = vmalloc(len);
ret = 1;
if (!buf) {
- qib_dev_err(dd, "Couldn't allocate memory to read %u "
- "bytes from eeprom for logging\n", len);
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for logging\n",
+ len);
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 79d9971aff1..b15e34eeef6 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -39,15 +39,19 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
+#include <linux/export.h>
#include "qib.h"
#include "qib_common.h"
#include "qib_user_sdma.h"
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
static int qib_open(struct inode *, struct file *);
static int qib_close(struct inode *, struct file *);
static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
@@ -314,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
}
if (cnt > tidcnt) {
/* make sure it all fits in tid_pg_list */
- qib_devinfo(dd->pcidev, "Process tried to allocate %u "
- "TIDs, only trying max (%u)\n", cnt, tidcnt);
+ qib_devinfo(dd->pcidev,
+ "Process tried to allocate %u TIDs, only trying max (%u)\n",
+ cnt, tidcnt);
cnt = tidcnt;
}
pagep = (struct page **) rcd->tid_pg_list;
@@ -749,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd,
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
- qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x "
- "bytes failed: %d\n", what, rcd->ctxt,
- pfn, len, ret);
+ qib_devinfo(dd->pcidev,
+ "%s ctxt%u mmap of %lx, %x bytes failed: %d\n",
+ what, rcd->ctxt, pfn, len, ret);
bail:
return ret;
}
@@ -770,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd,
*/
sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE;
if ((vma->vm_end - vma->vm_start) > sz) {
- qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen "
- "%lx > PAGE\n", vma->vm_end - vma->vm_start);
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap userreg: reqlen %lx > PAGE\n",
+ vma->vm_end - vma->vm_start);
ret = -EFAULT;
} else {
phys = dd->physaddr + ureg;
@@ -801,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* for it.
*/
if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) {
- qib_devinfo(dd->pcidev, "FAIL mmap piobufs: "
- "reqlen %lx > PAGE\n",
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap piobufs: reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
ret = -EINVAL;
goto bail;
@@ -846,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
size = rcd->rcvegrbuf_size;
total_size = rcd->rcvegrbuf_chunks * size;
if ((vma->vm_end - vma->vm_start) > total_size) {
- qib_devinfo(dd->pcidev, "FAIL on egr bufs: "
- "reqlen %lx > actual %lx\n",
+ qib_devinfo(dd->pcidev,
+ "FAIL on egr bufs: reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
ret = -EINVAL;
@@ -855,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
}
if (vma->vm_flags & VM_WRITE) {
- qib_devinfo(dd->pcidev, "Can't map eager buffers as "
- "writable (flags=%lx)\n", vma->vm_flags);
+ qib_devinfo(dd->pcidev,
+ "Can't map eager buffers as writable (flags=%lx)\n",
+ vma->vm_flags);
ret = -EPERM;
goto bail;
}
@@ -964,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
vma->vm_ops = &qib_file_vm_ops;
- vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
ret = 1;
bail:
@@ -1148,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
return pollflag;
}
+static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
+{
+ struct qib_filedata *fd = fp->private_data;
+ const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+ int local_cpu;
+
+ /*
+ * If process has NOT already set it's affinity, select and
+ * reserve a processor for it on the local NUMA node.
+ */
+ if ((weight >= qib_cpulist_count) &&
+ (cpumask_weight(local_mask) <= qib_cpulist_count)) {
+ for_each_cpu(local_cpu, local_mask)
+ if (!test_and_set_bit(local_cpu, qib_cpulist)) {
+ fd->rec_cpu_num = local_cpu;
+ return;
+ }
+ }
+
+ /*
+ * If process has NOT already set it's affinity, select and
+ * reserve a processor for it, as a rendevous for all
+ * users of the driver. If they don't actually later
+ * set affinity to this cpu, or set it to some other cpu,
+ * it just means that sooner or later we don't recommend
+ * a cpu, and let the scheduler do it's best.
+ */
+ if (weight >= qib_cpulist_count) {
+ int cpu;
+ cpu = find_first_zero_bit(qib_cpulist,
+ qib_cpulist_count);
+ if (cpu == qib_cpulist_count)
+ qib_dev_err(dd,
+ "no cpus avail for affinity PID %u\n",
+ current->pid);
+ else {
+ __set_bit(cpu, qib_cpulist);
+ fd->rec_cpu_num = cpu;
+ }
+ }
+}
+
/*
* Check that userland and driver are compatible for subcontexts.
*/
@@ -1170,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
return user_swminor == 3;
default:
/* >= 4 are compatible (or are expected to be) */
- return user_swminor >= 4;
+ return user_swminor <= QIB_USER_SWMINOR;
}
}
/* make no promises yet for future major versions */
@@ -1252,12 +1302,20 @@ bail:
static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
struct file *fp, const struct qib_user_info *uinfo)
{
+ struct qib_filedata *fd = fp->private_data;
struct qib_devdata *dd = ppd->dd;
struct qib_ctxtdata *rcd;
void *ptmp = NULL;
int ret;
+ int numa_id;
+
+ assign_ctxt_affinity(fp, dd);
- rcd = qib_create_ctxtdata(ppd, ctxt);
+ numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
+ cpu_to_node(fd->rec_cpu_num) :
+ numa_node_id()) : dd->assigned_node_id;
+
+ rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
/*
* Allocate memory for use in qib_tid_update() at open to
@@ -1269,8 +1327,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
GFP_KERNEL);
if (!rcd || !ptmp) {
- qib_dev_err(dd, "Unable to allocate ctxtdata "
- "memory, failing open\n");
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata memory, failing open\n");
ret = -ENOMEM;
goto bailerr;
}
@@ -1284,10 +1342,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
ctxt_fp(fp) = rcd;
qib_stats.sps_ctxts++;
+ dd->freectxts--;
ret = 0;
goto bail;
bailerr:
+ if (fd->rec_cpu_num != -1)
+ __clear_bit(fd->rec_cpu_num, qib_cpulist);
+
dd->rcd[ctxt] = NULL;
kfree(rcd);
kfree(ptmp);
@@ -1379,17 +1441,17 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
/* find device (with ACTIVE ports) with fewest ctxts in use */
for (ndev = 0; ndev < devmax; ndev++) {
struct qib_devdata *dd = qib_lookup(ndev);
- unsigned cused = 0, cfree = 0;
+ unsigned cused = 0, cfree = 0, pusable = 0;
if (!dd)
continue;
if (port && port <= dd->num_pports &&
usable(dd->pport + port - 1))
- dusable = 1;
+ pusable = 1;
else
for (i = 0; i < dd->num_pports; i++)
if (usable(dd->pport + i))
- dusable++;
- if (!dusable)
+ pusable++;
+ if (!pusable)
continue;
for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
ctxt++)
@@ -1477,6 +1539,58 @@ static int qib_open(struct inode *in, struct file *fp)
return fp->private_data ? 0 : -ENOMEM;
}
+static int find_hca(unsigned int cpu, int *unit)
+{
+ int ret = 0, devmax, npresent, nup, ndev;
+
+ *unit = -1;
+
+ devmax = qib_count_units(&npresent, &nup);
+ if (!npresent) {
+ ret = -ENXIO;
+ goto done;
+ }
+ if (!nup) {
+ ret = -ENETDOWN;
+ goto done;
+ }
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+ if (dd) {
+ if (pcibus_to_node(dd->pcidev->bus) < 0) {
+ ret = -EINVAL;
+ goto done;
+ }
+ if (cpu_to_node(cpu) ==
+ pcibus_to_node(dd->pcidev->bus)) {
+ *unit = ndev;
+ goto done;
+ }
+ }
+ }
+done:
+ return ret;
+}
+
+static int do_qib_user_sdma_queue_create(struct file *fp)
+{
+ struct qib_filedata *fd = fp->private_data;
+ struct qib_ctxtdata *rcd = fd->rcd;
+ struct qib_devdata *dd = rcd->dd;
+
+ if (dd->flags & QIB_HAS_SEND_DMA) {
+
+ fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+ dd->unit,
+ rcd->ctxt,
+ fd->subctxt);
+ if (!fd->pq)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/*
* Get ctxt early, so can set affinity prior to memory allocation.
*/
@@ -1509,60 +1623,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
if (qib_compatible_subctxts(swmajor, swminor) &&
uinfo->spu_subctxt_cnt) {
ret = find_shared_ctxt(fp, uinfo);
- if (ret) {
- if (ret > 0)
- ret = 0;
- goto done_chk_sdma;
+ if (ret > 0) {
+ ret = do_qib_user_sdma_queue_create(fp);
+ if (!ret)
+ assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+ goto done_ok;
}
}
- i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE;
+ i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
if (i_minor)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
- else
+ else {
+ int unit;
+ const unsigned int cpu = cpumask_first(&current->cpus_allowed);
+ const unsigned int weight =
+ cpumask_weight(&current->cpus_allowed);
+
+ if (weight == 1 && !test_bit(cpu, qib_cpulist))
+ if (!find_hca(cpu, &unit) && unit >= 0)
+ if (!find_free_ctxt(unit, fp, uinfo)) {
+ ret = 0;
+ goto done_chk_sdma;
+ }
ret = get_a_ctxt(fp, uinfo, alg);
-
-done_chk_sdma:
- if (!ret) {
- struct qib_filedata *fd = fp->private_data;
- const struct qib_ctxtdata *rcd = fd->rcd;
- const struct qib_devdata *dd = rcd->dd;
-
- if (dd->flags & QIB_HAS_SEND_DMA) {
- fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
- dd->unit,
- rcd->ctxt,
- fd->subctxt);
- if (!fd->pq)
- ret = -ENOMEM;
- }
-
- /*
- * If process has NOT already set it's affinity, select and
- * reserve a processor for it, as a rendevous for all
- * users of the driver. If they don't actually later
- * set affinity to this cpu, or set it to some other cpu,
- * it just means that sooner or later we don't recommend
- * a cpu, and let the scheduler do it's best.
- */
- if (!ret && cpus_weight(current->cpus_allowed) >=
- qib_cpulist_count) {
- int cpu;
- cpu = find_first_zero_bit(qib_cpulist,
- qib_cpulist_count);
- if (cpu != qib_cpulist_count) {
- __set_bit(cpu, qib_cpulist);
- fd->rec_cpu_num = cpu;
- }
- } else if (cpus_weight(current->cpus_allowed) == 1 &&
- test_bit(first_cpu(current->cpus_allowed),
- qib_cpulist))
- qib_devinfo(dd->pcidev, "%s PID %u affinity "
- "set to cpu %d; already allocated\n",
- current->comm, current->pid,
- first_cpu(current->cpus_allowed));
}
+done_chk_sdma:
+ if (!ret)
+ ret = do_qib_user_sdma_queue_create(fp);
+done_ok:
mutex_unlock(&qib_mutex);
done:
@@ -1657,7 +1747,7 @@ static int qib_do_user_init(struct file *fp,
* 0 to 1. So for those chips, we turn it off and then back on.
* This will (very briefly) affect any other open ctxts, but the
* duration is very short, and therefore isn't an issue. We
- * explictly set the in-memory tail copy to 0 beforehand, so we
+ * explicitly set the in-memory tail copy to 0 beforehand, so we
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
@@ -1791,6 +1881,7 @@ static int qib_close(struct inode *in, struct file *fp)
if (dd->pageshadow)
unlock_expected_tids(rcd);
qib_stats.sps_ctxts--;
+ dd->freectxts++;
}
mutex_unlock(&qib_mutex);
@@ -1904,8 +1995,9 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit)
struct qib_ctxtdata *rcd;
unsigned ctxt;
int ret = 0;
+ unsigned long flags;
- spin_lock(&ppd->dd->uctxt_lock);
+ spin_lock_irqsave(&ppd->dd->uctxt_lock, flags);
for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts;
ctxt++) {
rcd = ppd->dd->rcd[ctxt];
@@ -1924,7 +2016,7 @@ int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit)
ret = 1;
break;
}
- spin_unlock(&ppd->dd->uctxt_lock);
+ spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags);
return ret;
}
@@ -2180,8 +2272,7 @@ int qib_cdev_init(int minor, const char *name,
cdev = cdev_alloc();
if (!cdev) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Could not allocate cdev for minor %d, %s\n",
+ pr_err("Could not allocate cdev for minor %d, %s\n",
minor, name);
ret = -ENOMEM;
goto done;
@@ -2193,19 +2284,17 @@ int qib_cdev_init(int minor, const char *name,
ret = cdev_add(cdev, dev, 1);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Could not add cdev for minor %d, %s (err %d)\n",
+ pr_err("Could not add cdev for minor %d, %s (err %d)\n",
minor, name, -ret);
goto err_cdev;
}
- device = device_create(qib_class, NULL, dev, NULL, name);
+ device = device_create(qib_class, NULL, dev, NULL, "%s", name);
if (!IS_ERR(device))
goto done;
ret = PTR_ERR(device);
device = NULL;
- printk(KERN_ERR QIB_DRV_NAME ": Could not create "
- "device for minor %d, %s (err %d)\n",
+ pr_err("Could not create device for minor %d, %s (err %d)\n",
minor, name, -ret);
err_cdev:
cdev_del(cdev);
@@ -2240,16 +2329,14 @@ int __init qib_dev_init(void)
ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME ": Could not allocate "
- "chrdev region (err %d)\n", -ret);
+ pr_err("Could not allocate chrdev region (err %d)\n", -ret);
goto done;
}
qib_class = class_create(THIS_MODULE, "ipath");
if (IS_ERR(qib_class)) {
ret = PTR_ERR(qib_class);
- printk(KERN_ERR QIB_DRV_NAME ": Could not create "
- "device class (err %d)\n", -ret);
+ pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(qib_dev, QIB_NMINORS);
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f99bddc0171..cab610ccd50 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -44,10 +45,10 @@
static struct super_block *qib_super;
-#define private2dd(file) ((file)->f_dentry->d_inode->i_private)
+#define private2dd(file) (file_inode(file)->i_private)
static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, const struct file_operations *fops,
+ umode_t mode, const struct file_operations *fops,
void *data)
{
int error;
@@ -60,14 +61,14 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_uid = 0;
- inode->i_gid = 0;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
inode->i_blocks = 0;
inode->i_atime = CURRENT_TIME;
inode->i_mtime = inode->i_atime;
inode->i_ctime = inode->i_atime;
inode->i_private = data;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
inc_nlink(dir);
@@ -82,7 +83,7 @@ bail:
return error;
}
-static int create_file(const char *name, mode_t mode,
+static int create_file(const char *name, umode_t mode,
struct dentry *parent, struct dentry **dentry,
const struct file_operations *fops, void *data)
{
@@ -104,6 +105,7 @@ static int create_file(const char *name, mode_t mode,
static ssize_t driver_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ qib_stats.sps_ints = qib_sps_ints();
return simple_read_from_buffer(buf, count, ppos, &qib_stats,
sizeof qib_stats);
}
@@ -170,7 +172,7 @@ static const struct file_operations cntr_ops[] = {
};
/*
- * Could use file->f_dentry->d_inode->i_ino to figure out which file,
+ * Could use file_inode(file)->i_ino to figure out which file,
* instead of separate routine for each, but for now, this works...
*/
@@ -382,7 +384,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
&simple_dir_operations, dd);
if (ret) {
- printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
+ pr_err("create_file(%s) failed: %d\n", unit, ret);
goto bail;
}
@@ -390,21 +392,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp,
&cntr_ops[0], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/counters) failed: %d\n",
+ pr_err("create_file(%s/counters) failed: %d\n",
unit, ret);
goto bail;
}
ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp,
&cntr_ops[1], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n",
+ pr_err("create_file(%s/counter_names) failed: %d\n",
unit, ret);
goto bail;
}
ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp,
&portcntr_ops[0], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, "portcounter_names", ret);
goto bail;
}
@@ -416,7 +418,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
&portcntr_ops[i], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, fname, ret);
goto bail;
}
@@ -426,7 +428,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
&qsfp_ops[i - 1], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, fname, ret);
goto bail;
}
@@ -435,7 +437,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret)
- printk(KERN_ERR "create_file(%s/flash) failed: %d\n",
+ pr_err("create_file(%s/flash) failed: %d\n",
unit, ret);
bail:
return ret;
@@ -453,18 +455,15 @@ static int remove_file(struct dentry *parent, char *name)
goto bail;
}
- spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_locked(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, tmp);
} else {
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
}
+ dput(tmp);
ret = 0;
bail:
@@ -489,10 +488,11 @@ static int remove_device_files(struct super_block *sb,
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
- printk(KERN_ERR "Lookup of %s failed\n", unit);
+ pr_err("Lookup of %s failed\n", unit);
goto bail;
}
+ mutex_lock(&dir->d_inode->i_mutex);
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -507,8 +507,10 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- d_delete(dir);
+ mutex_unlock(&dir->d_inode->i_mutex);
ret = simple_rmdir(root->d_inode, dir);
+ d_delete(dir);
+ dput(dir);
bail:
mutex_unlock(&root->d_inode->i_mutex);
@@ -535,7 +537,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
ret = simple_fill_super(sb, QIBFS_MAGIC, files);
if (ret) {
- printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
+ pr_err("simple_fill_super failed: %d\n", ret);
goto bail;
}
@@ -606,6 +608,7 @@ static struct file_system_type qibfs_fs_type = {
.mount = qibfs_mount,
.kill_sb = qibfs_kill_super,
};
+MODULE_ALIAS_FS("ipathfs");
int __init qib_init_qibfs(void)
{
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a5e29dbb953..d68266ac761 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
* All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64);
/*
* This file contains all the chip-specific register information and
- * access functions for the QLogic QLogic_IB PCI-Express chip.
+ * access functions for the Intel Intel_IB PCI-Express chip.
*
*/
@@ -753,8 +754,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
return;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
return;
}
qib_stats.sps_hwerrs++;
@@ -779,13 +780,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
* or it's occurred within the last 5 seconds.
*/
if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID))
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
if (hwerrs & ~IB_HWE_BITSEXTANT)
- qib_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~IB_HWE_BITSEXTANT));
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT));
ctrl = qib_read_kreg32(dd, kr_control);
if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) {
@@ -815,8 +817,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcat(msg, "[Memory BIST test failed, InfiniPath hardware"
- " unusable]", msgl);
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -868,8 +871,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
*msg = 0; /* recovered from all of them */
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* for /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -1017,9 +1021,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
qib_inc_eeprom_err(dd, log_idx, 1);
if (errs & ~IB_E_BITSEXTANT)
- qib_dev_err(dd, "error interrupt with unknown errors "
- "%llx set\n",
- (unsigned long long) (errs & ~IB_E_BITSEXTANT));
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
if (errs & E_SUM_ERRS) {
qib_disarm_6120_senderrbufs(ppd);
@@ -1089,8 +1093,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
}
if (errs & ERR_MASK(ResetNegated)) {
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1541,8 +1545,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat)
qib_stats.sps_errints++;
estat = qib_read_kreg64(dd, kr_errstatus);
if (!estat)
- qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
- "but no error bits set!\n", istat);
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
handle_6120_errors(dd, estat);
}
@@ -1629,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1715,16 +1718,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd)
}
if (!dd->cspec->irq)
- qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
- "work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
else {
int ret;
ret = request_irq(dd->cspec->irq, qib_6120intr, 0,
QIB_DRV_NAME, dd);
if (ret)
- qib_dev_err(dd, "Couldn't setup interrupt "
- "(irq=%d): %d\n", dd->cspec->irq,
- ret);
+ qib_dev_err(dd,
+ "Couldn't setup interrupt (irq=%d): %d\n",
+ dd->cspec->irq, ret);
}
}
@@ -1759,8 +1762,9 @@ static void pe_boardname(struct qib_devdata *dd)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
- qib_dev_err(dd, "Unsupported InfiniPath hardware revision "
- "%u.%u!\n", dd->majrev, dd->minrev);
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
snprintf(dd->boardversion, sizeof(dd->boardversion),
"ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -1799,10 +1803,11 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
/*
* Keep chip from being accessed until we are ready. Use
* writeq() directly, to allow the write even though QIB_PRESENT
- * isnt' set.
+ * isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler re-ordering around actual reset */
@@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
bail:
if (ret) {
if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
- qib_dev_err(dd, "Reset failed to setup PCIe or "
- "interrupts; continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
/* clear the reset error, init error/hwerror mask */
qib_6120_init_hwerrors(dd);
/* for Rev2 error interrupts; nop for rev 1 */
@@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
}
pa >>= 11;
if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr,
}
pa >>= 11;
if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -2074,11 +2081,13 @@ static void qib_6120_config_ctxts(struct qib_devdata *dd)
}
static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
- qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
if (updegr)
qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
}
static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd)
@@ -2103,7 +2112,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
dd->rcd[0]->rcvhdrq_size,
&dd->cspec->dummy_hdrq_phys,
- GFP_KERNEL | __GFP_COMP);
+ GFP_ATOMIC | __GFP_COMP);
if (!dd->cspec->dummy_hdrq) {
qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
/* fallback to just 0'ing */
@@ -2171,7 +2180,7 @@ static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op,
* Init the context registers also; if we were
* disabled, tail and head should both be zero
* already from the enable, but since we don't
- * know, we have to do it explictly.
+ * know, we have to do it explicitly.
*/
val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
@@ -2926,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what)
ppd->dd->unit, ppd->port);
} else if (!strncmp(what, "off", 3)) {
ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -3130,6 +3140,7 @@ static void get_6120_chip_params(struct qib_devdata *dd)
val = qib_read_kreg64(dd, kr_sendpiobufcnt);
dd->piobcnt2k = val & ~0U;
dd->piobcnt4k = val >> 32;
+ dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1;
/* these may be adjusted in init_chip_wc_pat() */
dd->pio2kbase = (u32 __iomem *)
(((char __iomem *)dd->kregbase) + dd->pio2k_bufbase);
@@ -3183,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
return ret;
@@ -3215,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -3255,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR;
ppd->link_width_enabled = IB_WIDTH_4X;
@@ -3273,6 +3285,8 @@ static int init_6120_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_6120_tidtemplate(dd);
@@ -3451,6 +3465,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
return -ENXIO;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ return 0;
+}
+#endif
+
/* Dummy function, as 6120 boards never disable EEPROM Write */
static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
{
@@ -3526,6 +3547,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
dd->f_xgxs_reset = qib_6120_xgxs_reset;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_6120_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_6120_notify_dca;
+#endif
/*
* Do remaining pcie setup and save pcie values in dd.
* Any error printing is already done by the init code.
@@ -3546,8 +3570,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
goto bail;
if (qib_pcie_params(dd, 8, NULL, NULL))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
dd->cspec->irq = pdev->irq; /* save IRQ */
/* clear diagctrl register, in case diags were running and crashed */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 6fd8d74e739..7dec89fdc12 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -39,6 +39,7 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/module.h>
#include <linux/io.h>
#include <rdma/ib_verbs.h>
@@ -1050,7 +1051,7 @@ static void reenable_7220_chase(unsigned long opaque)
static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
{
u8 ibclt;
- u64 tnow;
+ unsigned long tnow;
ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState);
@@ -1065,9 +1066,9 @@ static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
case IB_7220_LT_STATE_CFGWAITRMT:
case IB_7220_LT_STATE_TXREVLANES:
case IB_7220_LT_STATE_CFGENH:
- tnow = get_jiffies_64();
+ tnow = jiffies;
if (ppd->cpspec->chase_end &&
- time_after64(tnow, ppd->cpspec->chase_end)) {
+ time_after(tnow, ppd->cpspec->chase_end)) {
ppd->cpspec->chase_end = 0;
qib_set_ib_7220_lstate(ppd,
QLOGIC_IB_IBCC_LINKCMD_DOWN,
@@ -1110,9 +1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
sdma_7220_errors(ppd, errs);
if (errs & ~IB_E_BITSEXTANT)
- qib_dev_err(dd, "error interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (errs & ~IB_E_BITSEXTANT));
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
if (errs & E_SUM_ERRS) {
qib_disarm_7220_senderrbufs(ppd);
@@ -1191,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
}
if (errs & ERR_MASK(ResetNegated)) {
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1304,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
goto bail;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
goto bail;
}
qib_stats.sps_hwerrs++;
@@ -1328,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_inc_eeprom_err(dd, log_idx, 1);
if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC |
RXE_PARITY))
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
if (hwerrs & ~IB_HWE_BITSEXTANT)
- qib_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~IB_HWE_BITSEXTANT));
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT));
if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR)
qib_sd7220_clr_ibpar(dd);
@@ -1361,8 +1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcat(msg, "[Memory BIST test failed, "
- "InfiniPath hardware unusable]", msgl);
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -1408,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* For /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -1692,8 +1696,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
spin_unlock_irqrestore(&ppd->lflags_lock, flags);
wake_up(&ppd->cpspec->autoneg_wait);
- cancel_delayed_work(&ppd->cpspec->autoneg_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
shutdown_7220_relock_poll(ppd->dd);
val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg);
@@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat)
qib_stats.sps_errints++;
estat = qib_read_kreg64(dd, kr_errstatus);
if (!estat)
- qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
- "but no error bits set!\n", istat);
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
else
handle_7220_errors(dd, estat);
}
@@ -1958,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
-
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
unlikely_7220_intr(dd, istat);
@@ -2023,17 +2024,18 @@ bail:
static void qib_setup_7220_interrupt(struct qib_devdata *dd)
{
if (!dd->cspec->irq)
- qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
- "work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
else {
int ret = request_irq(dd->cspec->irq, qib_7220intr,
dd->msi_lo ? 0 : IRQF_SHARED,
QIB_DRV_NAME, dd);
if (ret)
- qib_dev_err(dd, "Couldn't setup %s interrupt "
- "(irq=%d): %d\n", dd->msi_lo ?
- "MSI" : "INTx", dd->cspec->irq, ret);
+ qib_dev_err(dd,
+ "Couldn't setup %s interrupt (irq=%d): %d\n",
+ dd->msi_lo ? "MSI" : "INTx",
+ dd->cspec->irq, ret);
}
}
@@ -2072,9 +2074,9 @@ static void qib_7220_boardname(struct qib_devdata *dd)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
- qib_dev_err(dd, "Unsupported InfiniPath hardware "
- "revision %u.%u!\n",
- dd->majrev, dd->minrev);
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
snprintf(dd->boardversion, sizeof(dd->boardversion),
"ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -2112,10 +2114,11 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
/*
* Keep chip from being accessed until we are ready. Use
* writeq() directly, to allow the write even though QIB_PRESENT
- * isnt' set.
+ * isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler reordering around actual reset */
@@ -2146,8 +2149,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
bail:
if (ret) {
if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
- qib_dev_err(dd, "Reset failed to setup PCIe or "
- "interrupts; continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
/* hold IBC in reset, no sends, etc till later */
qib_write_kreg(dd, kr_control, 0ULL);
@@ -2187,8 +2190,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
return;
}
if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -2297,7 +2301,7 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
nchipctxts = qib_read_kreg32(dd, kr_portcnt);
dd->cspec->numctxts = nchipctxts;
if (qib_n_krcv_queues > 1) {
- dd->qpn_mask = 0x3f;
+ dd->qpn_mask = 0x3e;
dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
if (dd->first_user_ctxt > nchipctxts)
dd->first_user_ctxt = nchipctxts;
@@ -2435,6 +2439,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
int lsb, ret = 0, setforce = 0;
u16 lcmd, licmd;
unsigned long flags;
+ u32 tmp = 0;
switch (which) {
case QIB_IB_CFG_LIDLMC:
@@ -2468,9 +2473,6 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
maskr = IBA7220_IBC_WIDTH_MASK;
lsb = IBA7220_IBC_WIDTH_SHIFT;
setforce = 1;
- spin_lock_irqsave(&ppd->lflags_lock, flags);
- ppd->lflags |= QIBL_IB_FORCE_NOTIFY;
- spin_unlock_irqrestore(&ppd->lflags_lock, flags);
break;
case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */
@@ -2480,7 +2482,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
* we command the link down. As with width, only write the
* actual register if the link is currently down, otherwise
* takes effect on next link change. Since setting is being
- * explictly requested (via MAD or sysfs), clear autoneg
+ * explicitly requested (via MAD or sysfs), clear autoneg
* failure status if speed autoneg is enabled.
*/
ppd->link_speed_enabled = val;
@@ -2644,6 +2646,28 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
goto bail;
}
qib_set_ib_7220_lstate(ppd, lcmd, licmd);
+
+ maskr = IBA7220_IBC_WIDTH_MASK;
+ lsb = IBA7220_IBC_WIDTH_SHIFT;
+ tmp = (ppd->cpspec->ibcddrctrl >> lsb) & maskr;
+ /* If the width active on the chip does not match the
+ * width in the shadow register, write the new active
+ * width to the chip.
+ * We don't have to worry about speed as the speed is taken
+ * care of by set_7220_ibspeed_fast called by ib_updown.
+ */
+ if (ppd->link_width_enabled-1 != tmp) {
+ ppd->cpspec->ibcddrctrl &= ~(maskr << lsb);
+ ppd->cpspec->ibcddrctrl |=
+ (((u64)(ppd->link_width_enabled-1) & maskr) <<
+ lsb);
+ qib_write_kreg(dd, kr_ibcddrctrl,
+ ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
goto bail;
case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
@@ -2686,8 +2710,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
/* enable heart beat again */
val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -2703,11 +2728,13 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
}
static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
- qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
if (updegr)
qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
}
static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd)
@@ -2779,7 +2806,7 @@ static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op,
* Init the context registers also; if we were
* disabled, tail and head should both be zero
* already from the enable, but since we don't
- * know, we have to do it explictly.
+ * know, we have to do it explicitly.
*/
val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
@@ -3285,8 +3312,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
if (!dd->msi_lo)
return 0;
- qib_devinfo(dd->pcidev, "MSI interrupt not detected,"
- " trying INTx interrupts\n");
+ qib_devinfo(dd->pcidev,
+ "MSI interrupt not detected, trying INTx interrupts\n");
qib_7220_free_irq(dd);
qib_enable_intx(dd->pcidev);
/*
@@ -3515,8 +3542,8 @@ static void try_7220_autoneg(struct qib_pportdata *ppd)
toggle_7220_rclkrls(ppd->dd);
/* 2 msec is minimum length of a poll cycle */
- schedule_delayed_work(&ppd->cpspec->autoneg_work,
- msecs_to_jiffies(2));
+ queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+ msecs_to_jiffies(2));
}
/*
@@ -3958,11 +3985,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
qib_register_observer(dd, &sendctrl_observer);
@@ -3992,8 +4018,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -4033,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
init_waitqueue_head(&cpspec->autoneg_wait);
INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
@@ -4066,6 +4094,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7220_tidtemplate(dd);
@@ -4133,6 +4163,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
dd->cspec->sdmabufcnt;
dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+ dd->last_pio = dd->cspec->lastbuf_for_pio;
dd->pbufsctxt = dd->lastctxt_piobuf /
(dd->cfgctxts - dd->first_user_ctxt);
@@ -4482,6 +4513,13 @@ bail:
return ret;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ return 0;
+}
+#endif
+
/* Dummy function, as 7220 boards never disable EEPROM Write */
static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
{
@@ -4556,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
dd->f_xgxs_reset = qib_7220_xgxs_reset;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_7220_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7220_notify_dca;
+#endif
/*
* Do remaining pcie setup and save pcie values in dd.
* Any error printing is already done by the init code.
@@ -4588,8 +4629,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
break;
}
if (qib_pcie_params(dd, minwidth, NULL, NULL))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
/* save IRQ for possible later use */
dd->cspec->irq = pdev->irq;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 584d443b533..a7eb32517a0 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -40,14 +41,22 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/jiffies.h>
+#include <linux/module.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
#include "qib.h"
#include "qib_7322_regs.h"
#include "qib_qsfp.h"
#include "qib_mad.h"
+#include "qib_verbs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
static void qib_setup_7322_setextled(struct qib_pportdata *, u32);
static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t);
@@ -71,6 +80,10 @@ static void qib_7322_mini_pcs_reset(struct qib_pportdata *);
static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32);
static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
+static void serdes_7322_los_enable(struct qib_pportdata *, int);
+static int serdes_7322_init_old(struct qib_pportdata *);
+static int serdes_7322_init_new(struct qib_pportdata *);
+static void dump_sdma_7322_state(struct qib_pportdata *);
#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
@@ -111,6 +124,25 @@ static ushort qib_singleport;
module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
+static ushort qib_krcvq01_no_msi;
+module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO);
+MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2");
+
+/*
+ * Receive header queue sizes
+ */
+static unsigned qib_rcvhdrcnt;
+module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrcnt, "receive header count");
+
+static unsigned qib_rcvhdrsize;
+module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words");
+
+static unsigned qib_rcvhdrentsize;
+module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words");
+
#define MAX_ATTEN_LEN 64 /* plenty for any real system */
/* for read back, default index is ~5m copper cable */
static char txselect_list[MAX_ATTEN_LEN] = "10";
@@ -314,7 +346,7 @@ MODULE_PARM_DESC(txselect, \
#define krp_serdesctrl KREG_IBPORT_IDX(IBSerdesCtrl)
/*
- * Per-context kernel registers. Acess only with qib_read_kreg_ctxt()
+ * Per-context kernel registers. Access only with qib_read_kreg_ctxt()
* or qib_write_kreg_ctxt()
*/
#define krc_rcvhdraddr KREG_IDX(RcvHdrAddr0)
@@ -379,7 +411,6 @@ MODULE_PARM_DESC(txselect, \
#define crp_txdroppedpkt CREG_IDX(TxDroppedPktCnt)
#define crp_txhdrerr CREG_IDX(TxHeadersErrCnt)
#define crp_txlenerr CREG_IDX(TxLenErrCnt)
-#define crp_txlenerr CREG_IDX(TxLenErrCnt)
#define crp_txminmaxlenerr CREG_IDX(TxMaxMinLenErrCnt)
#define crp_txsdmadesc CREG_IDX(TxSDmaDescCnt)
#define crp_txunderrun CREG_IDX(TxUnderrunCnt)
@@ -451,6 +482,8 @@ static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = {
#define IB_7322_LT_STATE_RECOVERIDLE 0x0f
#define IB_7322_LT_STATE_CFGENH 0x10
#define IB_7322_LT_STATE_CFGTEST 0x11
+#define IB_7322_LT_STATE_CFGWAITRMTTEST 0x12
+#define IB_7322_LT_STATE_CFGWAITENH 0x13
/* link state machine states from IBC */
#define IB_7322_L_STATE_DOWN 0x0
@@ -480,14 +513,24 @@ static const u8 qib_7322_physportstate[0x20] = {
IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
[IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH,
[IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN,
- [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
- [0x13] = IB_PHYSPORTSTATE_CFG_WAIT_ENH,
+ [IB_7322_LT_STATE_CFGWAITRMTTEST] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGWAITENH] =
+ IB_PHYSPORTSTATE_CFG_WAIT_ENH,
[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify {
+ int rcv;
+ void *arg;
+ struct irq_affinity_notify notify;
+};
+#endif
+
struct qib_chip_specific {
u64 __iomem *cregbase;
u64 *cntrs;
@@ -515,8 +558,13 @@ struct qib_chip_specific {
u32 lastbuf_for_pio;
u32 stay_in_freeze;
u32 recovery_ports_initted;
- struct msix_entry *msix_entries;
- void **msix_arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ u32 dca_ctrl;
+ int rhdr_cpu[18];
+ int sdma_cpu[2];
+ u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
+#endif
+ struct qib_msix_entry *msix_entries;
unsigned long *sendchkenable;
unsigned long *sendgrhchk;
unsigned long *sendibchk;
@@ -543,7 +591,8 @@ struct vendor_txdds_ent {
static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
-#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */
+#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
+#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
#define H1_FORCE_VAL 8
@@ -588,8 +637,8 @@ struct qib_chippport_specific {
u64 ibmalfsnap;
u64 ibcctrl_a; /* krp_ibcctrl_a shadow */
u64 ibcctrl_b; /* krp_ibcctrl_b shadow */
- u64 qdr_dfe_time;
- u64 chase_end;
+ unsigned long qdr_dfe_time;
+ unsigned long chase_end;
u32 autoneg_tries;
u32 recovery_init;
u32 qdr_dfe_on;
@@ -604,6 +653,7 @@ struct qib_chippport_specific {
u8 ibmalfusesnap;
struct qib_qsfp_data qsfp_data;
char epmsgbuf[192]; /* for port error interrupt msg buffer */
+ char sdmamsgbuf[192]; /* for per-port sdma error messages */
};
static struct {
@@ -611,27 +661,75 @@ static struct {
irq_handler_t handler;
int lsb;
int port; /* 0 if not port-specific, else port # */
+ int dca;
} irq_table[] = {
- { QIB_DRV_NAME, qib_7322intr, -1, 0 },
- { QIB_DRV_NAME " (buf avail)", qib_7322bufavail,
- SYM_LSB(IntStatus, SendBufAvail), 0 },
- { QIB_DRV_NAME " (sdma 0)", sdma_intr,
- SYM_LSB(IntStatus, SDmaInt_0), 1 },
- { QIB_DRV_NAME " (sdma 1)", sdma_intr,
- SYM_LSB(IntStatus, SDmaInt_1), 2 },
- { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr,
- SYM_LSB(IntStatus, SDmaIdleInt_0), 1 },
- { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr,
- SYM_LSB(IntStatus, SDmaIdleInt_1), 2 },
- { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr,
- SYM_LSB(IntStatus, SDmaProgressInt_0), 1 },
- { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr,
- SYM_LSB(IntStatus, SDmaProgressInt_1), 2 },
- { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr,
- SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 },
- { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr,
- SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 },
+ { "", qib_7322intr, -1, 0, 0 },
+ { " (buf avail)", qib_7322bufavail,
+ SYM_LSB(IntStatus, SendBufAvail), 0, 0},
+ { " (sdma 0)", sdma_intr,
+ SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
+ { " (sdma 1)", sdma_intr,
+ SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
+ { " (sdmaI 0)", sdma_idle_intr,
+ SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
+ { " (sdmaI 1)", sdma_idle_intr,
+ SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
+ { " (sdmaP 0)", sdma_progress_intr,
+ SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
+ { " (sdmaP 1)", sdma_progress_intr,
+ SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
+ { " (sdmaC 0)", sdma_cleanup_intr,
+ SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
+ { " (sdmaC 1)", sdma_cleanup_intr,
+ SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
+};
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static const struct dca_reg_map {
+ int shadow_inx;
+ int lsb;
+ u64 mask;
+ u16 regno;
+} dca_rcvhdr_reg_map[] = {
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
};
+#endif
/* ibcctrl bits */
#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
@@ -655,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *);
static void setup_7322_link_recovery(struct qib_pportdata *, u32);
static void check_7322_rxe_status(struct qib_pportdata *);
static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static void qib_setup_dca(struct qib_devdata *dd);
+static void setup_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+static void reset_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+#endif
/**
* qib_read_ureg32 - read 32-bit virtualized per-context register
@@ -1084,9 +1189,9 @@ static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd,
#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */
#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \
- .msg = #fldname }
+ .msg = #fldname , .sz = sizeof(#fldname) }
#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \
- fldname##Mask##_##port), .msg = #fldname }
+ fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) }
static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = {
HWE_AUTO_P(IBSerdesPClkNotDetect, 1),
HWE_AUTO_P(IBSerdesPClkNotDetect, 0),
@@ -1104,14 +1209,16 @@ static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = {
HWE_AUTO_P(IBCBusFromSPCParityErr, 0),
HWE_AUTO(statusValidNoEop),
HWE_AUTO(LATriggered),
- { .mask = 0 }
+ { .mask = 0, .sz = 0 }
};
#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \
- .msg = #fldname }
+ .msg = #fldname, .sz = sizeof(#fldname) }
#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \
- .msg = #fldname }
+ .msg = #fldname, .sz = sizeof(#fldname) }
static const struct qib_hwerror_msgs qib_7322error_msgs[] = {
+ E_AUTO(RcvEgrFullErr),
+ E_AUTO(RcvHdrFullErr),
E_AUTO(ResetNegated),
E_AUTO(HardwareErr),
E_AUTO(InvalidAddrErr),
@@ -1124,9 +1231,7 @@ static const struct qib_hwerror_msgs qib_7322error_msgs[] = {
E_AUTO(SendSpecialTriggerErr),
E_AUTO(SDmaWrongPortErr),
E_AUTO(SDmaBufMaskDuplicateErr),
- E_AUTO(RcvHdrFullErr),
- E_AUTO(RcvEgrFullErr),
- { .mask = 0 }
+ { .mask = 0, .sz = 0 }
};
static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
@@ -1136,7 +1241,8 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
/*
* SDmaHaltErr is not really an error, make it clearer;
*/
- {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted"},
+ {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted",
+ .sz = 11},
E_P_AUTO(SDmaDescAddrMisalignErr),
E_P_AUTO(SDmaUnexpDataErr),
E_P_AUTO(SDmaMissingDwErr),
@@ -1172,7 +1278,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
E_P_AUTO(RcvICRCErr),
E_P_AUTO(RcvVCRCErr),
E_P_AUTO(RcvFormatErr),
- { .mask = 0 }
+ { .mask = 0, .sz = 0 }
};
/*
@@ -1180,17 +1286,17 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
* context
*/
#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \
- .msg = #fldname }
+ .msg = #fldname, .sz = sizeof(#fldname) }
/* Below generates "auto-message" for interrupts specific to a port */
#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\
SYM_LSB(IntMask, fldname##Mask##_0), \
SYM_LSB(IntMask, fldname##Mask##_1)), \
- .msg = #fldname "_P" }
+ .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
/* For some reason, the SerDesTrimDone bits are reversed */
#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\
SYM_LSB(IntMask, fldname##Mask##_1), \
SYM_LSB(IntMask, fldname##Mask##_0)), \
- .msg = #fldname "_P" }
+ .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
/*
* Below generates "auto-message" for interrupts specific to a context,
* with ctxt-number appended
@@ -1198,7 +1304,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\
SYM_LSB(IntMask, fldname##0IntMask), \
SYM_LSB(IntMask, fldname##17IntMask)), \
- .msg = #fldname "_C"}
+ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
INTR_AUTO_P(SDmaInt),
@@ -1212,11 +1318,12 @@ static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
INTR_AUTO_P(SendDoneInt),
INTR_AUTO(SendBufAvailInt),
INTR_AUTO_C(RcvAvail),
- { .mask = 0 }
+ { .mask = 0, .sz = 0 }
};
#define TXSYMPTOM_AUTO_P(fldname) \
- { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), .msg = #fldname }
+ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
+ .msg = #fldname, .sz = sizeof(#fldname) }
static const struct qib_hwerror_msgs hdrchk_msgs[] = {
TXSYMPTOM_AUTO_P(NonKeyPacket),
TXSYMPTOM_AUTO_P(GRHFail),
@@ -1225,7 +1332,7 @@ static const struct qib_hwerror_msgs hdrchk_msgs[] = {
TXSYMPTOM_AUTO_P(SLIDFail),
TXSYMPTOM_AUTO_P(RawIPV6),
TXSYMPTOM_AUTO_P(PacketTooSmall),
- { .mask = 0 }
+ { .mask = 0, .sz = 0 }
};
#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
@@ -1270,7 +1377,7 @@ static void err_decode(char *msg, size_t len, u64 errs,
u64 these, lmask;
int took, multi, n = 0;
- while (msp && msp->mask) {
+ while (errs && msp && msp->mask) {
multi = (msp->mask & (msp->mask - 1));
while (errs & msp->mask) {
these = (errs & msp->mask);
@@ -1281,9 +1388,14 @@ static void err_decode(char *msg, size_t len, u64 errs,
*msg++ = ',';
len--;
}
- took = scnprintf(msg, len, "%s", msp->msg);
+ BUG_ON(!msp->sz);
+ /* msp->sz counts the nul */
+ took = min_t(size_t, msp->sz - (size_t)1, len);
+ memcpy(msg, msp->msg, took);
len -= took;
msg += took;
+ if (len)
+ *msg = '\0';
}
errs &= ~lmask;
if (len && multi) {
@@ -1484,6 +1596,8 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
struct qib_devdata *dd = ppd->dd;
errs &= QIB_E_P_SDMAERRS;
+ err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf),
+ errs, qib_7322p_error_msgs);
if (errs & QIB_E_P_SDMAUNEXPDATA)
qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit,
@@ -1491,6 +1605,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (errs != QIB_E_P_SDMAHALT) {
+ /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
+ qib_dev_porterr(dd, ppd->port,
+ "SDMA %s 0x%016llx %s\n",
+ qib_sdma_state_names[ppd->sdma_state.current_state],
+ errs, ppd->cpspec->sdmamsgbuf);
+ dump_sdma_7322_state(ppd);
+ }
+
switch (ppd->sdma_state.current_state) {
case qib_sdma_state_s00_hw_down:
break;
@@ -1542,8 +1665,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
qib_stats.sps_errints++;
errs = qib_read_kreg64(dd, kr_errstatus);
if (!errs) {
- qib_devinfo(dd->pcidev, "device error interrupt, "
- "but no error bits set!\n");
+ qib_devinfo(dd->pcidev,
+ "device error interrupt, but no error bits set!\n");
goto done;
}
@@ -1589,8 +1712,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
if (errs & QIB_E_RESET) {
int pidx;
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1621,6 +1744,14 @@ done:
return;
}
+static void qib_error_tasklet(unsigned long data)
+{
+ struct qib_devdata *dd = (struct qib_devdata *)data;
+
+ handle_7322_errors(dd);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+}
+
static void reenable_chase(unsigned long opaque)
{
struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -1630,7 +1761,8 @@ static void reenable_chase(unsigned long opaque)
QLOGIC_IB_IBCC_LINKINITCMD_POLL);
}
-static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt)
+static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow,
+ u8 ibclt)
{
ppd->cpspec->chase_end = 0;
@@ -1646,7 +1778,7 @@ static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt)
static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
{
u8 ibclt;
- u64 tnow;
+ unsigned long tnow;
ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState);
@@ -1661,9 +1793,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
case IB_7322_LT_STATE_CFGWAITRMT:
case IB_7322_LT_STATE_TXREVLANES:
case IB_7322_LT_STATE_CFGENH:
- tnow = get_jiffies_64();
+ tnow = jiffies;
if (ppd->cpspec->chase_end &&
- time_after64(tnow, ppd->cpspec->chase_end))
+ time_after(tnow, ppd->cpspec->chase_end))
disable_chase(ppd, tnow, ibclt);
else if (!ppd->cpspec->chase_end)
ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME;
@@ -1673,10 +1805,14 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
break;
}
- if (ibclt == IB_7322_LT_STATE_CFGTEST &&
+ if (((ibclt >= IB_7322_LT_STATE_CFGTEST &&
+ ibclt <= IB_7322_LT_STATE_CFGWAITENH) ||
+ ibclt == IB_7322_LT_STATE_LINKUP) &&
(ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) {
force_h1(ppd);
ppd->cpspec->qdr_reforce = 1;
+ if (!ppd->dd->cspec->r1)
+ serdes_7322_los_enable(ppd, 0);
} else if (ppd->cpspec->qdr_reforce &&
(ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) &&
(ibclt == IB_7322_LT_STATE_CFGENH ||
@@ -1692,18 +1828,37 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
ibclt <= IB_7322_LT_STATE_SLEEPQUIET)))
adj_tx_serdes(ppd);
- if (!ppd->cpspec->qdr_dfe_on && ibclt != IB_7322_LT_STATE_LINKUP &&
- ibclt <= IB_7322_LT_STATE_SLEEPQUIET) {
- ppd->cpspec->qdr_dfe_on = 1;
- ppd->cpspec->qdr_dfe_time = 0;
- /* On link down, reenable QDR adaptation */
- qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
- ppd->dd->cspec->r1 ?
- QDR_STATIC_ADAPT_DOWN_R1 :
- QDR_STATIC_ADAPT_DOWN);
+ if (ibclt != IB_7322_LT_STATE_LINKUP) {
+ u8 ltstate = qib_7322_phys_portstate(ibcst);
+ u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0,
+ LinkTrainingState);
+ if (!ppd->dd->cspec->r1 &&
+ pibclt == IB_7322_LT_STATE_LINKUP &&
+ ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE)
+ /* If the link went down (but no into recovery,
+ * turn LOS back on */
+ serdes_7322_los_enable(ppd, 1);
+ if (!ppd->cpspec->qdr_dfe_on &&
+ ibclt <= IB_7322_LT_STATE_SLEEPQUIET) {
+ ppd->cpspec->qdr_dfe_on = 1;
+ ppd->cpspec->qdr_dfe_time = 0;
+ /* On link down, reenable QDR adaptation */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 :
+ QDR_STATIC_ADAPT_DOWN);
+ pr_info(
+ "IB%u:%u re-enabled QDR adaptation ibclt %x\n",
+ ppd->dd->unit, ppd->port, ibclt);
+ }
}
}
+static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32);
+
/*
* This is per-pport error handling.
* will likely get it's own MSIx interrupt (one for each port,
@@ -1739,9 +1894,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
if (!*msg)
snprintf(msg, sizeof ppd->cpspec->epmsgbuf,
"no others");
- qib_dev_porterr(dd, ppd->port, "error interrupt with unknown"
- " errors 0x%016Lx set (and %s)\n",
- (errs & ~QIB_E_P_BITSEXTANT), msg);
+ qib_dev_porterr(dd, ppd->port,
+ "error interrupt with unknown errors 0x%016Lx set (and %s)\n",
+ (errs & ~QIB_E_P_BITSEXTANT), msg);
*msg = '\0';
}
@@ -1959,8 +2114,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
goto bail;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
goto bail;
}
qib_stats.sps_hwerrs++;
@@ -1974,8 +2129,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
/* no EEPROM logging, yet */
if (hwerrs)
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
ctrl = qib_read_kreg32(dd, kr_control);
if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) {
@@ -1999,8 +2155,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcpy(msg, "[Memory BIST test failed, "
- "InfiniPath hardware unusable]", msgl);
+ strlcpy(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -2012,9 +2169,33 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs &
+ (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
+ int pidx = 0;
+ int err;
+ unsigned long flags;
+ struct qib_pportdata *ppd = dd->pport;
+ for (; pidx < dd->num_pports; ++pidx, ppd++) {
+ err = 0;
+ if (pidx == 0 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
+ err++;
+ if (pidx == 1 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
+ err++;
+ if (err) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ dump_sdma_7322_state(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* for /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -2214,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* ensure previous Tx parameters are not still forced */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
if (qib_compat_ddr_negotiate) {
ppd->cpspec->ibdeltainprog = 1;
ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
@@ -2242,16 +2428,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
SYM_LSB(IBCCtrlA_0, MaxPktLen);
ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */
- /* initially come up waiting for TS1, without sending anything. */
- val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
- QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
-
/*
* Reset the PCS interface to the serdes (and also ibc, which is still
* in reset from above). Writes new value of ibcctrl_a as last step.
*/
qib_7322_mini_pcs_reset(ppd);
- qib_write_kreg(dd, kr_scratch, 0ULL);
if (!ppd->cpspec->ibcctrl_b) {
unsigned lse = ppd->link_speed_enabled;
@@ -2317,6 +2498,14 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn);
set_vls(ppd);
+ /* initially come up DISABLED, without sending anything. */
+ val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
+ QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, val);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* clear the linkinit cmds */
+ ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd);
+
/* be paranoid against later code motion, etc. */
spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable);
@@ -2348,10 +2537,9 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
spin_unlock_irqrestore(&ppd->lflags_lock, flags);
wake_up(&ppd->cpspec->autoneg_wait);
- cancel_delayed_work(&ppd->cpspec->autoneg_work);
+ cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
if (ppd->dd->cspec->r1)
- cancel_delayed_work(&ppd->cpspec->ipg_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&ppd->cpspec->ipg_work);
ppd->cpspec->chase_end = 0;
if (ppd->cpspec->chase_timer.data) /* if initted */
@@ -2483,6 +2671,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ switch (event) {
+ case DCA_PROVIDER_ADD:
+ if (dd->flags & QIB_DCA_ENABLED)
+ break;
+ if (!dca_add_requester(&dd->pcidev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+ break;
+ case DCA_PROVIDER_REMOVE:
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA),
+ dd->cspec->dca_ctrl);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
+ const struct dca_reg_map *rmp;
+
+ cspec->rhdr_cpu[rcd->ctxt] = cpu;
+ rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
+ qib_devinfo(dd->pcidev,
+ "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ qib_write_kreg(dd, rmp->regno,
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+ unsigned pidx = ppd->port - 1;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->sdma_cpu[pidx] != cpu) {
+ cspec->sdma_cpu[pidx] = cpu;
+ cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
+ SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
+ SYM_MASK(DCACtrlF, SendDma0DCAOPH));
+ cspec->dca_rcvhdr_ctrl[4] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
+ (ppd->hw_pidx ?
+ SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
+ SYM_LSB(DCACtrlF, SendDma0DCAOPH));
+ qib_devinfo(dd->pcidev,
+ "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[4]);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlF),
+ cspec->dca_rcvhdr_ctrl[4]);
+ cspec->dca_ctrl |= ppd->hw_pidx ?
+ SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
+ SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_setup_dca(struct qib_devdata *dd)
+{
+ struct qib_chip_specific *cspec = dd->cspec;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
+ cspec->rhdr_cpu[i] = -1;
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ cspec->sdma_cpu[i] = -1;
+ cspec->dca_rcvhdr_ctrl[0] =
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[1] =
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[2] =
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[3] =
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[4] =
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
+ cspec->dca_rcvhdr_ctrl[i]);
+ for (i = 0; i < cspec->num_msix_entries; i++)
+ setup_dca_notifier(dd, &cspec->msix_entries[i]);
+}
+
+static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct qib_irq_notify *n =
+ container_of(notify, struct qib_irq_notify, notify);
+ int cpu = cpumask_first(mask);
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ qib_update_rhdrq_dca(rcd, cpu);
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ qib_update_sdma_dca(ppd, cpu);
+ }
+}
+
+static void qib_irq_notifier_release(struct kref *ref)
+{
+ struct qib_irq_notify *n =
+ container_of(ref, struct qib_irq_notify, notify.kref);
+ struct qib_devdata *dd;
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ dd = rcd->dd;
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ dd = ppd->dd;
+ }
+ qib_devinfo(dd->pcidev,
+ "release on HCA notify 0x%p n 0x%p\n", ref, n);
+ kfree(n);
+}
+#endif
+
/*
* Disable MSIx interrupt if enabled, call generic MSIx code
* to cleanup, and clear pending MSIx interrupts.
@@ -2499,9 +2843,16 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
int i;
dd->cspec->num_msix_entries = 0;
- for (i = 0; i < n; i++)
- free_irq(dd->cspec->msix_entries[i].vector,
- dd->cspec->msix_arg[i]);
+ for (i = 0; i < n; i++) {
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
+#endif
+ irq_set_affinity_hint(
+ dd->cspec->msix_entries[i].msix.vector, NULL);
+ free_cpumask_var(dd->cspec->msix_entries[i].mask);
+ free_irq(dd->cspec->msix_entries[i].msix.vector,
+ dd->cspec->msix_entries[i].arg);
+ }
qib_nomsix(dd);
}
/* make sure no MSIx interrupts are left pending */
@@ -2523,13 +2874,21 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
{
int i;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
+ }
+#endif
+
qib_7322_free_irq(dd);
kfree(dd->cspec->cntrs);
kfree(dd->cspec->sendchkenable);
kfree(dd->cspec->sendgrhchk);
kfree(dd->cspec->sendibchk);
kfree(dd->cspec->msix_entries);
- kfree(dd->cspec->msix_arg);
for (i = 0; i < dd->num_pports; i++) {
unsigned long flags;
u32 mask = QSFP_GPIO_MOD_PRS_N |
@@ -2597,8 +2956,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat)
char msg[128];
kills = istat & ~QIB_I_BITSEXTANT;
- qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:"
- " %s\n", (unsigned long long) kills, msg);
+ qib_dev_err(dd,
+ "Clearing reserved interrupt(s) 0x%016llx: %s\n",
+ (unsigned long long) kills, msg);
qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills));
}
@@ -2647,8 +3007,8 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd)
pins >>= SYM_LSB(EXTStatus, GPIOIn);
if (!(pins & mask)) {
++handled;
- qd->t_insert = get_jiffies_64();
- schedule_work(&qd->work);
+ qd->t_insert = jiffies;
+ queue_work(ib_wq, &qd->work);
}
}
}
@@ -2675,8 +3035,10 @@ static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat)
unknown_7322_ibits(dd, istat);
if (istat & QIB_I_GPIO)
unknown_7322_gpio_intr(dd);
- if (istat & QIB_I_C_ERROR)
- handle_7322_errors(dd);
+ if (istat & QIB_I_C_ERROR) {
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+ tasklet_schedule(&dd->error_tasklet);
+ }
if (istat & INT_MASK_P(Err, 0) && dd->rcd[0])
handle_7322_p_errors(dd->rcd[0]->ppd);
if (istat & INT_MASK_P(Err, 1) && dd->rcd[1])
@@ -2753,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* handle "errors" of various kinds first, device ahead of port */
if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -2783,10 +3143,8 @@ static irqreturn_t qib_7322intr(int irq, void *data)
for (i = 0; i < dd->first_user_ctxt; i++) {
if (ctxtrbits & rmask) {
ctxtrbits &= ~rmask;
- if (dd->rcd[i]) {
+ if (dd->rcd[i])
qib_kreceive(dd->rcd[i], NULL, &npkts);
- adjust_rcv_timeout(dd->rcd[i], npkts);
- }
}
rmask <<= 1;
}
@@ -2826,16 +3184,13 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
(1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt);
qib_kreceive(rcd, NULL, &npkts);
- adjust_rcv_timeout(rcd, npkts);
return IRQ_HANDLED;
}
@@ -2856,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -2889,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -2918,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -2947,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -2977,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -2990,6 +3335,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
return IRQ_HANDLED;
}
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ if (!m->dca)
+ return;
+ qib_devinfo(dd->pcidev,
+ "Disabling notifier on HCA %d irq %d\n",
+ dd->unit,
+ m->msix.vector);
+ irq_set_affinity_notifier(
+ m->msix.vector,
+ NULL);
+ m->notifier = NULL;
+}
+
+static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ struct qib_irq_notify *n;
+
+ if (!m->dca)
+ return;
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (n) {
+ int ret;
+
+ m->notifier = n;
+ n->notify.irq = m->msix.vector;
+ n->notify.notify = qib_irq_notifier_notify;
+ n->notify.release = qib_irq_notifier_release;
+ n->arg = m->arg;
+ n->rcv = m->rcv;
+ qib_devinfo(dd->pcidev,
+ "set notifier irq %d rcv %d notify %p\n",
+ n->notify.irq, n->rcv, &n->notify);
+ ret = irq_set_affinity_notifier(
+ n->notify.irq,
+ &n->notify);
+ if (ret) {
+ m->notifier = NULL;
+ kfree(n);
+ }
+ }
+}
+
+#endif
+
/*
* Set up our chip-specific interrupt handler.
* The interrupt type has already been setup, so
@@ -3003,6 +3395,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
int ret, i, msixnum;
u64 redirect[6];
u64 mask;
+ const struct cpumask *local_mask;
+ int firstcpu, secondcpu = 0, currrcvcpu = 0;
if (!dd->num_pports)
return;
@@ -3030,16 +3424,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
/* Try to get INTx interrupt */
try_intx:
if (!dd->pcidev->irq) {
- qib_dev_err(dd, "irq is 0, BIOS error? "
- "Interrupts won't work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
goto bail;
}
ret = request_irq(dd->pcidev->irq, qib_7322intr,
IRQF_SHARED, QIB_DRV_NAME, dd);
if (ret) {
- qib_dev_err(dd, "Couldn't setup INTx "
- "interrupt (irq=%d): %d\n",
- dd->pcidev->irq, ret);
+ qib_dev_err(dd,
+ "Couldn't setup INTx interrupt (irq=%d): %d\n",
+ dd->pcidev->irq, ret);
goto bail;
}
dd->cspec->irq = dd->pcidev->irq;
@@ -3051,13 +3445,31 @@ try_intx:
memset(redirect, 0, sizeof redirect);
mask = ~0ULL;
msixnum = 0;
+ local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+ firstcpu = cpumask_first(local_mask);
+ if (firstcpu >= nr_cpu_ids ||
+ cpumask_weight(local_mask) == num_online_cpus()) {
+ local_mask = topology_core_cpumask(0);
+ firstcpu = cpumask_first(local_mask);
+ }
+ if (firstcpu < nr_cpu_ids) {
+ secondcpu = cpumask_next(firstcpu, local_mask);
+ if (secondcpu >= nr_cpu_ids)
+ secondcpu = firstcpu;
+ currrcvcpu = secondcpu;
+ }
for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) {
irq_handler_t handler;
- const char *name;
void *arg;
u64 val;
int lsb, reg, sh;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca = 0;
+#endif
+ dd->cspec->msix_entries[msixnum].
+ name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
+ = '\0';
if (i < ARRAY_SIZE(irq_table)) {
if (irq_table[i].port) {
/* skip if for a non-configured port */
@@ -3066,9 +3478,16 @@ try_intx:
arg = dd->pport + irq_table[i].port - 1;
} else
arg = dd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = irq_table[i].dca;
+#endif
lsb = irq_table[i].lsb;
handler = irq_table[i].handler;
- name = irq_table[i].name;
+ snprintf(dd->cspec->msix_entries[msixnum].name,
+ sizeof(dd->cspec->msix_entries[msixnum].name)
+ - 1,
+ QIB_DRV_NAME "%d%s", dd->unit,
+ irq_table[i].name);
} else {
unsigned ctxt;
@@ -3077,25 +3496,41 @@ try_intx:
arg = dd->rcd[ctxt];
if (!arg)
continue;
+ if (qib_krcvq01_no_msi && ctxt < 2)
+ continue;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = 1;
+#endif
lsb = QIB_I_RCVAVAIL_LSB + ctxt;
handler = qib_7322pintr;
- name = QIB_DRV_NAME " (kctx)";
+ snprintf(dd->cspec->msix_entries[msixnum].name,
+ sizeof(dd->cspec->msix_entries[msixnum].name)
+ - 1,
+ QIB_DRV_NAME "%d (kctx)", dd->unit);
}
- ret = request_irq(dd->cspec->msix_entries[msixnum].vector,
- handler, 0, name, arg);
+ ret = request_irq(
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ handler, 0, dd->cspec->msix_entries[msixnum].name,
+ arg);
if (ret) {
/*
* Shouldn't happen since the enable said we could
* have as many as we are trying to setup here.
*/
- qib_dev_err(dd, "Couldn't setup MSIx "
- "interrupt (vec=%d, irq=%d): %d\n", msixnum,
- dd->cspec->msix_entries[msixnum].vector,
- ret);
+ qib_dev_err(dd,
+ "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n",
+ msixnum,
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ ret);
qib_7322_nomsix(dd);
goto try_intx;
}
- dd->cspec->msix_arg[msixnum] = arg;
+ dd->cspec->msix_entries[msixnum].arg = arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->cspec->msix_entries[msixnum].dca = dca;
+ dd->cspec->msix_entries[msixnum].rcv =
+ handler == qib_7322pintr;
+#endif
if (lsb >= 0) {
reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -3105,12 +3540,33 @@ try_intx:
}
val = qib_read_kreg64(dd, 2 * msixnum + 1 +
(QIB_7322_MsixTable_OFFS / sizeof(u64)));
+ if (firstcpu < nr_cpu_ids &&
+ zalloc_cpumask_var(
+ &dd->cspec->msix_entries[msixnum].mask,
+ GFP_KERNEL)) {
+ if (handler == qib_7322pintr) {
+ cpumask_set_cpu(currrcvcpu,
+ dd->cspec->msix_entries[msixnum].mask);
+ currrcvcpu = cpumask_next(currrcvcpu,
+ local_mask);
+ if (currrcvcpu >= nr_cpu_ids)
+ currrcvcpu = secondcpu;
+ } else {
+ cpumask_set_cpu(firstcpu,
+ dd->cspec->msix_entries[msixnum].mask);
+ }
+ irq_set_affinity_hint(
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ dd->cspec->msix_entries[msixnum].mask);
+ }
msixnum++;
}
/* Initialize the vector mapping */
for (i = 0; i < ARRAY_SIZE(redirect); i++)
qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
dd->cspec->main_int_mask = mask;
+ tasklet_init(&dd->error_tasklet, qib_error_tasklet,
+ (unsigned long)dd);
bail:;
}
@@ -3157,6 +3613,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
case BOARD_QME7342:
n = "InfiniPath_QME7342";
break;
+ case 8:
+ n = "InfiniPath_QME7362";
+ dd->flags |= QIB_HAS_QSFP;
+ break;
case 15:
n = "InfiniPath_QLE7342_TEST";
dd->flags |= QIB_HAS_QSFP;
@@ -3183,8 +3643,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
(unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
- qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode"
- " by module parameter\n", dd->unit);
+ qib_devinfo(dd->pcidev,
+ "IB%u: Forced to single port mode by module parameter\n",
+ dd->unit);
features &= PORT_SPD_CAP;
}
@@ -3248,12 +3709,13 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
dd->pport->cpspec->ibsymdelta = 0;
dd->pport->cpspec->iblnkerrdelta = 0;
dd->pport->cpspec->ibmalfdelta = 0;
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
/*
* Keep chip from being accessed until we are ready. Use
* writeq() directly, to allow the write even though QIB_PRESENT
- * isnt' set.
+ * isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR);
dd->flags |= QIB_DOING_RESET;
@@ -3278,8 +3740,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (val == dd->revision)
break;
if (i == 5) {
- qib_dev_err(dd, "Failed to initialize after reset, "
- "unusable\n");
+ qib_dev_err(dd,
+ "Failed to initialize after reset, unusable\n");
ret = 0;
goto bail;
}
@@ -3290,7 +3752,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (msix_entries) {
/* restore the MSIx vector address and data if saved above */
for (i = 0; i < msix_entries; i++) {
- dd->cspec->msix_entries[i].entry = i;
+ dd->cspec->msix_entries[i].msix.entry = i;
if (!msix_vecsave || !msix_vecsave[2 * i])
continue;
qib_write_kreg(dd, 2 * i +
@@ -3310,8 +3772,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (qib_pcie_params(dd, dd->lbus_width,
&dd->cspec->num_msix_entries,
dd->cspec->msix_entries))
- qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
qib_setup_7322_interrupt(dd, 1);
@@ -3352,8 +3814,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
return;
}
if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -3475,11 +3938,6 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
dd->cspec->numctxts = nchipctxts;
if (qib_n_krcv_queues > 1 && dd->num_pports) {
- /*
- * Set the mask for which bits from the QPN are used
- * to select a context number.
- */
- dd->qpn_mask = 0x3f;
dd->first_user_ctxt = NUM_IB_PORTS +
(qib_n_krcv_queues - 1) * dd->num_pports;
if (dd->first_user_ctxt > nchipctxts)
@@ -3530,8 +3988,11 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
/* kr_rcvegrcnt changes based on the number of contexts enabled */
dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
- dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt,
- dd->num_pports > 1 ? 1024U : 2048U);
+ if (qib_rcvhdrcnt)
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt);
+ else
+ dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt,
+ dd->num_pports > 1 ? 1024U : 2048U);
}
static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which)
@@ -3683,7 +4144,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
/*
* As with width, only write the actual register if the
* link is currently down, otherwise takes effect on next
- * link change. Since setting is being explictly requested
+ * link change. Since setting is being explicitly requested
* (via MAD or sysfs), clear autoneg failure status if speed
* autoneg is enabled.
*/
@@ -3909,8 +4370,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what)
Loopback);
/* enable heart beat again */
val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB;
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -4002,12 +4464,20 @@ static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t)
}
static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
- qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
- qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ /*
+ * Need to write timeout register before updating rcvhdrhead to ensure
+ * that the timer is enabled on reception of a packet.
+ */
+ if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT)
+ adjust_rcv_timeout(rcd, npkts);
if (updegr)
qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
}
static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd)
@@ -4113,7 +4583,7 @@ static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op,
* Init the context registers also; if we were
* disabled, tail and head should both be zero
* already from the enable, but since we don't
- * know, we have to do it explictly.
+ * know, we have to do it explicitly.
*/
val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
@@ -4586,8 +5056,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd)
dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
if (!dd->pport[i].cpspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for"
- " portcounters\n");
+ qib_dev_err(dd,
+ "Failed allocation for portcounters\n");
}
}
@@ -4716,7 +5186,7 @@ static void qib_get_7322_faststats(unsigned long opaque)
(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED |
QIBL_LINKACTIVE)) &&
ppd->cpspec->qdr_dfe_time &&
- time_after64(get_jiffies_64(), ppd->cpspec->qdr_dfe_time)) {
+ time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) {
ppd->cpspec->qdr_dfe_on = 0;
qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
@@ -4737,8 +5207,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
if (!dd->cspec->num_msix_entries)
return 0; /* already using INTx */
- qib_devinfo(dd->pcidev, "MSIx interrupt not detected,"
- " trying INTx interrupts\n");
+ qib_devinfo(dd->pcidev,
+ "MSIx interrupt not detected, trying INTx interrupts\n");
qib_7322_nomsix(dd);
qib_enable_intx(dd->pcidev);
qib_setup_7322_interrupt(dd, 0);
@@ -4926,8 +5396,8 @@ static void try_7322_autoneg(struct qib_pportdata *ppd)
set_7322_ibspeed_fast(ppd, QIB_IB_DDR);
qib_7322_mini_pcs_reset(ppd);
/* 2 msec is minimum length of a poll cycle */
- schedule_delayed_work(&ppd->cpspec->autoneg_work,
- msecs_to_jiffies(2));
+ queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+ msecs_to_jiffies(2));
}
/*
@@ -5023,15 +5493,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
goto retry;
if (!ibp->smi_ah) {
- struct ib_ah_attr attr;
struct ib_ah *ah;
- memset(&attr, 0, sizeof attr);
- attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE);
- attr.port_num = ppd->port;
- ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
+ ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE));
if (IS_ERR(ah))
- ret = -EINVAL;
+ ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
ibp->smi_ah = to_iah(ah);
@@ -5057,7 +5523,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
ib_free_send_mad(send_buf);
retry:
delay = 2 << ppd->cpspec->ipg_tries;
- schedule_delayed_work(&ppd->cpspec->ipg_work, msecs_to_jiffies(delay));
+ queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work,
+ msecs_to_jiffies(delay));
}
/*
@@ -5149,6 +5616,8 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
QIBL_IB_AUTONEG_INPROG)))
set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ struct qib_qsfp_data *qd =
+ &ppd->cpspec->qsfp_data;
/* unlock the Tx settings, speed may change */
qib_write_kreg_port(ppd, krp_tx_deemph_override,
SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
@@ -5156,6 +5625,12 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
qib_cancel_sends(ppd);
/* on link down, ensure sane pcs state */
qib_7322_mini_pcs_reset(ppd);
+ /* schedule the qsfp refresh which should turn the link
+ off */
+ if (ppd->dd->flags & QIB_HAS_QSFP) {
+ qd->t_insert = jiffies;
+ queue_work(ib_wq, &qd->work);
+ }
spin_lock_irqsave(&ppd->sdma_lock, flags);
if (__qib_sdma_running(ppd))
__qib_sdma_process_event(ppd,
@@ -5505,37 +5980,79 @@ static void qsfp_7322_event(struct work_struct *work)
{
struct qib_qsfp_data *qd;
struct qib_pportdata *ppd;
- u64 pwrup;
+ unsigned long pwrup;
+ unsigned long flags;
int ret;
u32 le2;
qd = container_of(work, struct qib_qsfp_data, work);
ppd = qd->ppd;
- pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC);
+ pwrup = qd->t_insert +
+ msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC);
- /*
- * Some QSFP's not only do not respond until the full power-up
- * time, but may behave badly if we try. So hold off responding
- * to insertion.
- */
- while (1) {
- u64 now = get_jiffies_64();
- if (time_after64(now, pwrup))
- break;
- msleep(1);
+ /* Delay for 20 msecs to allow ModPrs resistor to setup */
+ mdelay(QSFP_MODPRS_LAG_MSEC);
+
+ if (!qib_qsfp_mod_present(ppd)) {
+ ppd->cpspec->qsfp_data.modpresent = 0;
+ /* Set the physical link to disabled */
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else {
+ /*
+ * Some QSFP's not only do not respond until the full power-up
+ * time, but may behave badly if we try. So hold off responding
+ * to insertion.
+ */
+ while (1) {
+ if (time_is_before_jiffies(pwrup))
+ break;
+ msleep(20);
+ }
+
+ ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
+
+ /*
+ * Need to change LE2 back to defaults if we couldn't
+ * read the cable type (to handle cable swaps), so do this
+ * even on failure to read cable information. We don't
+ * get here for QME, so IS_QME check not needed here.
+ */
+ if (!ret && !ppd->dd->cspec->r1) {
+ if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+ le2 = LE2_QME;
+ else if (qd->cache.atten[1] >= qib_long_atten &&
+ QSFP_IS_CU(qd->cache.tech))
+ le2 = LE2_5m;
+ else
+ le2 = LE2_DEFAULT;
+ } else
+ le2 = LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
+ /*
+ * We always change parameteters, since we can choose
+ * values for cables without eeproms, and the cable may have
+ * changed from a cable with full or partial eeprom content
+ * to one with partial or no content.
+ */
+ init_txdds_table(ppd, 0);
+ /* The physical link is being re-enabled only when the
+ * previous state was DISABLED and the VALID bit is not
+ * set. This should only happen when the cable has been
+ * physically pulled. */
+ if (!ppd->cpspec->qsfp_data.modpresent &&
+ (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) {
+ ppd->cpspec->qsfp_data.modpresent = 1;
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
}
- ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
- /*
- * Need to change LE2 back to defaults if we couldn't
- * read the cable type (to handle cable swaps), so do this
- * even on failure to read cable information. We don't
- * get here for QME, so IS_QME check not needed here.
- */
- le2 = (!ret && qd->cache.atten[1] >= qib_long_atten &&
- !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ?
- LE2_5m : LE2_DEFAULT;
- ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
- init_txdds_table(ppd, 0);
}
/*
@@ -5579,6 +6096,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
u32 pidx, unit, port, deflt, h1;
unsigned long val;
int any = 0, seth1;
+ int txdds_size;
str = txselect_list;
@@ -5587,6 +6105,10 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
for (pidx = 0; pidx < dd->num_pports; ++pidx)
dd->pport[pidx].cpspec->no_eep = deflt;
+ txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ;
+ if (IS_QME(dd) || IS_QMH(dd))
+ txdds_size += TXDDS_MFG_SZ;
+
while (*nxt && nxt[1]) {
str = ++nxt;
unit = simple_strtoul(str, &nxt, 0);
@@ -5609,7 +6131,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
;
continue;
}
- if (val >= TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)
+ if (val >= txdds_size)
continue;
seth1 = 0;
h1 = 0; /* gcc thinks it might be used uninitted */
@@ -5633,6 +6155,12 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
ppd->cpspec->h1_val = h1;
/* now change the IBC and serdes, overriding generic */
init_txdds_table(ppd, 1);
+ /* Re-enable the physical state machine on mezz boards
+ * now that the correct settings have been set.
+ * QSFP boards are handles by the QSFP event handler */
+ if (IS_QMH(dd) || IS_QME(dd))
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
any++;
}
if (*nxt == '\n')
@@ -5656,15 +6184,14 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
unsigned long val;
char *n;
if (strlen(str) >= MAX_ATTEN_LEN) {
- printk(KERN_INFO QIB_DRV_NAME " txselect_values string "
- "too long\n");
+ pr_info("txselect_values string too long\n");
return -ENOSPC;
}
val = simple_strtoul(str, &n, 0);
- if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) {
- printk(KERN_INFO QIB_DRV_NAME
- "txselect_values must start with a number < %d\n",
- TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
+ pr_info("txselect_values must start with a number < %d\n",
+ TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
return -EINVAL;
}
strcpy(txselect_list, str);
@@ -5691,11 +6218,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
@@ -5810,7 +6336,8 @@ static void write_7322_initregs(struct qib_devdata *dd)
unsigned n, regno;
unsigned long flags;
- if (!dd->qpn_mask || !dd->pport[pidx].link_speed_supported)
+ if (dd->n_krcv_queues < 2 ||
+ !dd->pport[pidx].link_speed_supported)
continue;
ppd = &dd->pport[pidx];
@@ -5906,8 +6433,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -6017,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
}
dd->num_pports++;
- qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ if (ret) {
+ dd->num_pports--;
+ goto bail;
+ }
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_width_enabled = IB_WIDTH_4X;
@@ -6073,9 +6604,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
*/
if (!(dd->flags & QIB_HAS_QSFP)) {
if (!IS_QMH(dd) && !IS_QME(dd))
- qib_devinfo(dd->pcidev, "IB%u:%u: "
- "Unknown mezzanine card type\n",
- dd->unit, ppd->port);
+ qib_devinfo(dd->pcidev,
+ "IB%u:%u: Unknown mezzanine card type\n",
+ dd->unit, ppd->port);
cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME;
/*
* Choose center value as default tx serdes setting
@@ -6097,12 +6628,16 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
ppd++;
}
- dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
- dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
+ dd->rcvhdrentsize = qib_rcvhdrentsize ?
+ qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE;
+ dd->rcvhdrsize = qib_rcvhdrsize ?
+ qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE;
dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
/* we always allocate at least 2048 bytes for eager buffers */
dd->rcvegrbufsize = max(mtu, 2048);
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7322_tidtemplate(dd);
@@ -6143,8 +6678,10 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
dd->piobcnt4k * dd->align4k;
dd->piovl15base = ioremap_nocache(vl15off,
NUM_VL15_BUFS * dd->align4k);
- if (!dd->piovl15base)
+ if (!dd->piovl15base) {
+ ret = -ENOMEM;
goto bail;
+ }
}
qib_7322_set_baseaddrs(dd); /* set chip access pointers now */
@@ -6183,6 +6720,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
dd->cspec->sdmabufcnt;
dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+ dd->last_pio = dd->cspec->lastbuf_for_pio;
dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ?
dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0;
@@ -6246,6 +6784,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
}
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+ u64 reg, reg1, reg2;
+
+ reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmastatus: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_sendctrl);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sendctrl: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabase);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabase: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ /* get bufuse bits, clear them, and print them again if non-zero */
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmatail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmatail: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmahead);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmahead: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmalengen: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmapriorityhld: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+ dump_sdma_state(ppd);
+}
+
static struct sdma_set_state_action sdma_7322_action_table[] = {
[qib_sdma_state_s00_hw_down] = {
.go_s99_running_tofalse = 1,
@@ -6495,7 +7113,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
/* make sure we see an updated copy next time around */
sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
sleeps++;
- msleep(1);
+ msleep(20);
}
switch (which) {
@@ -6679,6 +7297,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
dd->f_sdma_init_early = qib_7322_sdma_init_early;
dd->f_writescratch = writescratch;
dd->f_tempsense_rd = qib_7322_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7322_notify_dca;
+#endif
/*
* Do remaining PCIe setup and save PCIe values in dd.
* Any error printing is already done by the init code.
@@ -6710,21 +7331,23 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
(i >= ARRAY_SIZE(irq_table) &&
dd->rcd[i - ARRAY_SIZE(irq_table)]))
actual_cnt++;
+ /* reduce by ctxt's < 2 */
+ if (qib_krcvq01_no_msi)
+ actual_cnt -= dd->num_pports;
+
tabsize = actual_cnt;
- dd->cspec->msix_entries = kmalloc(tabsize *
- sizeof(struct msix_entry), GFP_KERNEL);
- dd->cspec->msix_arg = kmalloc(tabsize *
- sizeof(void *), GFP_KERNEL);
- if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) {
+ dd->cspec->msix_entries = kzalloc(tabsize *
+ sizeof(struct qib_msix_entry), GFP_KERNEL);
+ if (!dd->cspec->msix_entries) {
qib_dev_err(dd, "No memory for MSIx table\n");
tabsize = 0;
}
for (i = 0; i < tabsize; i++)
- dd->cspec->msix_entries[i].entry = i;
+ dd->cspec->msix_entries[i].msix.entry = i;
if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
/* may be less than we wanted, if not enough available */
dd->cspec->num_msix_entries = tabsize;
@@ -6733,7 +7356,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
/* clear diagctrl register, in case diags were running and crashed */
qib_write_kreg(dd, kr_hwdiagctrl, 0);
-
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (!dca_add_requester(&pdev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+#endif
goto bail;
bail_cleanup:
@@ -6948,15 +7577,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
{ 0, 0, 0, 1 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 2 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 2 }, /* QMH7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
- { 0, 0, 0, 11 }, /* QME7342 backplane settings */
{ 0, 0, 0, 3 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 4 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
};
static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
@@ -6965,15 +7599,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
{ 0, 0, 0, 7 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 8 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 8 }, /* QMH7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
- { 0, 0, 0, 13 }, /* QME7342 backplane settings */
{ 0, 0, 0, 9 }, /* QMH7342 backplane settings */
{ 0, 0, 0, 10 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
};
static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
@@ -6982,15 +7621,26 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
{ 0, 1, 0, 5 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 6 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 8 }, /* QMH7342 backplane settings */
- { 0, 1, 12, 10 }, /* QME7342 backplane setting */
- { 0, 1, 12, 11 }, /* QME7342 backplane setting */
- { 0, 1, 12, 12 }, /* QME7342 backplane setting */
- { 0, 1, 12, 14 }, /* QME7342 backplane setting */
- { 0, 1, 12, 6 }, /* QME7342 backplane setting */
- { 0, 1, 12, 7 }, /* QME7342 backplane setting */
- { 0, 1, 12, 8 }, /* QME7342 backplane setting */
{ 0, 1, 0, 10 }, /* QMH7342 backplane settings */
{ 0, 1, 0, 12 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
+};
+
+static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 0, 0, 0 }, /* QME7342 mfg settings */
+ { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */
};
static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds,
@@ -7035,7 +7685,8 @@ static void find_best_ent(struct qib_pportdata *ppd,
}
}
- /* Lookup serdes setting by cable type and attenuation */
+ /* Active cables don't have attenuation so we only set SERDES
+ * settings to account for the attenuation of the board traces. */
if (!override && QSFP_IS_ACTIVE(qd->tech)) {
*sdr_dds = txdds_sdr + ppd->dd->board_atten;
*ddr_dds = txdds_ddr + ppd->dd->board_atten;
@@ -7066,6 +7717,15 @@ static void find_best_ent(struct qib_pportdata *ppd,
*sdr_dds = &txdds_extra_sdr[idx];
*ddr_dds = &txdds_extra_ddr[idx];
*qdr_dds = &txdds_extra_qdr[idx];
+ } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) &&
+ ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
+ idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
+ pr_info("IB%u:%u use idx %u into txdds_mfg\n",
+ ppd->dd->unit, ppd->port, idx);
+ *sdr_dds = &txdds_extra_mfg[idx];
+ *ddr_dds = &txdds_extra_mfg[idx];
+ *qdr_dds = &txdds_extra_mfg[idx];
} else {
/* this shouldn't happen, it's range checked */
*sdr_dds = txdds_sdr + qib_long_atten;
@@ -7210,9 +7870,35 @@ static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data,
}
}
+static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
+{
+ u64 data = qib_read_kreg_port(ppd, krp_serdesctrl);
+ u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN);
+
+ if (enable && !state) {
+ pr_info("IB%u:%u Turning LOS on\n",
+ ppd->dd->unit, ppd->port);
+ data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ } else if (!enable && state) {
+ pr_info("IB%u:%u Turning LOS off\n",
+ ppd->dd->unit, ppd->port);
+ data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ }
+ qib_write_kreg_port(ppd, krp_serdesctrl, data);
+}
+
static int serdes_7322_init(struct qib_pportdata *ppd)
{
- u64 data;
+ int ret = 0;
+ if (ppd->dd->cspec->r1)
+ ret = serdes_7322_init_old(ppd);
+ else
+ ret = serdes_7322_init_new(ppd);
+ return ret;
+}
+
+static int serdes_7322_init_old(struct qib_pportdata *ppd)
+{
u32 le_val;
/*
@@ -7270,11 +7956,7 @@ static int serdes_7322_init(struct qib_pportdata *ppd)
ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
- data = qib_read_kreg_port(ppd, krp_serdesctrl);
- /* Turn off IB latency mode */
- data &= ~SYM_MASK(IBSerdesCtrl_0, IB_LAT_MODE);
- qib_write_kreg_port(ppd, krp_serdesctrl, data |
- SYM_MASK(IBSerdesCtrl_0, RXLOSEN));
+ serdes_7322_los_enable(ppd, 1);
/* rxbistena; set 0 to avoid effects of it switch later */
ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15);
@@ -7314,6 +7996,202 @@ static int serdes_7322_init(struct qib_pportdata *ppd)
return 0;
}
+static int serdes_7322_init_new(struct qib_pportdata *ppd)
+{
+ unsigned long tend;
+ u32 le_val, rxcaldone;
+ int chan, chan_done = (1 << SERDES_CHANS) - 1;
+
+ /* Clear cmode-override, may be set from older driver */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
+
+ /* ensure no tx overrides from earlier driver loads */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
+ /* START OF LSI SUGGESTED SERDES BRINGUP */
+ /* Reset - Calibration Setup */
+ /* Stop DFE adaptaion */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1));
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5));
+ /* Disable autoadapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15));
+ /* Disable LE2 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6));
+ /* Disable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ /* Disable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12));
+ /* Disable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3));
+ /* Disable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4));
+ /* Disable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13));
+ /* Disable RX Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ /* Disable RX Offset Calibration */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4));
+ /* Select BB CDR */
+ ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15));
+ /* CDR Step Size */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8));
+ /* Enable phase Calibration */
+ ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5));
+ /* DFE Bandwidth [2:14-12] */
+ ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12));
+ /* DFE Config (4 taps only) */
+ ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0));
+ /* Gain Loop Bandwidth */
+ if (!ppd->dd->cspec->r1) {
+ ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12));
+ ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8));
+ } else {
+ ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11));
+ }
+ /* Baseline Wander Correction Gain [13:4-0] (leave as default) */
+ /* Baseline Wander Correction Gain [3:7-5] (leave as default) */
+ /* Data Rate Select [5:7-6] (leave as default) */
+ /* RX Parallel Word Width [3:10-8] (leave as default) */
+
+ /* RX REST */
+ /* Single- or Multi-channel reset */
+ /* RX Analog reset */
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13));
+ msleep(20);
+ /* RX Analog reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14));
+ msleep(20);
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13));
+ msleep(20);
+
+ /* setup LoS params; these are subsystem, so chan == 5 */
+ /* LoS filter threshold_count on, ch 0-3, set to 8 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4));
+
+ /* LoS filter threshold_count off, ch 0-3, set to 4 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8));
+
+ /* LoS filter select enabled */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15);
+
+ /* LoS target data: SDR=4, DDR=2, QDR=1 */
+ ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */
+ ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
+ ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
+
+ /* Turn on LOS on initial SERDES init */
+ serdes_7322_los_enable(ppd, 1);
+ /* FLoop LOS gate: PPM filter enabled */
+ ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10);
+
+ /* RX LATCH CALIBRATION */
+ /* Enable Eyefinder Phase Calibration latch */
+ ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0));
+ /* Enable RX Offset Calibration latch */
+ ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4));
+ msleep(20);
+ /* Start Calibration */
+ ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10));
+ tend = jiffies + msecs_to_jiffies(500);
+ while (chan_done && !time_is_before_jiffies(tend)) {
+ msleep(20);
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 &&
+ (~chan_done & (1 << chan)) == 0)
+ chan_done &= ~(1 << chan);
+ }
+ }
+ if (chan_done) {
+ pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n",
+ IBSD(ppd->hw_pidx), chan_done);
+ } else {
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(10, 10)) == 0)
+ pr_info("Serdes %d chan %d calibration failed\n",
+ IBSD(ppd->hw_pidx), chan);
+ }
+ }
+
+ /* Turn off Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ msleep(20);
+
+ /* BRING RX UP */
+ /* Set LE2 value (May be overridden in qsfp_7322_event) */
+ le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7));
+ /* Set LE2 Loop bandwidth */
+ ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5));
+ /* Enable LE2 */
+ ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6));
+ msleep(20);
+ /* Enable H0 only */
+ ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1));
+ /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */
+ le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac;
+ ibsd_wr_allchans(ppd, 21, le_val, 0xfffe);
+ /* Enable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ msleep(20);
+ /* Set Frequency Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5));
+ /* Enable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4));
+ /* Set Timing Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9));
+ /* Enable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3));
+ msleep(50);
+ /* Enable DFE
+ * Set receive adaptation mode. SDR and DDR adaptation are
+ * always on, and QDR is initially enabled; later disabled.
+ */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN);
+ ppd->cpspec->qdr_dfe_on = 1;
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5));
+ /* Disable auto adapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15));
+ msleep(20);
+ /* Enable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12));
+ /* Enable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13));
+ /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */
+ ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11));
+ /* VGA output common mode */
+ ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2));
+
+ /*
+ * Initialize the Tx DDS tables. Also done every QSFP event,
+ * for adapters with QSFP
+ */
+ init_txdds_table(ppd, 0);
+
+ return 0;
+}
+
/* start adjust QMH serdes parameters */
static void set_man_code(struct qib_pportdata *ppd, int chan, int code)
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index f3b50393604..8d3c78ddc90 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -37,9 +37,22 @@
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
#include "qib.h"
#include "qib_common.h"
+#include "qib_mad.h"
+#ifdef CONFIG_DEBUG_FS
+#include "qib_debugfs.h"
+#include "qib_verbs.h"
+#endif
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
/*
* min buffers we want to have per context, after driver
@@ -58,6 +71,11 @@ ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+ "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
+
/*
* If set, do not write to any regs if avoidable, hack to allow
* check for deranged default register values.
@@ -70,6 +88,9 @@ unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
+unsigned qib_cc_table_size;
+module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
/*
* qib_wc_pat parameter:
* 0 is WC via MTRR
@@ -80,9 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
-struct workqueue_struct *qib_wq;
-struct workqueue_struct *qib_cq_wq;
-
static void verify_interrupt(unsigned long);
static struct idr qib_unit_table;
@@ -92,14 +110,18 @@ unsigned long *qib_cpulist;
/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
- if (!qib_cfgctxts)
+ if (!qib_cfgctxts) {
dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
- else if (qib_cfgctxts < dd->num_pports)
+ if (dd->cfgctxts > dd->ctxtcnt)
+ dd->cfgctxts = dd->ctxtcnt;
+ } else if (qib_cfgctxts < dd->num_pports)
dd->cfgctxts = dd->ctxtcnt;
else if (qib_cfgctxts <= dd->ctxtcnt)
dd->cfgctxts = qib_cfgctxts;
else
dd->cfgctxts = dd->ctxtcnt;
+ dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+ dd->cfgctxts - dd->first_user_ctxt;
}
/*
@@ -108,7 +130,11 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
int qib_create_ctxts(struct qib_devdata *dd)
{
unsigned i;
- int ret;
+ int local_node_id = pcibus_to_node(dd->pcidev->bus);
+
+ if (local_node_id < 0)
+ local_node_id = numa_node_id();
+ dd->assigned_node_id = local_node_id;
/*
* Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -116,10 +142,9 @@ int qib_create_ctxts(struct qib_devdata *dd)
*/
dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
if (!dd->rcd) {
- qib_dev_err(dd, "Unable to allocate ctxtdata array, "
- "failing\n");
- ret = -ENOMEM;
- goto done;
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata array, failing\n");
+ return -ENOMEM;
}
/* create (one or more) kctxt */
@@ -131,38 +156,51 @@ int qib_create_ctxts(struct qib_devdata *dd)
continue;
ppd = dd->pport + (i % dd->num_pports);
- rcd = qib_create_ctxtdata(ppd, i);
+
+ rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
if (!rcd) {
- qib_dev_err(dd, "Unable to allocate ctxtdata"
- " for Kernel ctxt, failing\n");
- ret = -ENOMEM;
- goto done;
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+ return -ENOMEM;
}
rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
rcd->seq_cnt = 1;
}
- ret = 0;
-done:
- return ret;
+ return 0;
}
/*
* Common code for user and kernel context setup.
*/
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+ int node_id)
{
struct qib_devdata *dd = ppd->dd;
struct qib_ctxtdata *rcd;
- rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+ rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
if (rcd) {
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ rcd->node_id = node_id;
rcd->ppd = ppd;
rcd->dd = dd;
rcd->cnt = 1;
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
-
+#ifdef CONFIG_DEBUG_FS
+ if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, node_id);
+ if (!rcd->opstats) {
+ kfree(rcd);
+ qib_dev_err(dd,
+ "Unable to allocate per ctxt stats buffer\n");
+ return NULL;
+ }
+ }
+#endif
dd->f_init_ctxt(rcd);
/*
@@ -182,6 +220,9 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
rcd->rcvegrbufs_perchunk - 1) /
rcd->rcvegrbufs_perchunk;
+ BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
+ rcd->rcvegrbufs_perchunk_shift =
+ ilog2(rcd->rcvegrbufs_perchunk);
}
return rcd;
}
@@ -189,20 +230,101 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
/*
* Common code for initializing the physical port structure.
*/
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
+ int size;
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
ppd->port = port; /* IB port number, not index */
spin_lock_init(&ppd->sdma_lock);
spin_lock_init(&ppd->lflags_lock);
+ spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
init_timer(&ppd->symerr_clear_timer);
ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
ppd->symerr_clear_timer.data = (unsigned long)ppd;
+
+ ppd->qib_wq = NULL;
+ ppd->ibport_data.pmastats =
+ alloc_percpu(struct qib_pma_counters);
+ if (!ppd->ibport_data.pmastats)
+ return -ENOMEM;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
+ goto bail;
+
+ ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
+ IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
+
+ ppd->cc_max_table_entries =
+ ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
+
+ size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
+ * IB_CCT_ENTRIES;
+ ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion control table for port %d!\n",
+ port);
+ goto bail;
+ }
+
+ size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
+ ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion setting list for port %d!\n",
+ port);
+ goto bail_1;
+ }
+
+ size = sizeof(struct cc_table_shadow);
+ ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow ccti list for port %d!\n",
+ port);
+ goto bail_2;
+ }
+
+ size = sizeof(struct ib_cc_congestion_setting_attr);
+ ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow congestion setting list for port %d!\n",
+ port);
+ goto bail_3;
+ }
+
+ return 0;
+
+bail_3:
+ kfree(ppd->ccti_entries_shadow);
+ ppd->ccti_entries_shadow = NULL;
+bail_2:
+ kfree(ppd->congestion_entries);
+ ppd->congestion_entries = NULL;
+bail_1:
+ kfree(ppd->ccti_entries);
+ ppd->ccti_entries = NULL;
+bail:
+ /* User is intentionally disabling the congestion control agent */
+ if (!qib_cc_table_size)
+ return 0;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
+ qib_cc_table_size = 0;
+ qib_dev_err(dd,
+ "Congestion Control table size %d less than minimum %d for port %d\n",
+ qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
+ }
+
+ qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
+ port);
+ return 0;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -214,8 +336,8 @@ static int init_pioavailregs(struct qib_devdata *dd)
&dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
GFP_KERNEL);
if (!dd->pioavailregs_dma) {
- qib_dev_err(dd, "failed to allocate PIOavail reg area "
- "in memory\n");
+ qib_dev_err(dd,
+ "failed to allocate PIOavail reg area in memory\n");
ret = -ENOMEM;
goto done;
}
@@ -268,23 +390,20 @@ static void init_shadow_tids(struct qib_devdata *dd)
struct page **pages;
dma_addr_t *addrs;
- pages = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
+ pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
if (!pages) {
- qib_dev_err(dd, "failed to allocate shadow page * "
- "array, no expected sends!\n");
+ qib_dev_err(dd,
+ "failed to allocate shadow page * array, no expected sends!\n");
goto bail;
}
- addrs = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
+ addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
if (!addrs) {
- qib_dev_err(dd, "failed to allocate shadow dma handle "
- "array, no expected sends!\n");
+ qib_dev_err(dd,
+ "failed to allocate shadow dma handle array, no expected sends!\n");
goto bail_free;
}
- memset(pages, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
- memset(addrs, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
-
dd->pageshadow = pages;
dd->physshadow = addrs;
return;
@@ -305,13 +424,13 @@ static int loadtime_init(struct qib_devdata *dd)
if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
- qib_dev_err(dd, "Driver only handles version %d, "
- "chip swversion is %d (%llx), failng\n",
- QIB_CHIP_SWVERSION,
- (int)(dd->revision >>
+ qib_dev_err(dd,
+ "Driver only handles version %d, chip swversion is %d (%llx), failng\n",
+ QIB_CHIP_SWVERSION,
+ (int)(dd->revision >>
QLOGIC_IB_R_SOFTWARE_SHIFT) &
- QLOGIC_IB_R_SOFTWARE_MASK,
- (unsigned long long) dd->revision);
+ QLOGIC_IB_R_SOFTWARE_MASK,
+ (unsigned long long) dd->revision);
ret = -ENOSYS;
goto done;
}
@@ -339,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd)
dd->intrchk_timer.function = verify_interrupt;
dd->intrchk_timer.data = (unsigned long) dd;
+ ret = qib_cq_init(dd);
done:
return ret;
}
@@ -348,7 +468,7 @@ done:
* @dd: the qlogic_ib device
*
* sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explictly, in case reset
+ * ensure no receive or transmit (explicitly, in case reset
* failed
*/
static int init_after_reset(struct qib_devdata *dd)
@@ -405,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
static void verify_interrupt(unsigned long opaque)
{
struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ u64 int_counter;
if (!dd)
return; /* being torn down */
@@ -413,10 +534,11 @@ static void verify_interrupt(unsigned long opaque)
* If we don't have a lid or any interrupts, let the user know and
* don't bother checking again.
*/
- if (dd->int_counter == 0) {
+ int_counter = qib_int_counter(dd) - dd->z_int_counter;
+ if (int_counter == 0) {
if (!dd->f_intr_fallback(dd))
- dev_err(&dd->pcidev->dev, "No interrupts detected, "
- "not usable.\n");
+ dev_err(&dd->pcidev->dev,
+ "No interrupts detected, not usable.\n");
else /* re-arm the timer to see if fallback works */
mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
}
@@ -479,6 +601,47 @@ static void init_piobuf_state(struct qib_devdata *dd)
}
/**
+ * qib_create_workqueues - create per port workqueues
+ * @dd: the qlogic_ib device
+ */
+static int qib_create_workqueues(struct qib_devdata *dd)
+{
+ int pidx;
+ struct qib_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (!ppd->qib_wq) {
+ char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */
+ snprintf(wq_name, sizeof(wq_name), "qib%d_%d",
+ dd->unit, pidx);
+ ppd->qib_wq =
+ create_singlethread_workqueue(wq_name);
+ if (!ppd->qib_wq)
+ goto wq_error;
+ }
+ }
+ return 0;
+wq_error:
+ pr_err("create_singlethread_workqueue failed for port %d\n",
+ pidx + 1);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
+ }
+ return -ENOMEM;
+}
+
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+ free_percpu(ppd->ibport_data.pmastats);
+ ppd->ibport_data.pmastats = NULL;
+}
+
+/**
* qib_init - do the actual initialization sequence on the chip
* @dd: the qlogic_ib device
* @reinit: reinitializing, so don't allocate new memory
@@ -543,8 +706,8 @@ int qib_init(struct qib_devdata *dd, int reinit)
if (!lastfail)
lastfail = qib_setup_eagerbufs(rcd);
if (lastfail) {
- qib_dev_err(dd, "failed to allocate kernel ctxt's "
- "rcvhdrq and/or egr bufs\n");
+ qib_dev_err(dd,
+ "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
continue;
}
}
@@ -583,10 +746,6 @@ int qib_init(struct qib_devdata *dd, int reinit)
continue;
}
- /* let link come up, and enable IBC */
- spin_lock_irqsave(&ppd->lflags_lock, flags);
- ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
- spin_unlock_irqrestore(&ppd->lflags_lock, flags);
portok++;
}
@@ -764,6 +923,12 @@ static void qib_shutdown_device(struct qib_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
dd->f_quiet_serdes(ppd);
+
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
+ qib_free_pportdata(ppd);
}
qib_update_eeprom_log(dd);
@@ -818,6 +983,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
+#ifdef CONFIG_DEBUG_FS
+ kfree(rcd->opstats);
+ rcd->opstats = NULL;
+#endif
kfree(rcd);
}
@@ -893,8 +1062,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd)
/* 1 GiB/sec, slightly over IB SDR line rate */
if (lcnt < (emsecs * 1024U))
qib_dev_err(dd,
- "Performance problem: bandwidth to PIO buffers is "
- "only %u MiB/sec\n",
+ "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n",
lcnt / (u32) emsecs);
preempt_enable();
@@ -908,7 +1076,6 @@ done:
dd->f_set_armlaunch(dd, 1);
}
-
void qib_free_devdata(struct qib_devdata *dd)
{
unsigned long flags;
@@ -918,9 +1085,37 @@ void qib_free_devdata(struct qib_devdata *dd)
list_del(&dd->list);
spin_unlock_irqrestore(&qib_devs_lock, flags);
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_exit(&dd->verbs_dev);
+#endif
+ free_percpu(dd->int_counter);
ib_dealloc_device(&dd->verbs_dev.ibdev);
}
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+ int cpu;
+ u64 int_counter = 0;
+
+ for_each_possible_cpu(cpu)
+ int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+ return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ sps_ints += qib_int_counter(dd);
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return sps_ints;
+}
+
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
@@ -935,28 +1130,34 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
struct qib_devdata *dd;
int ret;
- if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
+ if (!dd)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&dd->list);
+
+ idr_preload(GFP_KERNEL);
spin_lock_irqsave(&qib_devs_lock, flags);
- ret = idr_get_new(&qib_unit_table, dd, &dd->unit);
- if (ret >= 0)
+
+ ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT);
+ if (ret >= 0) {
+ dd->unit = ret;
list_add(&dd->list, &qib_dev_list);
+ }
+
spin_unlock_irqrestore(&qib_devs_lock, flags);
+ idr_preload_end();
if (ret < 0) {
qib_early_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
- ib_dealloc_device(&dd->verbs_dev.ibdev);
- dd = ERR_PTR(ret);
+ goto bail;
+ }
+ dd->int_counter = alloc_percpu(u64);
+ if (!dd->int_counter) {
+ ret = -ENOMEM;
+ qib_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
goto bail;
}
@@ -967,12 +1168,18 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (qib_cpulist)
qib_cpulist_count = count;
else
- qib_early_err(&pdev->dev, "Could not alloc cpulist "
- "info, cpu affinity might be wrong\n");
+ qib_early_err(&pdev->dev,
+ "Could not alloc cpulist info, cpu affinity might be wrong\n");
}
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
return dd;
+bail:
+ if (!list_empty(&dd->list))
+ list_del_init(&dd->list);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+ return ERR_PTR(ret);;
}
/*
@@ -1009,14 +1216,13 @@ void qib_disable_after_error(struct qib_devdata *dd)
*dd->devstatusp |= QIB_STATUS_HWERROR;
}
-static void __devexit qib_remove_one(struct pci_dev *);
-static int __devinit qib_init_one(struct pci_dev *,
- const struct pci_device_id *);
+static void qib_remove_one(struct pci_dev *);
+static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
-#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
+#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "
-static const struct pci_device_id qib_pci_tbl[] = {
+static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
@@ -1025,19 +1231,48 @@ static const struct pci_device_id qib_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
-struct pci_driver qib_driver = {
+static struct pci_driver qib_driver = {
.name = QIB_DRV_NAME,
.probe = qib_init_one,
- .remove = __devexit_p(qib_remove_one),
+ .remove = qib_remove_one,
.id_table = qib_pci_tbl,
.err_handler = &qib_pci_err_handler,
};
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+ .notifier_call = qib_notify_dca,
+ .next = NULL,
+ .priority = 0
+};
+
+static int qib_notify_dca_device(struct device *device, void *data)
+{
+ struct qib_devdata *dd = dev_get_drvdata(device);
+ unsigned long event = *(unsigned long *)data;
+
+ return dd->f_notify_dca(dd, event);
+}
+
+static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
+ void *p)
+{
+ int rval;
+
+ rval = driver_for_each_device(&qib_driver.driver, NULL,
+ &event, qib_notify_dca_device);
+ return rval ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+#endif
+
/*
* Do all the generic driver unit- and chip-independent memory
* allocation and initialization.
*/
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
{
int ret;
@@ -1046,79 +1281,63 @@ static int __init qlogic_ib_init(void)
goto bail;
/*
- * We create our own workqueue mainly because we want to be
- * able to flush it when devices are being removed. We can't
- * use schedule_work()/flush_scheduled_work() because both
- * unregister_netdev() and linkwatch_event take the rtnl lock,
- * so flush_scheduled_work() can deadlock during device
- * removal.
- */
- qib_wq = create_workqueue("qib");
- if (!qib_wq) {
- ret = -ENOMEM;
- goto bail_dev;
- }
-
- qib_cq_wq = create_singlethread_workqueue("qib_cq");
- if (!qib_cq_wq) {
- ret = -ENOMEM;
- goto bail_wq;
- }
-
- /*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
idr_init(&qib_unit_table);
- if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
- printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
- ret = -ENOMEM;
- goto bail_cq_wq;
- }
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_register_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_init();
+#endif
ret = pci_register_driver(&qib_driver);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Unable to register driver: error %d\n", -ret);
- goto bail_unit;
+ pr_err("Unable to register driver: error %d\n", -ret);
+ goto bail_dev;
}
/* not fatal if it doesn't work */
if (qib_init_qibfs())
- printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n");
+ pr_err("Unable to register ipathfs\n");
goto bail; /* all OK */
-bail_unit:
- idr_destroy(&qib_unit_table);
-bail_cq_wq:
- destroy_workqueue(qib_cq_wq);
-bail_wq:
- destroy_workqueue(qib_wq);
bail_dev:
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
+ idr_destroy(&qib_unit_table);
qib_dev_cleanup();
bail:
return ret;
}
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
/*
* Do the non-unit driver cleanup, memory free, etc. at unload.
*/
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
{
int ret;
ret = qib_exit_qibfs();
if (ret)
- printk(KERN_ERR QIB_DRV_NAME ": "
- "Unable to cleanup counter filesystem: "
- "error %d\n", -ret);
+ pr_err(
+ "Unable to cleanup counter filesystem: error %d\n",
+ -ret);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
pci_unregister_driver(&qib_driver);
-
- destroy_workqueue(qib_wq);
- destroy_workqueue(qib_cq_wq);
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
qib_cpulist_count = 0;
kfree(qib_cpulist);
@@ -1127,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
qib_dev_cleanup();
}
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
@@ -1138,10 +1357,24 @@ static void cleanup_device_data(struct qib_devdata *dd)
unsigned long flags;
/* users can't do anything more with chip */
- for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
if (dd->pport[pidx].statusp)
*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
+ spin_lock(&dd->pport[pidx].cc_shadow_lock);
+
+ kfree(dd->pport[pidx].congestion_entries);
+ dd->pport[pidx].congestion_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries);
+ dd->pport[pidx].ccti_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries_shadow);
+ dd->pport[pidx].ccti_entries_shadow = NULL;
+ kfree(dd->pport[pidx].congestion_entries_shadow);
+ dd->pport[pidx].congestion_entries_shadow = NULL;
+
+ spin_unlock(&dd->pport[pidx].cc_shadow_lock);
+ }
+
if (!qib_wc_pat)
qib_disable_wc(dd);
@@ -1155,7 +1388,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
if (dd->pageshadow) {
struct page **tmpp = dd->pageshadow;
dma_addr_t *tmpd = dd->physshadow;
- int i, cnt = 0;
+ int i;
for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
int ctxt_tidbase = ctxt * dd->rcvtidcnt;
@@ -1168,13 +1401,13 @@ static void cleanup_device_data(struct qib_devdata *dd)
PAGE_SIZE, PCI_DMA_FROMDEVICE);
qib_release_user_pages(&tmpp[i], 1);
tmpp[i] = NULL;
- cnt++;
}
}
- tmpp = dd->pageshadow;
dd->pageshadow = NULL;
vfree(tmpp);
+ dd->physshadow = NULL;
+ vfree(tmpd);
}
/*
@@ -1196,6 +1429,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
}
kfree(tmp);
kfree(dd->boardname);
+ qib_cq_exit(dd);
}
/*
@@ -1221,8 +1455,7 @@ static void qib_postinit_cleanup(struct qib_devdata *dd)
qib_free_devdata(dd);
}
-static int __devinit qib_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret, j, pidx, initfail;
struct qib_devdata *dd = NULL;
@@ -1240,9 +1473,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
#ifdef CONFIG_PCI_MSI
dd = qib_init_iba6120_funcs(pdev, ent);
#else
- qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
- "work if CONFIG_PCI_MSI is not enabled\n",
- ent->device);
+ qib_early_err(&pdev->dev,
+ "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
+ ent->device);
dd = ERR_PTR(-ENODEV);
#endif
break;
@@ -1256,8 +1489,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
break;
default:
- qib_early_err(&pdev->dev, "Failing on unknown QLogic "
- "deviceid 0x%x\n", ent->device);
+ qib_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
}
@@ -1266,6 +1500,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
if (ret)
goto bail; /* error already printed */
+ ret = qib_create_workqueues(dd);
+ if (ret)
+ goto bail;
+
/* do the generic initialization */
initfail = qib_init(dd, 0);
@@ -1290,7 +1528,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
if (qib_mini_init || initfail || ret) {
qib_stop_timers(dd);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx)
dd->f_quiet_serdes(dd->pport + pidx);
if (qib_mini_init)
@@ -1310,9 +1548,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
if (!qib_wc_pat) {
ret = qib_enable_wc(dd);
if (ret) {
- qib_dev_err(dd, "Write combining not enabled "
- "(err %d): performance may be poor\n",
- -ret);
+ qib_dev_err(dd,
+ "Write combining not enabled (err %d): performance may be poor\n",
+ -ret);
ret = 0;
}
}
@@ -1322,7 +1560,7 @@ bail:
return ret;
}
-static void __devexit qib_remove_one(struct pci_dev *pdev)
+static void qib_remove_one(struct pci_dev *pdev)
{
struct qib_devdata *dd = pci_get_drvdata(pdev);
int ret;
@@ -1339,8 +1577,8 @@ static void __devexit qib_remove_one(struct pci_dev *pdev)
qib_stop_timers(dd);
- /* wait until all of our (qsfp) schedule_work() calls complete */
- flush_scheduled_work();
+ /* wait until all of our (qsfp) queue_work() calls complete */
+ flush_workqueue(ib_wq);
ret = qibfs_remove(dd);
if (ret)
@@ -1364,6 +1602,7 @@ static void __devexit qib_remove_one(struct pci_dev *pdev)
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
unsigned amt;
+ int old_node_id;
if (!rcd->rcvhdrq) {
dma_addr_t phys_hdrqtail;
@@ -1373,14 +1612,18 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
sizeof(u32), PAGE_SIZE);
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
gfp_flags | __GFP_COMP);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrq) {
- qib_dev_err(dd, "attempt to allocate %d bytes "
- "for ctxt %u rcvhdrq failed\n",
- amt, rcd->ctxt);
+ qib_dev_err(dd,
+ "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+ amt, rcd->ctxt);
goto bail;
}
@@ -1391,9 +1634,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
}
if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1409,8 +1654,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
return 0;
bail_free:
- qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
- "rcvhdrqtailaddr failed\n", rcd->ctxt);
+ qib_dev_err(dd,
+ "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+ rcd->ctxt);
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
bail_free_hdrq:
@@ -1436,6 +1682,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
gfp_t gfp_flags;
+ int old_node_id;
/*
* GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1454,25 +1701,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
size = rcd->rcvegrbuf_size;
if (!rcd->rcvegrbuf) {
rcd->rcvegrbuf =
- kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
- GFP_KERNEL);
+ kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf)
goto bail;
}
if (!rcd->rcvegrbuf_phys) {
rcd->rcvegrbuf_phys =
- kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
- GFP_KERNEL);
+ kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
if (!rcd->rcvegrbuf_phys)
goto bail_rcvegrbuf;
}
for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
if (rcd->rcvegrbuf[e])
continue;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvegrbuf[e] =
dma_alloc_coherent(&dd->pcidev->dev, size,
&rcd->rcvegrbuf_phys[e],
gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvegrbuf[e])
goto bail_rcvegrbuf_phys;
}
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 54a40828a10..f4918f2165e 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -96,8 +96,12 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs)
* states, or if it transitions from any of the up (INIT or better)
* states into any of the down states (except link recovery), then
* call the chip-specific code to take appropriate actions.
+ *
+ * ppd->lflags could be 0 if this is the first time the interrupt
+ * handlers has been called but the link is already up.
*/
- if (lstate >= IB_PORT_INIT && (ppd->lflags & QIBL_LINKDOWN) &&
+ if (lstate >= IB_PORT_INIT &&
+ (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) &&
ltstate == IB_PHYSPORTSTATE_LINKUP) {
/* transitioned to UP */
if (dd->f_ib_updown(ppd, 1, ibcs))
@@ -131,7 +135,8 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs)
/* start a 75msec timer to clear symbol errors */
mod_timer(&ppd->symerr_clear_timer,
msecs_to_jiffies(75));
- } else if (ltstate == IB_PHYSPORTSTATE_LINKUP) {
+ } else if (ltstate == IB_PHYSPORTSTATE_LINKUP &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
/* active, but not active defered */
qib_hol_up(ppd); /* useful only for 6120 now */
*ppd->statusp |=
@@ -219,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd)
* We print the message and disable interrupts, in hope of
* having a better chance of debugging the problem.
*/
- qib_dev_err(dd, "Read of chip interrupt status failed"
- " disabling interrupts\n");
+ qib_dev_err(dd,
+ "Read of chip interrupt status failed disabling interrupts\n");
if (allbits++) {
/* disable interrupt delivery, something is very wrong */
if (allbits == 2)
dd->f_set_intr_state(dd, 0);
if (allbits == 3) {
- qib_dev_err(dd, "2nd bad interrupt status, "
- "unregistering interrupts\n");
+ qib_dev_err(dd,
+ "2nd bad interrupt status, unregistering interrupts\n");
dd->flags |= QIB_BADINTR;
dd->flags &= ~QIB_INITTED;
dd->f_free_irq(dd);
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 4b80eb153d5..3b9afccaaad 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -35,21 +35,41 @@
/**
* qib_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
* @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field mr for non-dma regions.
*
- * Returns 1 if successful, otherwise returns 0.
*/
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
{
unsigned long flags;
u32 r;
u32 n;
- int ret;
+ int ret = 0;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
+ /* special case for dma_mr lkey == 0 */
+ if (dma_region) {
+ struct qib_mregion *tmr;
+
+ tmr = rcu_access_pointer(dev->dma_mr);
+ if (!tmr) {
+ qib_get_mr(mr);
+ rcu_assign_pointer(dev->dma_mr, mr);
+ mr->lkey_published = 1;
+ }
+ goto success;
+ }
+
/* Find the next available LKEY */
r = rkt->next;
n = r;
@@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
if (rkt->table[r] == NULL)
break;
r = (r + 1) & (rkt->max - 1);
- if (r == n) {
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = 0;
+ if (r == n)
goto bail;
- }
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
@@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
mr->lkey |= 1 << 8;
rkt->gen++;
}
- rkt->table[r] = mr;
+ qib_get_mr(mr);
+ rcu_assign_pointer(rkt->table[r], mr);
+ mr->lkey_published = 1;
+success:
spin_unlock_irqrestore(&rkt->lock, flags);
-
- ret = 1;
-
-bail:
+out:
return ret;
+bail:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = -ENOMEM;
+ goto out;
}
/**
* qib_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
+ * @mr: mr to free from tables
*/
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr)
+void qib_free_lkey(struct qib_mregion *mr)
{
unsigned long flags;
u32 lkey = mr->lkey;
u32 r;
- int ret;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
- spin_lock_irqsave(&dev->lk_table.lock, flags);
- if (lkey == 0) {
- if (dev->dma_mr && dev->dma_mr == mr) {
- ret = atomic_read(&dev->dma_mr->refcount);
- if (!ret)
- dev->dma_mr = NULL;
- } else
- ret = 0;
- } else {
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (!mr->lkey_published)
+ goto out;
+ if (lkey == 0)
+ rcu_assign_pointer(dev->dma_mr, NULL);
+ else {
r = lkey >> (32 - ib_qib_lkey_table_size);
- ret = atomic_read(&dev->lk_table.table[r]->refcount);
- if (!ret)
- dev->lk_table.table[r] = NULL;
+ rcu_assign_pointer(rkt->table[r], NULL);
}
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
-
- if (ret)
- ret = -EBUSY;
- return ret;
+ qib_put_mr(mr);
+ mr->lkey_published = 0;
+out:
+ spin_unlock_irqrestore(&rkt->lock, flags);
}
/**
* qib_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
* @isge: outgoing internal SGE
* @sge: SGE to check
* @acc: access flags
*
* Return 1 if valid and successful, otherwise returns 0.
*
+ * increments the reference count upon success
+ *
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
@@ -136,23 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- int ret = 0;
- unsigned long flags;
/*
* We use LKEY == zero for kernel virtual addresses
* (see qib_get_dma_mr and qib_dma.c).
*/
- spin_lock_irqsave(&rkt->lock, flags);
+ rcu_read_lock();
if (sge->lkey == 0) {
struct qib_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
- if (!dev->dma_mr)
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
- isge->mr = dev->dma_mr;
+ rcu_read_unlock();
+
+ isge->mr = mr;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
isge->sge_length = sge->length;
@@ -160,9 +180,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->n = 0;
goto ok;
}
- mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
- mr->pd != &pd->ibpd))
+ mr = rcu_dereference(
+ rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
off = sge->addr - mr->user_base;
@@ -170,19 +190,35 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
off += mr->offset;
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= QIB_SEGSZ) {
- m++;
- n = 0;
+ if (mr->page_shift) {
+ /*
+ page sizes are uniform power of 2 so no loop is necessary
+ entries_spanned_by_off is the number of times the loop below
+ would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
}
}
- atomic_inc(&mr->refcount);
isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off;
@@ -190,22 +226,24 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->m = m;
isge->n = n;
ok:
- ret = 1;
+ return 1;
bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
+ rcu_read_unlock();
+ return 0;
}
/**
* qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
* @len: length of data
* @vaddr: virtual address to place data
* @rkey: rkey to check
* @acc: access flags
*
* Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
*/
int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc)
@@ -214,24 +252,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- int ret = 0;
- unsigned long flags;
/*
* We use RKEY == zero for kernel virtual addresses
* (see qib_get_dma_mr and qib_dma.c).
*/
- spin_lock_irqsave(&rkt->lock, flags);
+ rcu_read_lock();
if (rkey == 0) {
struct qib_pd *pd = to_ipd(qp->ibqp.pd);
struct qib_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
- if (!dev->dma_mr)
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
- sge->mr = dev->dma_mr;
+ rcu_read_unlock();
+
+ sge->mr = mr;
sge->vaddr = (void *) vaddr;
sge->length = len;
sge->sge_length = len;
@@ -240,27 +280,44 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
goto ok;
}
- mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ mr = rcu_dereference(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
off += mr->offset;
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= QIB_SEGSZ) {
- m++;
- n = 0;
+ if (mr->page_shift) {
+ /*
+ page sizes are uniform power of 2 so no loop is necessary
+ entries_spanned_by_off is the number of times the loop below
+ would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
}
}
- atomic_inc(&mr->refcount);
sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off;
@@ -268,10 +325,10 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
sge->m = m;
sge->n = n;
ok:
- ret = 1;
+ return 1;
bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
+ rcu_read_unlock();
+ return 0;
}
/*
@@ -293,7 +350,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
if (pd->user || rkey == 0)
goto bail;
- mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
+ mr = rcu_dereference_protected(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+ lockdep_is_held(&rkt->lock));
if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
goto bail;
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 94b0d1f3a8f..22c720e5740 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+static int reply_failure(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
+}
+
static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
{
struct ib_mad_send_buf *send_buf;
@@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
if (!ibp->sm_ah) {
if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
struct ib_ah *ah;
- struct ib_ah_attr attr;
- memset(&attr, 0, sizeof attr);
- attr.dlid = ibp->sm_lid;
- attr.port_num = ppd_from_ibp(ibp)->port;
- ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
+ ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
if (IS_ERR(ah))
- ret = -EINVAL;
+ ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
ibp->sm_ah = to_iah(ah);
@@ -396,6 +404,7 @@ static int get_linkdowndefaultstate(struct qib_pportdata *ppd)
static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
{
+ int valid_mkey = 0;
int ret = 0;
/* Is the mkey in the process of expiring? */
@@ -406,23 +415,36 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
ibp->mkeyprot = 0;
}
- /* M_Key checking depends on Portinfo:M_Key_protect_bits */
- if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 &&
- ibp->mkey != smp->mkey &&
- (smp->method == IB_MGMT_METHOD_SET ||
- smp->method == IB_MGMT_METHOD_TRAP_REPRESS ||
- (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) {
- if (ibp->mkey_violations != 0xFFFF)
- ++ibp->mkey_violations;
- if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
- ibp->mkey_lease_timeout = jiffies +
- ibp->mkey_lease_period * HZ;
- /* Generate a trap notice. */
- qib_bad_mkey(ibp, smp);
- ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- } else if (ibp->mkey_lease_timeout)
+ if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 ||
+ ibp->mkey == smp->mkey)
+ valid_mkey = 1;
+
+ /* Unset lease timeout on any valid Get/Set/TrapRepress */
+ if (valid_mkey && ibp->mkey_lease_timeout &&
+ (smp->method == IB_MGMT_METHOD_GET ||
+ smp->method == IB_MGMT_METHOD_SET ||
+ smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
ibp->mkey_lease_timeout = 0;
+ if (!valid_mkey) {
+ switch (smp->method) {
+ case IB_MGMT_METHOD_GET:
+ /* Bad mkey not a violation below level 2 */
+ if (ibp->mkeyprot < 2)
+ break;
+ case IB_MGMT_METHOD_SET:
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ if (ibp->mkey_violations != 0xFFFF)
+ ++ibp->mkey_violations;
+ if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
+ ibp->mkey_lease_timeout = jiffies +
+ ibp->mkey_lease_period * HZ;
+ /* Generate a trap notice. */
+ qib_bad_mkey(ibp, smp);
+ ret = 1;
+ }
+ }
+
return ret;
}
@@ -433,7 +455,6 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
struct ib_port_info *pip = (struct ib_port_info *)smp->data;
- u16 lid;
u8 mtu;
int ret;
u32 state;
@@ -450,8 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (port_num != port) {
ibp = to_iport(ibdev, port_num);
ret = check_mkey(ibp, smp, 0);
- if (ret)
+ if (ret) {
+ ret = IB_MAD_RESULT_FAILURE;
goto bail;
+ }
}
}
@@ -464,12 +487,12 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
memset(smp->data, 0, sizeof(smp->data));
/* Only return the mkey if the protection field allows it. */
- if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey ||
- ibp->mkeyprot == 0)
+ if (!(smp->method == IB_MGMT_METHOD_GET &&
+ ibp->mkey != smp->mkey &&
+ ibp->mkeyprot == 1))
pip->mkey = ibp->mkey;
pip->gid_prefix = ibp->gid_prefix;
- lid = ppd->lid;
- pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
+ pip->lid = cpu_to_be16(ppd->lid);
pip->sm_lid = cpu_to_be16(ibp->sm_lid);
pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
/* pip->diag_code; */
@@ -632,7 +655,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
struct qib_devdata *dd;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
- char clientrereg = 0;
+ u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80);
unsigned long flags;
u16 lid, smlid;
u8 lwe;
@@ -668,8 +691,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lid = be16_to_cpu(pip->lid);
/* Must be a valid unicast LID address. */
if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
- goto err;
- if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
if (ppd->lid != lid)
qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT);
if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7))
@@ -683,8 +706,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
msl = pip->neighbormtu_mastersmsl & 0xF;
/* Must be a valid unicast LID address. */
if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
- goto err;
- if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
spin_lock_irqsave(&ibp->lock, flags);
if (ibp->sm_ah) {
if (smlid != ibp->sm_lid)
@@ -705,10 +728,11 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lwe = pip->link_width_enabled;
if (lwe) {
if (lwe == 0xFF)
- lwe = ppd->link_width_supported;
+ set_link_width_enabled(ppd, ppd->link_width_supported);
else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
- goto err;
- set_link_width_enabled(ppd, lwe);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lwe != ppd->link_width_enabled)
+ set_link_width_enabled(ppd, lwe);
}
lse = pip->linkspeedactive_enabled & 0xF;
@@ -719,10 +743,12 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
* speeds.
*/
if (lse == 15)
- lse = ppd->link_speed_supported;
+ set_link_speed_enabled(ppd,
+ ppd->link_speed_supported);
else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
- goto err;
- set_link_speed_enabled(ppd, lse);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lse != ppd->link_speed_enabled)
+ set_link_speed_enabled(ppd, lse);
}
/* Set link down default state. */
@@ -738,7 +764,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
IB_LINKINITCMD_POLL);
break;
default:
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
}
ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
@@ -748,15 +774,17 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
if (mtu == -1)
- goto err;
- qib_set_mtu(ppd, mtu);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ qib_set_mtu(ppd, mtu);
/* Set operational VLs */
vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF;
if (vls) {
if (vls > ppd->vls_supported)
- goto err;
- (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
}
if (pip->mkey_violations == 0)
@@ -770,19 +798,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
ore = pip->localphyerrors_overrunerrors;
if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
if (set_overrunthreshold(ppd, (ore & 0xF)))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
- if (pip->clientrereg_resv_subnetto & 0x80) {
- clientrereg = 1;
- event.event = IB_EVENT_CLIENT_REREGISTER;
- ib_dispatch_event(&event);
- }
-
/*
* Do the port state change now that the other link parameters
* have been set.
@@ -792,7 +814,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
state = pip->linkspeed_portstate & 0xF;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
/*
* Only state changes of DOWN, ARM, and ACTIVE are valid
@@ -812,8 +834,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lstate = QIB_IB_LINKDOWN;
else if (lstate == 3)
lstate = QIB_IB_LINKDOWN_DISABLE;
- else
- goto err;
+ else {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ break;
+ }
spin_lock_irqsave(&ppd->lflags_lock, flags);
ppd->lflags &= ~QIBL_LINKV;
spin_unlock_irqrestore(&ppd->lflags_lock, flags);
@@ -835,16 +859,20 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
break;
default:
- /* XXX We have already partially updated our state! */
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
+ }
+
+ if (clientrereg) {
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ ib_dispatch_event(&event);
}
ret = subn_get_portinfo(smp, ibdev, port);
- if (clientrereg)
- pip->clientrereg_resv_subnetto |= 0x80;
+ /* restore re-reg bit per o14-12.2.1 */
+ pip->clientrereg_resv_subnetto |= clientrereg;
- goto done;
+ goto get_only;
err:
smp->status |= IB_SMP_INVALID_FIELD;
@@ -1000,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
event.event = IB_EVENT_PKEY_CHANGE;
event.device = &dd->verbs_dev.ibdev;
- event.element.port_num = 1;
+ event.element.port_num = port;
ib_dispatch_event(&event);
}
return 0;
@@ -1118,22 +1146,22 @@ static int subn_trap_repress(struct ib_smp *smp, struct ib_device *ibdev,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
-static int pma_get_classportinfo(struct ib_perf *pmp,
+static int pma_get_classportinfo(struct ib_pma_mad *pmp,
struct ib_device *ibdev)
{
- struct ib_pma_classportinfo *p =
- (struct ib_pma_classportinfo *)pmp->data;
+ struct ib_class_port_info *p =
+ (struct ib_class_port_info *)pmp->data;
struct qib_devdata *dd = dd_from_ibdev(ibdev);
memset(pmp->data, 0, sizeof(pmp->data));
- if (pmp->attr_mod != 0)
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
/* Note that AllPortSelect is not valid */
p->base_version = 1;
p->class_version = 1;
- p->cap_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ p->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
/*
* Set the most significant bit of CM2 to indicate support for
* congestion statistics
@@ -1147,7 +1175,7 @@ static int pma_get_classportinfo(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int pma_get_portsamplescontrol(struct ib_perf *pmp,
+static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
@@ -1162,8 +1190,8 @@ static int pma_get_portsamplescontrol(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 || port_select != port) {
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
goto bail;
}
spin_lock_irqsave(&ibp->lock, flags);
@@ -1185,7 +1213,7 @@ bail:
return reply((struct ib_smp *) pmp);
}
-static int pma_set_portsamplescontrol(struct ib_perf *pmp,
+static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
@@ -1198,8 +1226,8 @@ static int pma_set_portsamplescontrol(struct ib_perf *pmp,
u8 status, xmit_flags;
int ret;
- if (pmp->attr_mod != 0 || p->port_select != port) {
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
@@ -1314,7 +1342,7 @@ static u64 get_cache_hw_sample_counters(struct qib_pportdata *ppd,
return ret;
}
-static int pma_get_portsamplesresult(struct ib_perf *pmp,
+static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplesresult *p =
@@ -1353,7 +1381,7 @@ static int pma_get_portsamplesresult(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int pma_get_portsamplesresult_ext(struct ib_perf *pmp,
+static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplesresult_ext *p =
@@ -1395,7 +1423,7 @@ static int pma_get_portsamplesresult_ext(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int pma_get_portcounters(struct ib_perf *pmp,
+static int pma_get_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1429,8 +1457,8 @@ static int pma_get_portcounters(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 || port_select != port)
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
if (cntrs.symbol_error_counter > 0xFFFFUL)
p->symbol_error_counter = cpu_to_be16(0xFFFF);
@@ -1465,7 +1493,7 @@ static int pma_get_portcounters(struct ib_perf *pmp,
cntrs.local_link_integrity_errors = 0xFUL;
if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
cntrs.excessive_buffer_overrun_errors = 0xFUL;
- p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
cntrs.excessive_buffer_overrun_errors;
if (cntrs.vl15_dropped > 0xFFFFUL)
p->vl15_dropped = cpu_to_be16(0xFFFF);
@@ -1493,7 +1521,7 @@ static int pma_get_portcounters(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int pma_get_portcounters_cong(struct ib_perf *pmp,
+static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
/* Congestion PMA packets start at offset 24 not 64 */
@@ -1503,7 +1531,7 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_devdata *dd = dd_from_ppd(ppd);
- u32 port_select = be32_to_cpu(pmp->attr_mod) & 0xFF;
+ u32 port_select = be32_to_cpu(pmp->mad_hdr.attr_mod) & 0xFF;
u64 xmit_wait_counter;
unsigned long flags;
@@ -1512,9 +1540,9 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp,
* SET method ends up calling this anyway.
*/
if (!dd->psxmitwait_supported)
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
if (port_select != port)
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
qib_get_counters(ppd, &cntrs);
spin_lock_irqsave(&ppd->ibport_data.lock, flags);
@@ -1596,7 +1624,7 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp,
cntrs.local_link_integrity_errors = 0xFUL;
if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
cntrs.excessive_buffer_overrun_errors = 0xFUL;
- p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
cntrs.excessive_buffer_overrun_errors;
if (cntrs.vl15_dropped > 0xFFFFUL)
p->vl15_dropped = cpu_to_be16(0xFFFF);
@@ -1606,7 +1634,24 @@ static int pma_get_portcounters_cong(struct ib_perf *pmp,
return reply((struct ib_smp *)pmp);
}
-static int pma_get_portcounters_ext(struct ib_perf *pmp,
+static void qib_snapshot_pmacounters(
+ struct qib_ibport *ibp,
+ struct qib_pma_counters *pmacounters)
+{
+ struct qib_pma_counters *p;
+ int cpu;
+
+ memset(pmacounters, 0, sizeof(*pmacounters));
+ for_each_possible_cpu(cpu) {
+ p = per_cpu_ptr(ibp->pmastats, cpu);
+ pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+ pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+ pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+ pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+ }
+}
+
+static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters_ext *p =
@@ -1614,13 +1659,14 @@ static int pma_get_portcounters_ext(struct ib_perf *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
u8 port_select = p->port_select;
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 || port_select != port) {
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
goto bail;
}
@@ -1636,16 +1682,23 @@ static int pma_get_portcounters_ext(struct ib_perf *pmp,
p->port_rcv_data = cpu_to_be64(rwords);
p->port_xmit_packets = cpu_to_be64(spkts);
p->port_rcv_packets = cpu_to_be64(rpkts);
- p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
- p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
- p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
- p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+ - ibp->z_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+ - ibp->z_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+ - ibp->z_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+ - ibp->z_multicast_rcv);
bail:
return reply((struct ib_smp *) pmp);
}
-static int pma_set_portcounters(struct ib_perf *pmp,
+static int pma_set_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1708,14 +1761,14 @@ static int pma_set_portcounters(struct ib_perf *pmp,
return pma_get_portcounters(pmp, ibdev, port);
}
-static int pma_set_portcounters_cong(struct ib_perf *pmp,
+static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_devdata *dd = dd_from_ppd(ppd);
struct qib_verbs_counters cntrs;
- u32 counter_select = (be32_to_cpu(pmp->attr_mod) >> 24) & 0xFF;
+ u32 counter_select = (be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24) & 0xFF;
int ret = 0;
unsigned long flags;
@@ -1759,7 +1812,7 @@ static int pma_set_portcounters_cong(struct ib_perf *pmp,
return ret;
}
-static int pma_set_portcounters_ext(struct ib_perf *pmp,
+static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1767,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_perf *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
@@ -1782,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_perf *pmp,
if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
ibp->z_port_rcv_packets = rpkts;
+ qib_snapshot_pmacounters(ibp, &pma);
+
if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
- ibp->n_unicast_xmit = 0;
+ ibp->z_unicast_xmit = pma.n_unicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
- ibp->n_unicast_rcv = 0;
+ ibp->z_unicast_rcv = pma.n_unicast_rcv;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
- ibp->n_multicast_xmit = 0;
+ ibp->z_multicast_xmit = pma.n_multicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
- ibp->n_multicast_rcv = 0;
+ ibp->z_multicast_rcv = pma.n_multicast_rcv;
return pma_get_portcounters_ext(pmp, ibdev, port);
}
@@ -1830,6 +1886,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
port_num && port_num <= ibdev->phys_port_cnt &&
port != port_num)
(void) check_mkey(to_iport(ibdev, port_num), smp, 0);
+ ret = IB_MAD_RESULT_FAILURE;
goto bail;
}
@@ -1952,19 +2009,19 @@ static int process_perf(struct ib_device *ibdev, u8 port,
struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
- struct ib_perf *pmp = (struct ib_perf *)out_mad;
+ struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
int ret;
*out_mad = *in_mad;
- if (pmp->class_version != 1) {
- pmp->status |= IB_SMP_UNSUP_VERSION;
+ if (pmp->mad_hdr.class_version != 1) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
- switch (pmp->method) {
+ switch (pmp->mad_hdr.method) {
case IB_MGMT_METHOD_GET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_CLASS_PORT_INFO:
ret = pma_get_classportinfo(pmp, ibdev);
goto bail;
@@ -1987,13 +2044,13 @@ static int process_perf(struct ib_device *ibdev, u8 port,
ret = pma_get_portcounters_cong(pmp, ibdev, port);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
case IB_MGMT_METHOD_SET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_PORT_SAMPLES_CONTROL:
ret = pma_set_portsamplescontrol(pmp, ibdev, port);
goto bail;
@@ -2007,7 +2064,7 @@ static int process_perf(struct ib_device *ibdev, u8 port,
ret = pma_set_portcounters_cong(pmp, ibdev, port);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
@@ -2023,7 +2080,7 @@ static int process_perf(struct ib_device *ibdev, u8 port,
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METHOD;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_smp *) pmp);
}
@@ -2031,6 +2088,298 @@ bail:
return ret;
}
+static int cc_get_classportinfo(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev)
+{
+ struct ib_cc_classportinfo_attr *p =
+ (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->base_version = 1;
+ p->class_version = 1;
+ p->cap_mask = 0;
+
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_info(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_info_attr *p =
+ (struct ib_cc_info_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->congestion_info = 0;
+ p->control_table_cap = ppd->cc_max_table_entries;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ int i;
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct ib_cc_congestion_entry_shadow *entries;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ entries = ppd->congestion_entries_shadow->entries;
+ p->port_control = cpu_to_be16(
+ ppd->congestion_entries_shadow->port_control);
+ p->control_map = cpu_to_be16(
+ ppd->congestion_entries_shadow->control_map);
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ p->entries[i].ccti_increase = entries[i].ccti_increase;
+ p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
+ p->entries[i].trigger_threshold = entries[i].trigger_threshold;
+ p->entries[i].ccti_min = entries[i].ccti_min;
+ }
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 max_cct_block;
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ max_cct_block =
+ (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
+ max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
+
+ if (cct_block_index > max_cct_block) {
+ spin_unlock(&ppd->cc_shadow_lock);
+ goto bail;
+ }
+
+ ccp->attr_mod = cpu_to_be32(cct_block_index);
+
+ cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
+
+ cct_entry--;
+
+ p->ccti_limit = cpu_to_be16(cct_entry);
+
+ entries = &ppd->ccti_entries_shadow->
+ entries[IB_CCT_ENTRIES * cct_block_index];
+ cct_entry %= IB_CCT_ENTRIES;
+
+ for (i = 0; i <= cct_entry; i++)
+ p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int i;
+
+ ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
+
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ ppd->congestion_entries[i].ccti_increase =
+ p->entries[i].ccti_increase;
+
+ ppd->congestion_entries[i].ccti_timer =
+ be16_to_cpu(p->entries[i].ccti_timer);
+
+ ppd->congestion_entries[i].trigger_threshold =
+ p->entries[i].trigger_threshold;
+
+ ppd->congestion_entries[i].ccti_min =
+ p->entries[i].ccti_min;
+ }
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ /* If this packet is the first in the sequence then
+ * zero the total table entry count.
+ */
+ if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
+ ppd->total_cct_entry = 0;
+
+ cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
+
+ /* ccti_limit is 0 to 63 */
+ ppd->total_cct_entry += (cct_entry + 1);
+
+ if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
+ goto bail;
+
+ ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
+
+ entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
+
+ for (i = 0; i <= cct_entry; i++)
+ entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
+ memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
+ (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
+
+ ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
+ ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
+ memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
+ IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int check_cc_key(struct qib_ibport *ibp,
+ struct ib_cc_mad *ccp, int mad_flags)
+{
+ return 0;
+}
+
+static int process_cc(struct ib_device *ibdev, int mad_flags,
+ u8 port, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ int ret;
+
+ *out_mad = *in_mad;
+
+ if (ccp->class_version != 2) {
+ ccp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *)ccp);
+ goto bail;
+ }
+
+ ret = check_cc_key(ibp, ccp, mad_flags);
+ if (ret)
+ goto bail;
+
+ switch (ccp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CLASSPORTINFO:
+ ret = cc_get_classportinfo(ccp, ibdev);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_INFO:
+ ret = cc_get_congestion_info(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_get_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_get_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_set_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_set_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ case IB_MGMT_METHOD_TRAP:
+ default:
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) ccp);
+ }
+
+bail:
+ return ret;
+}
+
/**
* qib_process_mad - process an incoming MAD packet
* @ibdev: the infiniband device this packet came in on
@@ -2055,6 +2404,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
int ret;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2066,6 +2417,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
ret = process_perf(ibdev, port, in_mad, out_mad);
goto bail;
+ case IB_MGMT_CLASS_CONG_MGMT:
+ if (!ppd->congestion_entries_shadow ||
+ !qib_cc_table_size) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+ goto bail;
+
default:
ret = IB_MAD_RESULT_SUCCESS;
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 147aff9117d..941d4d50d8e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +31,10 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef _QIB_MAD_H
+#define _QIB_MAD_H
+
+#include <rdma/ib_pma.h>
#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
@@ -50,7 +54,7 @@ struct ib_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_notice_attr {
u8 generic_type;
@@ -69,24 +73,24 @@ struct ib_mad_notice_attr {
__be16 reserved;
__be16 lid; /* where violation happened */
u8 port_num; /* where violation happened */
- } __attribute__ ((packed)) ntc_129_131;
+ } __packed ntc_129_131;
struct {
__be16 reserved;
- __be16 lid; /* LID where change occured */
+ __be16 lid; /* LID where change occurred */
u8 reserved2;
u8 local_changes; /* low bit - local changes */
__be32 new_cap_mask; /* new capability mask */
u8 reserved3;
u8 change_flags; /* low 3 bits only */
- } __attribute__ ((packed)) ntc_144;
+ } __packed ntc_144;
struct {
__be16 reserved;
__be16 lid; /* lid where sys guid changed */
__be16 reserved2;
__be64 new_sys_guid;
- } __attribute__ ((packed)) ntc_145;
+ } __packed ntc_145;
struct {
__be16 reserved;
@@ -100,7 +104,7 @@ struct ib_mad_notice_attr {
u8 reserved3;
u8 dr_trunc_hop;
u8 dr_rtn_path[30];
- } __attribute__ ((packed)) ntc_256;
+ } __packed ntc_256;
struct {
__be16 reserved;
@@ -111,7 +115,7 @@ struct ib_mad_notice_attr {
__be32 qp2; /* high 8 bits reserved */
union ib_gid gid1;
union ib_gid gid2;
- } __attribute__ ((packed)) ntc_257_258;
+ } __packed ntc_257_258;
} details;
};
@@ -180,109 +184,8 @@ struct ib_vl_weight_elem {
#define IB_VLARB_HIGHPRI_0_31 3
#define IB_VLARB_HIGHPRI_32_63 4
-/*
- * PMA class portinfo capability mask bits
- */
-#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
-#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
-#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
-
-#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
-#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010)
-#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011)
-#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012)
-#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D)
-#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E)
#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00)
-struct ib_perf {
- u8 base_version;
- u8 mgmt_class;
- u8 class_version;
- u8 method;
- __be16 status;
- __be16 unused;
- __be64 tid;
- __be16 attr_id;
- __be16 resv;
- __be32 attr_mod;
- u8 reserved[40];
- u8 data[192];
-} __attribute__ ((packed));
-
-struct ib_pma_classportinfo {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- u8 reserved[3];
- u8 resp_time_value; /* only lower 5 bits */
- union ib_gid redirect_gid;
- __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 redirect_lid;
- __be16 redirect_pkey;
- __be32 redirect_qp; /* only lower 24 bits */
- __be32 redirect_qkey;
- union ib_gid trap_gid;
- __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 trap_lid;
- __be16 trap_pkey;
- __be32 trap_hl_qp; /* 8, 24 bits respectively */
- __be32 trap_qkey;
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplescontrol {
- u8 opcode;
- u8 port_select;
- u8 tick;
- u8 counter_width; /* only lower 3 bits */
- __be32 counter_mask0_9; /* 2, 10 * 3, bits */
- __be16 counter_mask10_14; /* 1, 5 * 3, bits */
- u8 sample_mechanisms;
- u8 sample_status; /* only lower 2 bits */
- __be64 option_mask;
- __be64 vendor_mask;
- __be32 sample_start;
- __be32 sample_interval;
- __be16 tag;
- __be16 counter_select[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult_ext {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 extended_width; /* only upper 2 bits */
- __be64 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portcounters {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be16 symbol_error_counter;
- u8 link_error_recovery_counter;
- u8 link_downed_counter;
- __be16 port_rcv_errors;
- __be16 port_rcv_remphys_errors;
- __be16 port_rcv_switch_relay_errors;
- __be16 port_xmit_discards;
- u8 port_xmit_constraint_errors;
- u8 port_rcv_constraint_errors;
- u8 reserved1;
- u8 lli_ebor_errors; /* 4, 4, bits */
- __be16 reserved2;
- __be16 vl15_dropped;
- __be32 port_xmit_data;
- __be32 port_rcv_data;
- __be32 port_xmit_packets;
- __be32 port_rcv_packets;
-} __attribute__ ((packed));
-
struct ib_pma_portcounters_cong {
u8 reserved;
u8 reserved1;
@@ -297,7 +200,7 @@ struct ib_pma_portcounters_cong {
u8 port_xmit_constraint_errors;
u8 port_rcv_constraint_errors;
u8 reserved2;
- u8 lli_ebor_errors; /* 4, 4, bits */
+ u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
__be16 reserved3;
__be16 vl15_dropped;
__be64 port_xmit_data;
@@ -306,7 +209,7 @@ struct ib_pma_portcounters_cong {
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
@@ -316,48 +219,201 @@ struct ib_pma_portcounters_cong {
/* number of 4nsec cycles equaling 2secs */
#define QIB_CONG_TIMER_PSINTERVAL 0x1DCD64EC
-#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001)
-#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002)
-#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004)
-#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008)
-#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010)
-#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040)
-#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200)
-#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400)
-#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800)
-#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000)
-#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000)
-#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000)
-#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000)
-
#define IB_PMA_SEL_CONG_ALL 0x01
#define IB_PMA_SEL_CONG_PORT_DATA 0x02
#define IB_PMA_SEL_CONG_XMIT 0x04
#define IB_PMA_SEL_CONG_ROUTING 0x08
-struct ib_pma_portcounters_ext {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be32 reserved1;
- __be64 port_xmit_data;
- __be64 port_rcv_data;
- __be64 port_xmit_packets;
- __be64 port_rcv_packets;
- __be64 port_unicast_xmit_packets;
- __be64 port_unicast_rcv_packets;
- __be64 port_multicast_xmit_packets;
- __be64 port_multicast_rcv_packets;
-} __attribute__ ((packed));
-
-#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001)
-#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002)
-#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004)
-#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008)
-#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010)
-#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020)
-#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040)
-#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080)
+/*
+ * Congestion control class attributes
+ */
+#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
+#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
+#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
+#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
+#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
+#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
+#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
+#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
+#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
+#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
+
+/* generalizations for threshold values */
+#define IB_CC_THRESHOLD_NONE 0x0
+#define IB_CC_THRESHOLD_MIN 0x1
+#define IB_CC_THRESHOLD_MAX 0xf
+
+/* CCA MAD header constants */
+#define IB_CC_MAD_LOGDATA_LEN 32
+#define IB_CC_MAD_MGMTDATA_LEN 192
+
+struct ib_cc_mad {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 cckey;
+
+ /* For CongestionLog attribute only */
+ u8 log_data[IB_CC_MAD_LOGDATA_LEN];
+
+ u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
+} __packed;
+
+/*
+ * Congestion Control class portinfo capability mask bits
+ */
+#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
+#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
+#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
+#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
+
+struct ib_cc_classportinfo_attr {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ u8 reserved[3];
+ u8 resp_time_value; /* only lower 5 bits */
+ union ib_gid redirect_gid;
+ __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp; /* only lower 24 bits */
+ __be32 redirect_qkey;
+ union ib_gid trap_gid;
+ __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hl_qp; /* 8, 24 bits respectively */
+ __be32 trap_qkey;
+} __packed;
+
+/* Congestion control traps */
+#define IB_CC_TRAP_KEY_VIOLATION 0x0000
+
+struct ib_cc_trap_key_violation_attr {
+ __be16 source_lid;
+ u8 method;
+ u8 reserved1;
+ __be16 attrib_id;
+ __be32 attrib_mod;
+ __be32 qp;
+ __be64 cckey;
+ u8 sgid[16];
+ u8 padding[24];
+} __packed;
+
+/* Congestion info flags */
+#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
+#define IB_CC_TABLE_CAP_DEFAULT 31
+
+struct ib_cc_info_attr {
+ __be16 congestion_info;
+ u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
+} __packed;
+
+struct ib_cc_key_info_attr {
+ __be64 cckey;
+ u8 protect;
+ __be16 lease_period;
+ __be16 violations;
+} __packed;
+
+#define IB_CC_CL_CA_LOGEVENTS_LEN 208
+
+struct ib_cc_log_attr {
+ u8 log_type;
+ u8 congestion_flags;
+ __be16 threshold_event_counter;
+ __be16 threshold_congestion_event_map;
+ __be16 current_time_stamp;
+ u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
+} __packed;
+
+#define IB_CC_CLEC_SERVICETYPE_RC 0x0
+#define IB_CC_CLEC_SERVICETYPE_UC 0x1
+#define IB_CC_CLEC_SERVICETYPE_RD 0x2
+#define IB_CC_CLEC_SERVICETYPE_UD 0x3
+
+struct ib_cc_log_event {
+ u8 local_qp_cn_entry;
+ u8 remote_qp_number_cn_entry[3];
+ u8 sl_cn_entry:4;
+ u8 service_type_cn_entry:4;
+ __be32 remote_lid_cn_entry;
+ __be32 timestamp_cn_entry;
+} __packed;
+
+/* Sixteen congestion entries */
+#define IB_CC_CCS_ENTRIES 16
+
+/* Port control flags */
+#define IB_CC_CCS_PC_SL_BASED 0x01
+
+struct ib_cc_congestion_entry {
+ u8 ccti_increase;
+ __be16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_entry_shadow {
+ u8 ccti_increase;
+ u16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_setting_attr {
+ __be16 port_control;
+ __be16 control_map;
+ struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+struct ib_cc_congestion_setting_attr_shadow {
+ u16 port_control;
+ u16 control_map;
+ struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
+#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
+
+/* 64 Congestion Control table entries in a single MAD */
+#define IB_CCT_ENTRIES 64
+#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
+
+struct ib_cc_table_entry {
+ __be16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_entry_shadow {
+ u16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_attr {
+ __be16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+struct ib_cc_table_attr_shadow {
+ u16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+#define CC_TABLE_SHADOW_MAX \
+ (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
+
+struct cc_table_shadow {
+ u16 ccti_last_entry;
+ struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
+} __packed;
/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
@@ -371,3 +427,5 @@ struct ib_pma_portcounters_ext {
COUNTER_MASK(1, 2) | \
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
+
+#endif /* _QIB_MAD_H */
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 5f95f0f6385..9bbb55347cc 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
/* Fast memory region */
struct qib_fmr {
struct ib_fmr ibfmr;
- u8 page_shift;
struct qib_mregion mr; /* must be last */
};
@@ -48,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
return container_of(ibfmr, struct qib_fmr, ibfmr);
}
+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
+ int count)
+{
+ int m, i = 0;
+ int rval = 0;
+
+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ for (; i < m; i++) {
+ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
+ if (!mr->map[i])
+ goto bail;
+ }
+ mr->mapsz = m;
+ init_completion(&mr->comp);
+ /* count returning the ptr to user */
+ atomic_set(&mr->refcount, 1);
+ mr->pd = pd;
+ mr->max_segs = count;
+out:
+ return rval;
+bail:
+ while (i)
+ kfree(mr->map[--i]);
+ rval = -ENOMEM;
+ goto out;
+}
+
+static void deinit_qib_mregion(struct qib_mregion *mr)
+{
+ int i = mr->mapsz;
+
+ mr->mapsz = 0;
+ while (i)
+ kfree(mr->map[--i]);
+}
+
+
/**
* qib_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
@@ -59,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
*/
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct qib_ibdev *dev = to_idev(pd->device);
- struct qib_mr *mr;
+ struct qib_mr *mr = NULL;
struct ib_mr *ret;
- unsigned long flags;
+ int rval;
if (to_ipd(pd)->user) {
ret = ERR_PTR(-EPERM);
@@ -75,60 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
goto bail;
}
- mr->mr.access_flags = acc;
- atomic_set(&mr->mr.refcount, 0);
+ rval = init_qib_mregion(&mr->mr, pd, 0);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail;
+ }
- spin_lock_irqsave(&dev->lk_table.lock, flags);
- if (!dev->dma_mr)
- dev->dma_mr = &mr->mr;
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+ rval = qib_alloc_lkey(&mr->mr, 1);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail_mregion;
+ }
+
+ mr->mr.access_flags = acc;
ret = &mr->ibmr;
+done:
+ return ret;
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
bail:
- return ret;
+ kfree(mr);
+ goto done;
}
-static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
{
struct qib_mr *mr;
- int m, i = 0;
+ int rval = -ENOMEM;
+ int m;
/* Allocate struct plus pointers to first level page tables. */
m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
- mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
if (!mr)
- goto done;
-
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
- if (!mr->mr.map[i])
- goto bail;
- }
- mr->mr.mapsz = m;
- mr->mr.max_segs = count;
+ goto bail;
+ rval = init_qib_mregion(&mr->mr, pd, count);
+ if (rval)
+ goto bail;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
- if (!qib_alloc_lkey(lk_table, &mr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&mr->mr, 0);
+ if (rval)
+ goto bail_mregion;
mr->ibmr.lkey = mr->mr.lkey;
mr->ibmr.rkey = mr->mr.lkey;
+done:
+ return mr;
- atomic_set(&mr->mr.refcount, 0);
- goto done;
-
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
bail:
- while (i)
- kfree(mr->mr.map[--i]);
kfree(mr);
- mr = NULL;
-
-done:
- return mr;
+ mr = ERR_PTR(rval);
+ goto done;
}
/**
@@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
int n, m, i;
struct ib_mr *ret;
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(num_phys_buf, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
goto bail;
}
- mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
mr->mr.access_flags = acc;
- mr->umem = NULL;
m = 0;
n = 0;
@@ -186,7 +221,6 @@ bail:
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the QLogic_IB driver
*
@@ -198,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct qib_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ struct scatterlist *sg;
+ int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
@@ -212,18 +246,15 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
+ n = umem->nmap;
- mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
- if (!mr) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(n, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
ib_umem_release(umem);
goto bail;
}
- mr->mr.pd = pd;
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
@@ -231,13 +262,14 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
+ if (is_power_of_2(umem->page_size))
+ mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
- vaddr = page_address(sg_page(&chunk->page_list[i]));
+ vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
@@ -249,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m++;
n = 0;
}
- }
}
ret = &mr->ibmr;
@@ -269,21 +300,25 @@ bail:
int qib_dereg_mr(struct ib_mr *ibmr)
{
struct qib_mr *mr = to_imr(ibmr);
- struct qib_ibdev *dev = to_idev(ibmr->device);
- int ret;
- int i;
-
- ret = qib_free_lkey(dev, &mr->mr);
- if (ret)
- return ret;
-
- i = mr->mr.mapsz;
- while (i)
- kfree(mr->mr.map[--i]);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&mr->mr);
+
+ qib_put_mr(&mr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&mr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&mr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&mr->mr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
- return 0;
+out:
+ return ret;
}
/*
@@ -296,17 +331,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct qib_mr *mr;
- mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table);
- if (mr == NULL)
- return ERR_PTR(-ENOMEM);
-
- mr->mr.pd = pd;
- mr->mr.user_base = 0;
- mr->mr.iova = 0;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = 0;
- mr->umem = NULL;
+ mr = alloc_mr(max_page_list_len, pd);
+ if (IS_ERR(mr))
+ return (struct ib_mr *)mr;
return &mr->ibmr;
}
@@ -320,11 +347,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
if (size > PAGE_SIZE)
return ERR_PTR(-EINVAL);
- pl = kmalloc(sizeof *pl, GFP_KERNEL);
+ pl = kzalloc(sizeof *pl, GFP_KERNEL);
if (!pl)
return ERR_PTR(-ENOMEM);
- pl->page_list = kmalloc(size, GFP_KERNEL);
+ pl->page_list = kzalloc(size, GFP_KERNEL);
if (!pl->page_list)
goto err_free;
@@ -353,57 +380,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct qib_fmr *fmr;
- int m, i = 0;
+ int m;
struct ib_fmr *ret;
+ int rval = -ENOMEM;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
- fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
if (!fmr)
goto bail;
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
- GFP_KERNEL);
- if (!fmr->mr.map[i])
- goto bail;
- }
- fmr->mr.mapsz = m;
+ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ if (rval)
+ goto bail;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
- if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&fmr->mr, 0);
+ if (rval)
+ goto bail_mregion;
fmr->ibfmr.rkey = fmr->mr.lkey;
fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
- fmr->mr.pd = pd;
- fmr->mr.user_base = 0;
- fmr->mr.iova = 0;
- fmr->mr.length = 0;
- fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
- fmr->page_shift = fmr_attr->page_shift;
+ fmr->mr.page_shift = fmr_attr->page_shift;
- atomic_set(&fmr->mr.refcount, 0);
ret = &fmr->ibfmr;
- goto done;
+done:
+ return ret;
+bail_mregion:
+ deinit_qib_mregion(&fmr->mr);
bail:
- while (i)
- kfree(fmr->mr.map[--i]);
kfree(fmr);
- ret = ERR_PTR(-ENOMEM);
-
-done:
- return ret;
+ ret = ERR_PTR(rval);
+ goto done;
}
/**
@@ -426,7 +443,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
u32 ps;
int ret;
- if (atomic_read(&fmr->mr.refcount))
+ i = atomic_read(&fmr->mr.refcount);
+ if (i > 2)
return -EBUSY;
if (list_len > fmr->mr.max_segs) {
@@ -437,7 +455,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
- ps = 1 << fmr->page_shift;
+ ps = 1 << fmr->mr.page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;
@@ -488,16 +506,27 @@ int qib_unmap_fmr(struct list_head *fmr_list)
int qib_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct qib_fmr *fmr = to_ifmr(ibfmr);
- int ret;
- int i;
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&fmr->mr);
+ qib_put_mr(&fmr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&fmr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&fmr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&fmr->mr);
+ kfree(fmr);
+out:
+ return ret;
+}
- ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr);
- if (ret)
- return ret;
+void mr_rcu_callback(struct rcu_head *list)
+{
+ struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
- i = fmr->mr.mapsz;
- while (i)
- kfree(fmr->mr.map[--i]);
- kfree(fmr);
- return 0;
+ complete(&mr->comp);
}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 48b6674cbc4..61a0046efb7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -35,6 +35,7 @@
#include <linux/delay.h>
#include <linux/vmalloc.h>
#include <linux/aer.h>
+#include <linux/module.h>
#include "qib.h"
@@ -50,8 +51,8 @@
* file calls, even though this violates some
* expectations of harmlessness.
*/
-static int qib_tune_pcie_caps(struct qib_devdata *);
-static int qib_tune_pcie_coalesce(struct qib_devdata *);
+static void qib_tune_pcie_caps(struct qib_devdata *);
+static void qib_tune_pcie_coalesce(struct qib_devdata *);
/*
* Do all the common PCIe setup and initialization.
@@ -193,31 +194,50 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
}
static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
- struct msix_entry *msix_entry)
+ struct qib_msix_entry *qib_msix_entry)
{
int ret;
- u32 tabsize = 0;
- u16 msix_flags;
-
- pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
- tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
- if (tabsize > *msixcnt)
- tabsize = *msixcnt;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- if (ret > 0) {
- tabsize = ret;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- }
- if (ret) {
- qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, "
- "falling back to INTx\n", tabsize, ret);
- tabsize = 0;
- }
- *msixcnt = tabsize;
-
- if (ret)
- qib_enable_intx(dd->pcidev);
-
+ int nvec = *msixcnt;
+ struct msix_entry *msix_entry;
+ int i;
+
+ ret = pci_msix_vec_count(dd->pcidev);
+ if (ret < 0)
+ goto do_intx;
+
+ nvec = min(nvec, ret);
+
+ /* We can't pass qib_msix_entry array to qib_msix_setup
+ * so use a dummy msix_entry array and copy the allocated
+ * irq back to the qib_msix_entry array. */
+ msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL);
+ if (!msix_entry)
+ goto do_intx;
+
+ for (i = 0; i < nvec; i++)
+ msix_entry[i] = qib_msix_entry[i].msix;
+
+ ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
+ if (ret < 0)
+ goto free_msix_entry;
+ else
+ nvec = ret;
+
+ for (i = 0; i < nvec; i++)
+ qib_msix_entry[i].msix = msix_entry[i];
+
+ kfree(msix_entry);
+ *msixcnt = nvec;
+ return;
+
+free_msix_entry:
+ kfree(msix_entry);
+
+do_intx:
+ qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, "
+ "falling back to INTx\n", nvec, ret);
+ *msixcnt = 0;
+ qib_enable_intx(dd->pcidev);
}
/**
@@ -233,8 +253,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
ret = pci_enable_msi(pdev);
if (ret)
- qib_dev_err(dd, "pci_enable_msi failed: %d, "
- "interrupts may not work\n", ret);
+ qib_dev_err(dd,
+ "pci_enable_msi failed: %d, interrupts may not work\n",
+ ret);
/* continue even if it fails, we may still be OK... */
pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
@@ -250,13 +271,12 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
}
int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
- struct msix_entry *entry)
+ struct qib_msix_entry *entry)
{
u16 linkstat, speed;
- int pos = 0, pose, ret = 1;
+ int pos = 0, ret = 1;
- pose = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
- if (!pose) {
+ if (!pci_is_pcie(dd->pcidev)) {
qib_dev_err(dd, "Can't find PCI Express capability!\n");
/* set up something... */
dd->lbus_width = 1;
@@ -264,12 +284,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
goto bail;
}
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX);
+ pos = dd->pcidev->msix_cap;
if (nent && *nent && pos) {
qib_msix_setup(dd, pos, nent, entry);
ret = 0; /* did it, either MSIx or INTx */
} else {
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (pos)
ret = qib_msi_setup(dd, pos);
else
@@ -278,7 +298,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
if (!pos)
qib_enable_intx(dd->pcidev);
- pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat);
+ pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
/*
* speed is bits 0-3, linkwidth is bits 4-8
* no defines for them in headers
@@ -338,10 +358,10 @@ int qib_reinit_intr(struct qib_devdata *dd)
if (!dd->msi_lo)
goto bail;
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ pos = dd->pcidev->msi_cap;
if (!pos) {
- qib_dev_err(dd, "Can't find MSI capability, "
- "can't restore MSI settings\n");
+ qib_dev_err(dd,
+ "Can't find MSI capability, can't restore MSI settings\n");
ret = 0;
/* nothing special for MSIx, just MSI */
goto bail;
@@ -407,7 +427,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, PCI_COMMAND, new);
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pos = pdev->msi_cap;
if (pos) {
/* then turn off MSI */
pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
@@ -415,7 +435,7 @@ void qib_enable_intx(struct pci_dev *pdev)
if (new != cw)
pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
}
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ pos = pdev->msix_cap;
if (pos) {
/* then turn off MSIx */
pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw);
@@ -453,34 +473,10 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
r = pci_enable_device(dd->pcidev);
if (r)
- qib_dev_err(dd, "pci_enable_device failed after "
- "reset: %d\n", r);
-}
-
-/* code to adjust PCIe capabilities. */
-
-static int fld2val(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- wd &= mask;
- lsbmask = mask ^ (mask & (mask - 1));
- wd /= lsbmask;
- return wd;
+ qib_dev_err(dd,
+ "pci_enable_device failed after reset: %d\n", r);
}
-static int val2fld(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- lsbmask = mask ^ (mask & (mask - 1));
- wd *= lsbmask;
- return wd;
-}
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
@@ -492,28 +488,26 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
* of these chipsets, with some BIOS settings, and enabling it on those
* systems may result in the system crashing, and/or data corruption.
*/
-static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
+static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
{
int r;
struct pci_dev *parent;
- int ppos;
u16 devid;
u32 mask, bits, val;
if (!qib_pcie_coalesce)
- return 0;
+ return;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
if (parent->bus->parent) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- return 1;
+ return;
}
- ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
- if (!ppos)
- return 1;
+ if (!pci_is_pcie(parent))
+ return;
if (parent->vendor != 0x8086)
- return 1;
+ return;
/*
* - bit 12: Max_rdcmp_Imt_EN: need to set to 1
@@ -526,11 +520,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
*/
devid = parent->device;
if (devid >= 0x25e2 && devid <= 0x25fa) {
- u8 rev;
-
/* 5000 P/V/X/Z */
- pci_read_config_byte(parent, PCI_REVISION_ID, &rev);
- if (rev <= 0xb2)
+ if (parent->revision <= 0xb2)
bits = 1U << 10;
else
bits = 7U << 10;
@@ -549,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
mask = (3U << 24) | (7U << 10);
} else {
/* not one of the chipsets that we know about */
- return 1;
+ return;
}
pci_read_config_dword(parent, 0x48, &val);
val &= ~mask;
val |= bits;
r = pci_write_config_dword(parent, 0x48, val);
- return 0;
}
/*
@@ -564,62 +554,46 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
*/
static int qib_pcie_caps;
module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
-MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)");
+MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-static int qib_tune_pcie_caps(struct qib_devdata *dd)
+static void qib_tune_pcie_caps(struct qib_devdata *dd)
{
- int ret = 1; /* Assume the worst */
struct pci_dev *parent;
- int ppos, epos;
- u16 pcaps, pctl, ecaps, ectl;
- int rc_sup, ep_sup;
- int rc_cur, ep_cur;
+ u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
+ u16 rc_mrrs, ep_mrrs, max_mrrs;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
- if (parent->bus->parent) {
+ if (!pci_is_root_bus(parent->bus)) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- goto bail;
+ return;
}
- ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
- if (ppos) {
- pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps);
- pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
- } else
- goto bail;
+
+ if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
+ return;
+
+ rc_mpss = parent->pcie_mpss;
+ rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
- epos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
- if (epos) {
- pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps);
- pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl);
- } else
- goto bail;
- ret = 0;
- /* Find max payload supported by root, endpoint */
- rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
- ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD);
- if (rc_sup > ep_sup)
- rc_sup = ep_sup;
+ ep_mpss = dd->pcidev->pcie_mpss;
+ ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
+ /* Find max payload supported by root, endpoint */
+ if (rc_mpss > ep_mpss)
+ rc_mpss = ep_mpss;
/* If Supported greater than limit in module param, limit it */
- if (rc_sup > (qib_pcie_caps & 7))
- rc_sup = qib_pcie_caps & 7;
+ if (rc_mpss > (qib_pcie_caps & 7))
+ rc_mpss = qib_pcie_caps & 7;
/* If less than (allowed, supported), bump root payload */
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
+ if (rc_mpss > rc_mps) {
+ rc_mps = rc_mpss;
+ pcie_set_mps(parent, 128 << rc_mps);
}
/* If less than (allowed, supported), bump endpoint payload */
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
+ if (rc_mpss > ep_mps) {
+ ep_mps = rc_mpss;
+ pcie_set_mps(dd->pcidev, 128 << ep_mps);
}
/*
@@ -627,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
* No field for max supported, but PCIe spec limits it to 4096,
* which is code '5' (log2(4096) - 7)
*/
- rc_sup = 5;
- if (rc_sup > ((qib_pcie_caps >> 4) & 7))
- rc_sup = (qib_pcie_caps >> 4) & 7;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
- pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
+ max_mrrs = 5;
+ if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
+ max_mrrs = (qib_pcie_caps >> 4) & 7;
+
+ max_mrrs = 128 << max_mrrs;
+ rc_mrrs = pcie_get_readrq(parent);
+ ep_mrrs = pcie_get_readrq(dd->pcidev);
+
+ if (max_mrrs > rc_mrrs) {
+ rc_mrrs = max_mrrs;
+ pcie_set_readrq(parent, rc_mrrs);
}
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
- pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
+ if (max_mrrs > ep_mrrs) {
+ ep_mrrs = max_mrrs;
+ pcie_set_readrq(dd->pcidev, ep_mrrs);
}
-bail:
- return ret;
}
/* End of PCIe capability tuning */
@@ -702,15 +672,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
}
- qib_devinfo(pdev, "QIB mmio_enabled function called, "
- "read wordscntr %Lx, returning %d\n", words, ret);
+ qib_devinfo(pdev,
+ "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ words, ret);
return ret;
}
static pci_ers_result_t
qib_pci_slot_reset(struct pci_dev *pdev)
{
- qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
+ qib_devinfo(pdev, "QIB slot_reset function called, ignored\n");
return PCI_ERS_RESULT_CAN_RECOVER;
}
@@ -735,7 +706,7 @@ qib_pci_resume(struct pci_dev *pdev)
qib_init(dd, 1); /* same as re-init after reset */
}
-struct pci_error_handlers qib_pci_err_handler = {
+const struct pci_error_handlers qib_pci_err_handler = {
.error_detected = qib_pci_error_detected,
.mmio_enabled = qib_pci_mmio_enabled,
.link_reset = qib_pci_link_reset,
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 6c39851d2de..7fcc150d603 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,6 +34,10 @@
#include <linux/err.h>
#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/seq_file.h>
+#endif
#include "qib.h"
@@ -48,13 +52,12 @@ static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
struct qpn_map *map, unsigned off,
- unsigned r)
+ unsigned n)
{
if (qpt->mask) {
off++;
- if ((off & qpt->mask) >> 1 != r)
- off = ((off & qpt->mask) ?
- (off | qpt->mask) + 1 : off) | (r << 1);
+ if (((off & qpt->mask) >> 1) >= n)
+ off = (off | qpt->mask) + 2;
} else
off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
return off;
@@ -123,7 +126,6 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
u32 ret;
- int r;
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -139,15 +141,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
goto bail;
}
- r = smp_processor_id();
- if (r >= dd->n_krcv_queues)
- r %= dd->n_krcv_queues;
- qpn = qpt->last + 1;
+ qpn = qpt->last + 2;
if (qpn >= QPN_MAX)
qpn = 2;
- if (qpt->mask && ((qpn & qpt->mask) >> 1) != r)
- qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) |
- (r << 1);
+ if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
+ qpn = (qpn | qpt->mask) + 2;
offset = qpn & BITS_PER_PAGE_MASK;
map = &qpt->map[qpn / BITS_PER_PAGE];
max_scan = qpt->nmaps - !offset;
@@ -163,7 +161,8 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
ret = qpn;
goto bail;
}
- offset = find_next_offset(qpt, map, offset, r);
+ offset = find_next_offset(qpt, map, offset,
+ dd->n_krcv_queues);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
@@ -183,13 +182,13 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
if (qpt->nmaps == QPNMAP_ENTRIES)
break;
map = &qpt->map[qpt->nmaps++];
- offset = qpt->mask ? (r << 1) : 0;
+ offset = 0;
} else if (map < &qpt->map[qpt->nmaps]) {
++map;
- offset = qpt->mask ? (r << 1) : 0;
+ offset = 0;
} else {
map = &qpt->map[0];
- offset = qpt->mask ? (r << 1) : 2;
+ offset = 2;
}
qpn = mk_qpn(qpt, map, offset);
}
@@ -209,6 +208,13 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}
+static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
+{
+ return jhash_1word(qpn, dev->qp_rnd) &
+ (dev->qp_table_size - 1);
+}
+
+
/*
* Put the QP into the hash table.
* The hash table holds a reference to the QP.
@@ -216,20 +222,20 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
unsigned long flags;
+ unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
+ atomic_inc(&qp->refcount);
spin_lock_irqsave(&dev->qpt_lock, flags);
if (qp->ibqp.qp_num == 0)
- ibp->qp0 = qp;
+ rcu_assign_pointer(ibp->qp0, qp);
else if (qp->ibqp.qp_num == 1)
- ibp->qp1 = qp;
+ rcu_assign_pointer(ibp->qp1, qp);
else {
qp->next = dev->qp_table[n];
- dev->qp_table[n] = qp;
+ rcu_assign_pointer(dev->qp_table[n], qp);
}
- atomic_inc(&qp->refcount);
spin_unlock_irqrestore(&dev->qpt_lock, flags);
}
@@ -241,29 +247,41 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct qib_qp *q, **qpp;
+ unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
unsigned long flags;
-
- qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];
+ int removed = 1;
spin_lock_irqsave(&dev->qpt_lock, flags);
- if (ibp->qp0 == qp) {
- ibp->qp0 = NULL;
- atomic_dec(&qp->refcount);
- } else if (ibp->qp1 == qp) {
- ibp->qp1 = NULL;
- atomic_dec(&qp->refcount);
- } else
- for (; (q = *qpp) != NULL; qpp = &q->next)
+ if (rcu_dereference_protected(ibp->qp0,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
+ rcu_assign_pointer(ibp->qp0, NULL);
+ } else if (rcu_dereference_protected(ibp->qp1,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
+ rcu_assign_pointer(ibp->qp1, NULL);
+ } else {
+ struct qib_qp *q;
+ struct qib_qp __rcu **qpp;
+
+ removed = 0;
+ qpp = &dev->qp_table[n];
+ for (; (q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&dev->qpt_lock))) != NULL;
+ qpp = &q->next)
if (q == qp) {
- *qpp = qp->next;
- qp->next = NULL;
- atomic_dec(&qp->refcount);
+ rcu_assign_pointer(*qpp,
+ rcu_dereference_protected(qp->next,
+ lockdep_is_held(&dev->qpt_lock)));
+ removed = 1;
break;
}
+ }
spin_unlock_irqrestore(&dev->qpt_lock, flags);
+ if (removed) {
+ synchronize_rcu();
+ atomic_dec(&qp->refcount);
+ }
}
/**
@@ -285,21 +303,26 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
if (!qib_mcast_tree_empty(ibp))
qp_inuse++;
- if (ibp->qp0)
+ rcu_read_lock();
+ if (rcu_dereference(ibp->qp0))
qp_inuse++;
- if (ibp->qp1)
+ if (rcu_dereference(ibp->qp1))
qp_inuse++;
+ rcu_read_unlock();
}
spin_lock_irqsave(&dev->qpt_lock, flags);
for (n = 0; n < dev->qp_table_size; n++) {
- qp = dev->qp_table[n];
- dev->qp_table[n] = NULL;
+ qp = rcu_dereference_protected(dev->qp_table[n],
+ lockdep_is_held(&dev->qpt_lock));
+ rcu_assign_pointer(dev->qp_table[n], NULL);
- for (; qp; qp = qp->next)
+ for (; qp; qp = rcu_dereference_protected(qp->next,
+ lockdep_is_held(&dev->qpt_lock)))
qp_inuse++;
}
spin_unlock_irqrestore(&dev->qpt_lock, flags);
+ synchronize_rcu();
return qp_inuse;
}
@@ -314,25 +337,28 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
*/
struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
{
- struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
- unsigned long flags;
- struct qib_qp *qp;
+ struct qib_qp *qp = NULL;
- spin_lock_irqsave(&dev->qpt_lock, flags);
+ rcu_read_lock();
+ if (unlikely(qpn <= 1)) {
+ if (qpn == 0)
+ qp = rcu_dereference(ibp->qp0);
+ else
+ qp = rcu_dereference(ibp->qp1);
+ if (qp)
+ atomic_inc(&qp->refcount);
+ } else {
+ struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
+ unsigned n = qpn_hash(dev, qpn);
- if (qpn == 0)
- qp = ibp->qp0;
- else if (qpn == 1)
- qp = ibp->qp1;
- else
- for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
- qp = qp->next)
- if (qp->ibqp.qp_num == qpn)
+ for (qp = rcu_dereference(dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
+ if (qp->ibqp.qp_num == qpn) {
+ atomic_inc(&qp->refcount);
break;
- if (qp)
- atomic_inc(&qp->refcount);
-
- spin_unlock_irqrestore(&dev->qpt_lock, flags);
+ }
+ }
+ rcu_read_unlock();
return qp;
}
@@ -393,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
unsigned n;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (clr_sends) {
while (qp->s_last != qp->s_head) {
@@ -414,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
@@ -424,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
qp->s_last = 0;
}
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
}
@@ -437,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
}
@@ -468,6 +485,10 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
del_timer(&qp->s_timer);
}
+
+ if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
+ qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
+
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
@@ -478,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
if (!(qp->s_flags & QIB_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_tx) {
@@ -564,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -766,8 +787,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
}
- if (attr_mask & IB_QP_PATH_MTU)
+ if (attr_mask & IB_QP_PATH_MTU) {
qp->path_mtu = pmtu;
+ qp->pmtu = ib_mtu_enum_to_int(pmtu);
+ }
if (attr_mask & IB_QP_RETRY_CNT) {
qp->s_retry_cnt = attr->retry_cnt;
@@ -782,8 +805,12 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MIN_RNR_TIMER)
qp->r_min_rnr_timer = attr->min_rnr_timer;
- if (attr_mask & IB_QP_TIMEOUT)
+ if (attr_mask & IB_QP_TIMEOUT) {
qp->timeout = attr->timeout;
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
+ }
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
@@ -958,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
@@ -1014,6 +1042,15 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
+ RCU_INIT_POINTER(qp->next, NULL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ if (!qp->s_hdr) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
if (init_attr->srq)
sz = 0;
else {
@@ -1061,7 +1098,6 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
}
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
- qp->processor_id = smp_processor_id();
qib_reset_qp(qp, init_attr->qp_type);
break;
@@ -1133,6 +1169,7 @@ bail_ip:
vfree(qp->r_rq.wq);
free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
+ kfree(qp->s_hdr);
kfree(qp);
bail_swq:
vfree(swq);
@@ -1188,6 +1225,7 @@ int qib_destroy_qp(struct ib_qp *ibqp)
else
vfree(qp->r_rq.wq);
vfree(qp->s_wq);
+ kfree(qp->s_hdr);
kfree(qp);
return 0;
}
@@ -1253,3 +1291,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
}
}
}
+
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter {
+ struct qib_ibdev *dev;
+ struct qib_qp *qp;
+ int n;
+};
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
+{
+ struct qib_qp_iter *iter;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->dev = dev;
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+int qib_qp_iter_next(struct qib_qp_iter *iter)
+{
+ struct qib_ibdev *dev = iter->dev;
+ int n = iter->n;
+ int ret = 1;
+ struct qib_qp *pqp = iter->qp;
+ struct qib_qp *qp;
+
+ rcu_read_lock();
+ for (; n < dev->qp_table_size; n++) {
+ if (pqp)
+ qp = rcu_dereference(pqp->next);
+ else
+ qp = rcu_dereference(dev->qp_table[n]);
+ pqp = qp;
+ if (qp) {
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ atomic_inc(&qp->refcount);
+ rcu_read_unlock();
+ iter->qp = qp;
+ iter->n = n;
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ return ret;
+}
+
+static const char * const qp_type_str[] = {
+ "SMI", "GSI", "RC", "UC", "UD",
+};
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
+{
+ struct qib_swqe *wqe;
+ struct qib_qp *qp = iter->qp;
+
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ seq_printf(s,
+ "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
+ iter->n,
+ qp->ibqp.qp_num,
+ qp_type_str[qp->ibqp.qp_type],
+ qp->state,
+ wqe->wr.opcode,
+ qp->s_hdrwords,
+ qp->s_flags,
+ atomic_read(&qp->s_dma_busy),
+ !list_empty(&qp->iowait),
+ qp->timeout,
+ wqe->ssn,
+ qp->s_lsn,
+ qp->s_last_psn,
+ qp->s_psn, qp->s_next_psn,
+ qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->s_last, qp->s_acked, qp->s_cur,
+ qp->s_tail, qp->s_head, qp->s_size,
+ qp->remote_qpn,
+ qp->remote_ah_attr.dlid);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 35b3604b691..fa71b1e666c 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -273,18 +273,12 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
int ret;
int idx;
u16 cks;
- u32 mask;
u8 peek[4];
/* ensure sane contents on invalid reads, for cable swaps */
memset(cp, 0, sizeof(*cp));
- mask = QSFP_GPIO_MOD_PRS_N;
- if (ppd->hw_pidx)
- mask <<= QSFP_GPIO_PORT2_SHIFT;
-
- ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
- if (ret & mask) {
+ if (!qib_qsfp_mod_present(ppd)) {
ret = -ENODEV;
goto bail;
}
@@ -444,6 +438,19 @@ const char * const qib_qsfp_devtech[16] = {
static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
+int qib_qsfp_mod_present(struct qib_pportdata *ppd)
+{
+ u32 mask;
+ int ret;
+
+ mask = QSFP_GPIO_MOD_PRS_N <<
+ (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT);
+ ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
+
+ return !((ret & mask) >>
+ ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3));
+}
+
/*
* Initialize structures that control access to QSFP. Called once per port
* on cards that support QSFP.
@@ -452,7 +459,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
void (*fevent)(struct work_struct *))
{
u32 mask, highs;
- int pins;
struct qib_devdata *dd = qd->ppd->dd;
@@ -474,29 +480,15 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
udelay(20); /* Generous RST dwell */
dd->f_gpio_mod(dd, mask, mask, mask);
- /* Spec says module can take up to two seconds! */
- mask = QSFP_GPIO_MOD_PRS_N;
- if (qd->ppd->hw_pidx)
- mask <<= QSFP_GPIO_PORT2_SHIFT;
-
- /* Do not try to wait here. Better to let event handle it */
- pins = dd->f_gpio_mod(dd, 0, 0, 0);
- if (pins & mask)
- goto bail;
- /* We see a module, but it may be unwise to look yet. Just schedule */
- qd->t_insert = get_jiffies_64();
- schedule_work(&qd->work);
-bail:
return;
}
void qib_qsfp_deinit(struct qib_qsfp_data *qd)
{
/*
- * There is nothing to do here for now. our
- * work is scheduled with schedule_work(), and
- * flush_scheduled_work() from remove_one will
- * block until all work ssetup with schedule_work()
+ * There is nothing to do here for now. our work is scheduled
+ * with queue_work(), and flush_workqueue() from remove_one
+ * will block until all work setup with queue_work()
* completes.
*/
}
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 19b527bafd5..91908f533a2 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -34,6 +34,7 @@
#define QSFP_DEV 0xA0
#define QSFP_PWR_LAG_MSEC 2000
+#define QSFP_MODPRS_LAG_MSEC 20
/*
* Below are masks for various QSFP signals, for Port 1.
@@ -79,6 +80,8 @@
extern const char *const qib_qsfp_devtech[16];
/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
+#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
/* Attenuation should be valid for copper other than full/near Eq */
#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
/* Length is only valid if technology is "copper" */
@@ -174,11 +177,13 @@ struct qib_qsfp_data {
struct qib_pportdata *ppd;
struct work_struct work;
struct qib_qsfp_cache cache;
- u64 t_insert;
+ unsigned long t_insert;
+ u8 modpresent;
};
extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
struct qib_qsfp_cache *cp);
+extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
extern void qib_qsfp_init(struct qib_qsfp_data *qd,
void (*fevent)(struct work_struct *));
extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 955fb715779..2f2501890c4 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -59,8 +59,7 @@ static void start_timer(struct qib_qp *qp)
qp->s_flags |= QIB_S_TIMER;
qp->s_timer.function = rc_timeout;
/* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies +
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
+ qp->s_timer.expires = jiffies + qp->timeout_jiffies;
add_timer(&qp->s_timer);
}
@@ -96,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
/* FALLTHROUGH */
@@ -134,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
/* Copy SGE state in case we need to resend */
qp->s_rdma_mr = e->rdma_sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -173,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
qp->s_cur_sge = &qp->s_ack_rdma_sge;
qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
@@ -239,15 +238,15 @@ int qib_make_rc_req(struct qib_qp *qp)
u32 len;
u32 bth0;
u32 bth2;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ u32 pmtu = qp->pmtu;
char newreq;
unsigned long flags;
int ret = 0;
int delta;
- ohdr = &qp->s_hdr.u.oth;
+ ohdr = &qp->s_hdr->u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr.u.l.oth;
+ ohdr = &qp->s_hdr->u.l.oth;
/*
* The lock is needed to synchronize between the sending tasklet,
@@ -272,13 +271,9 @@ int qib_make_rc_req(struct qib_qp *qp)
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
- while (qp->s_last != qp->s_acked) {
- qib_send_complete(qp, wqe, IB_WC_SUCCESS);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
- }
- qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
+ /* will get called again */
goto done;
}
@@ -757,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
qib_flush_wc();
qib_sendbuf_done(dd, pbufn);
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
goto done;
queue_ack:
@@ -1005,7 +1000,8 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
* there are still requests that haven't been acked.
*/
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
- !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)))
+ !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
+ (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
start_timer(qp);
while (qp->s_last != qp->s_acked) {
@@ -1016,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1072,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1407,6 +1403,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
struct qib_ctxtdata *rcd)
{
struct qib_swqe *wqe;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
enum ib_wc_status status;
unsigned long flags;
int diff;
@@ -1414,7 +1411,32 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
u32 aeth;
u64 val;
+ if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
+ /*
+ * If ACK'd PSN on SDMA busy list try to make progress to
+ * reclaim SDMA credits.
+ */
+ if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
+ (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
+
+ /*
+ * If send tasklet not running attempt to progress
+ * SDMA queue.
+ */
+ if (!(qp->s_flags & QIB_S_BUSY)) {
+ /* Acquire SDMA Lock */
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /* Invoke sdma make progress */
+ qib_sdma_make_progress(ppd);
+ /* Release SDMA Lock */
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto ack_done;
/* Ignore invalid responses. */
if (qib_cmp24(psn, qp->s_next_psn) >= 0)
@@ -1492,9 +1514,7 @@ read_middle:
* 4.096 usec. * (1 << qp->timeout)
*/
qp->s_flags |= QIB_S_TIMER;
- mod_timer(&qp->s_timer, jiffies +
- usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
- 1000UL));
+ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
if (qp->s_flags & QIB_S_WAIT_ACK) {
qp->s_flags &= ~QIB_S_WAIT_ACK;
qib_schedule_send(qp);
@@ -1705,12 +1725,12 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
* same request.
*/
offset = ((psn - e->psn) & QIB_PSN_MASK) *
- ib_mtu_enum_to_int(qp->path_mtu);
+ qp->pmtu;
len = be32_to_cpu(reth->length);
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
if (len != 0) {
@@ -1849,7 +1869,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
u32 psn;
u32 pad;
struct ib_wc wc;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
unsigned long flags;
@@ -1865,10 +1885,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
}
opcode = be32_to_cpu(ohdr->bth[0]);
- spin_lock_irqsave(&qp->s_lock, flags);
if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
- goto sunlock;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
psn = be32_to_cpu(ohdr->bth[2]);
opcode >>= 24;
@@ -1928,8 +1946,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
break;
}
- memset(&wc, 0, sizeof wc);
-
if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
qp->r_flags |= QIB_R_COMM_EST;
if (qp->ibqp.event_handler) {
@@ -1982,16 +1998,19 @@ send_middle:
goto rnr_nak;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
- goto send_last;
- /* FALLTHROUGH */
+ goto no_immediate_data;
+ /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
- /* FALLTHROUGH */
+ goto send_last;
case OP(SEND_LAST):
case OP(RDMA_WRITE_LAST):
+no_immediate_data:
+ wc.wc_flags = 0;
+ wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -2005,11 +2024,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
break;
@@ -2024,6 +2039,11 @@ send_last:
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
+ /* zero fields that are N/A */
+ wc.vendor_err = 0;
+ wc.pkey_index = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -2062,7 +2082,7 @@ send_last:
if (opcode == OP(RDMA_WRITE_FIRST))
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
- goto send_last;
+ goto no_immediate_data;
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
goto nack_op_err;
@@ -2092,7 +2112,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
@@ -2164,7 +2184,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
@@ -2186,7 +2206,7 @@ send_last:
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
be64_to_cpu(ateth->compare_data),
sdata);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index eb78d9367f0..4c07a8b34ff 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -110,7 +110,7 @@ bad_lkey:
while (j) {
struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
@@ -260,12 +260,15 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
/*
*
- * This should be called with the QP s_lock held.
+ * This should be called with the QP r_lock held.
+ *
+ * The s_lock will be acquired around the qib_migrate_qp() call.
*/
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
int has_grh, struct qib_qp *qp, u32 bth0)
{
__be64 guid;
+ unsigned long flags;
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
@@ -295,7 +298,9 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
goto err;
+ spin_lock_irqsave(&qp->s_lock, flags);
qib_migrate_qp(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -496,7 +501,7 @@ again:
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->wr.wr.atomic.swap);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -520,7 +525,7 @@ again:
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -537,11 +542,7 @@ again:
sqp->s_len -= len;
}
if (release)
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
@@ -683,17 +684,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = QIB_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
&qp->remote_ah_attr.grh,
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
}
lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
qp->remote_ah_attr.sl << 4;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
qp->remote_ah_attr.src_path_bits);
bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
@@ -702,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}
/**
@@ -753,7 +755,7 @@ void qib_do_send(struct work_struct *work)
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
*/
- if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+ if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size))
break;
/* Record that s_hdr is empty. */
@@ -777,7 +779,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index e9f9f8bc320..911205d3d5a 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/module.h>
#include <linux/firmware.h>
#include "qib.h"
@@ -299,7 +300,7 @@ bail:
}
static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
- const char *where)
+ const char *where)
{
int ret, chn, baduns;
u64 val;
@@ -341,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0, 0);
if (ret < 0)
- qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
- " (%s)\n", chn, where);
+ qib_dev_err(dd,
+ "Failed checking TRIMDONE, chn %d (%s)\n",
+ chn, where);
if (!(ret & 0x10)) {
int probe;
baduns |= (1 << chn);
- qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
- " (%s)\n", chn, ret, where);
+ qib_dev_err(dd,
+ "TRIMDONE cleared on chn %d (%02X). (%s)\n",
+ chn, ret, where);
probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_PGUDP(0), 0, 0);
qib_dev_err(dd, "probe is %d (%02X)\n",
@@ -369,13 +372,13 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
/* Read CTRL reg for each channel to check TRIMDONE */
if (baduns & (1 << chn)) {
qib_dev_err(dd,
- "Reseting TRIMDONE on chn %d (%s)\n",
+ "Resetting TRIMDONE on chn %d (%s)\n",
chn, where);
ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0x10, 0x10);
if (ret < 0)
- qib_dev_err(dd, "Failed re-setting "
- "TRIMDONE, chn %d (%s)\n",
+ qib_dev_err(dd,
+ "Failed re-setting TRIMDONE, chn %d (%s)\n",
chn, where);
}
}
@@ -1143,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "pre-read failed: elt %d,"
- " addr 0x%X, chnl %d\n",
- (sloc & 0xF),
- (sloc >> 9) & 0x3f, chnl);
+ qib_dev_err(dd,
+ "pre-read failed: elt %d, addr 0x%X, chnl %d\n",
+ (sloc & 0xF),
+ (sloc >> 9) & 0x3f, chnl);
return ret;
}
val = (ret & ~mask) | (val & mask);
@@ -1156,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "Global WR failed: elt %d,"
- " addr 0x%X, val %02X\n",
- (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+ qib_dev_err(dd,
+ "Global WR failed: elt %d, addr 0x%X, val %02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, val);
}
return ret;
}
@@ -1172,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "Write failed: elt %d,"
- " addr 0x%X, chnl %d, val 0x%02X,"
- " mask 0x%02X\n",
- (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
- val & 0xFF, mask & 0xFF);
+ qib_dev_err(dd,
+ "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+ val & 0xFF, mask & 0xFF);
break;
}
}
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index cad44491320..c6d6a54d2e1 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,6 +33,7 @@
#include <linux/spinlock.h>
#include <linux/netdevice.h>
+#include <linux/moduleparam.h>
#include "qib.h"
#include "qib_common.h"
@@ -275,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
GFP_KERNEL);
if (!ppd->sdma_descq) {
- qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor "
- "FIFO memory\n");
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA descriptor FIFO memory\n");
goto bail;
}
@@ -284,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
if (!ppd->sdma_head_dma) {
- qib_dev_err(ppd->dd, "failed to allocate SendDMA "
- "head memory\n");
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA head memory\n");
goto cleanup_descq;
}
ppd->sdma_head_dma[0] = 0;
@@ -421,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd)
void __qib_sdma_intr(struct qib_pportdata *ppd)
{
- if (__qib_sdma_running(ppd))
+ if (__qib_sdma_running(ppd)) {
qib_sdma_make_progress(ppd);
+ if (!list_empty(&ppd->sdma_userpending))
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ }
}
int qib_setup_sdma(struct qib_pportdata *ppd)
@@ -450,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd)
ppd->sdma_descq_removed = 0;
ppd->sdma_descq_added = 0;
+ ppd->sdma_intrequest = 0;
+ INIT_LIST_HEAD(&ppd->sdma_userpending);
+
INIT_LIST_HEAD(&ppd->sdma_activelist);
tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
@@ -706,6 +714,62 @@ unlock:
return ret;
}
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+void dump_sdma_state(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_desc *descq;
+ struct qib_sdma_txreq *txp, *txpnext;
+ __le64 *descqp;
+ u64 desc[2];
+ u64 addr;
+ u16 gen, dwlen, dwoffset;
+ u16 head, tail, cnt;
+
+ head = ppd->sdma_descq_head;
+ tail = ppd->sdma_descq_tail;
+ cnt = qib_sdma_descq_freecnt(ppd);
+ descq = ppd->sdma_descq;
+
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_head: %u\n", head);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_tail: %u\n", tail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdma_descq_freecnt: %u\n", cnt);
+
+ /* print info for each entry in the descriptor queue */
+ while (head != tail) {
+ char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
+
+ descqp = &descq[head].qw[0];
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+ flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
+ flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
+ flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
+ flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
+ flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
+ addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+ gen = (desc[0] >> 30) & 3ULL;
+ dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+ dwoffset = (desc[0] & 0x7ffULL) << 2;
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+ head, flags, addr, gen, dwlen, dwoffset);
+ if (++head == ppd->sdma_descq_cnt)
+ head = 0;
+ }
+
+ /* print dma descriptor indices from the TX requests */
+ list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+ list)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+ txp->start_idx, txp->next_descq_idx);
+}
+
void qib_sdma_process_event(struct qib_pportdata *ppd,
enum qib_sdma_events event)
{
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
index c3ec8efc2ed..d6235931a1b 100644
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -107,6 +107,11 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ ret = ERR_PTR(-ENOSYS);
+ goto done;
+ }
+
if (srq_init_attr->attr.max_sge == 0 ||
srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
srq_init_attr->attr.max_wr == 0 ||
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index d50a33fe8bb..3c8e4e3caca 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,41 +34,7 @@
#include <linux/ctype.h>
#include "qib.h"
-
-/**
- * qib_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * Returns the number of bytes consumed, or negative value on error.
- */
-static int qib_parse_ushort(const char *str, unsigned short *valp)
-{
- unsigned long val;
- char *end;
- int ret;
-
- if (!isdigit(str[0])) {
- ret = -EINVAL;
- goto bail;
- }
-
- val = simple_strtoul(str, &end, 0);
-
- if (val > 0xffff) {
- ret = -EINVAL;
- goto bail;
- }
-
- *valp = val;
-
- ret = end + 1 - str;
- if (ret == 0)
- ret = -EINVAL;
-
-bail:
- return ret;
-}
+#include "qib_mad.h"
/* start of per-port functions */
/*
@@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
int ret;
u16 val;
- ret = qib_parse_ushort(buf, &val);
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
+ qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ return ret;
+ }
/*
* Set the "intentional" heartbeat enable per either of
@@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
* because entering loopback mode overrides it and automatically
* disables heartbeat.
*/
- if (ret >= 0)
- ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
- if (ret < 0)
- qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
return ret < 0 ? ret : count;
}
@@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
int ret;
u16 val;
- ret = qib_parse_ushort(buf, &val);
- if (ret > 0)
- qib_set_led_override(ppd, val);
- else
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
qib_dev_err(dd, "attempt to set invalid LED override\n");
- return ret < 0 ? ret : count;
+ return ret;
+ }
+
+ qib_set_led_override(ppd, val);
+ return count;
}
static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
@@ -150,7 +120,7 @@ static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
* For userland compatibility, these offsets must remain fixed.
* They are strings for QIB_STATUS_*
*/
-static const char *qib_status_str[] = {
+static const char * const qib_status_str[] = {
"Initted",
"",
"",
@@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = {
NULL
};
+/*
+ * Start of per-port congestion control structures and support code
+ */
+
+/*
+ * Congestion control table size followed by table entries
+ */
+static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
+ return -EINVAL;
+
+ ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+ + sizeof(__be16);
+
+ if (pos > ret)
+ return -EINVAL;
+
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->ccti_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+ /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static struct kobj_type qib_port_cc_ktype = {
+ .release = qib_port_release,
+};
+
+static struct bin_attribute cc_table_bin_attr = {
+ .attr = {.name = "cc_table_bin", .mode = 0444},
+ .read = read_cc_table_bin,
+ .size = PAGE_SIZE,
+};
+
+/*
+ * Congestion settings: port control, control map and an array of 16
+ * entries for the congestion entries - increase, timer, event log
+ * trigger threshold and the minimum injection rate delay.
+ */
+static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return -EINVAL;
+
+ ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
+
+ if (pos > ret)
+ return -EINVAL;
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->congestion_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static struct bin_attribute cc_setting_bin_attr = {
+ .attr = {.name = "cc_settings_bin", .mode = 0444},
+ .read = read_cc_setting_bin,
+ .size = PAGE_SIZE,
+};
+
+
static ssize_t qib_portattr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
return pattr->store(ppd, buf, len);
}
-static void qib_port_release(struct kobject *kobj)
-{
- /* nothing to do since memory is freed by qib_free_devdata() */
-}
static const struct sysfs_ops qib_port_ops = {
.show = qib_portattr_show,
@@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, diagc_kobj);
struct qib_ibport *qibp = &ppd->ibport_data;
- char *endp;
- long val = simple_strtol(buf, &endp, 0);
-
- if (val < 0 || endp == buf)
- return -EINVAL;
+ u32 val;
+ int ret;
+ ret = kstrtou32(buf, 0, &val);
+ if (ret)
+ return ret;
*(u32 *)((char *) qibp + dattr->counter) = val;
return size;
}
@@ -503,8 +561,22 @@ static ssize_t show_nctxts(struct device *device,
struct qib_devdata *dd = dd_from_dev(dev);
/* Return the number of user ports (contexts) available. */
- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
- dd->first_user_ctxt);
+ /* The calculation below deals with a special case where
+ * cfgctxts is set to 1 on a single-port board. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+ (dd->cfgctxts - dd->first_user_ctxt));
+}
+
+static ssize_t show_nfreectxts(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* Return the number of free user ports (contexts) available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
static ssize_t show_serial(struct device *device,
@@ -604,6 +676,7 @@ static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
+static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
@@ -617,6 +690,7 @@ static struct device_attribute *qib_attributes[] = {
&dev_attr_board_id,
&dev_attr_version,
&dev_attr_nctxts,
+ &dev_attr_nfreectxts,
&dev_attr_serial,
&dev_attr_boardversion,
&dev_attr_logged_errors,
@@ -633,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
int ret;
if (!port_num || port_num > dd->num_pports) {
- qib_dev_err(dd, "Skipping infiniband class with "
- "invalid port %u\n", port_num);
+ qib_dev_err(dd,
+ "Skipping infiniband class with invalid port %u\n",
+ port_num);
ret = -ENODEV;
goto bail;
}
@@ -643,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
"linkcontrol");
if (ret) {
- qib_dev_err(dd, "Skipping linkcontrol sysfs info, "
- "(err %d) port %u\n", ret, port_num);
+ qib_dev_err(dd,
+ "Skipping linkcontrol sysfs info, (err %d) port %u\n",
+ ret, port_num);
goto bail;
}
kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
@@ -652,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
"sl2vl");
if (ret) {
- qib_dev_err(dd, "Skipping sl2vl sysfs info, "
- "(err %d) port %u\n", ret, port_num);
- goto bail_sl;
+ qib_dev_err(dd,
+ "Skipping sl2vl sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_link;
}
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
"diag_counters");
if (ret) {
- qib_dev_err(dd, "Skipping diag_counters sysfs info, "
- "(err %d) port %u\n", ret, port_num);
- goto bail_diagc;
+ qib_dev_err(dd,
+ "Skipping diag_counters sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_sl;
}
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return 0;
+
+ ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
+ kobj, "CCMgtA");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_diagc;
+ }
+
+ kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc;
+ }
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc_entry_bin;
+ }
+
+ qib_devinfo(dd->pcidev,
+ "IB%u: Congestion Control Agent enabled for port %d\n",
+ dd->unit, port_num);
+
return 0;
+bail_cc_entry_bin:
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
+bail_cc:
+ kobject_put(&ppd->pport_cc_kobj);
bail_diagc:
- kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->diagc_kobj);
bail_sl:
+ kobject_put(&ppd->sl2vl_kobj);
+bail_link:
kobject_put(&ppd->pport_kobj);
bail:
return ret;
@@ -688,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd)
for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
ret = device_create_file(&dev->dev, qib_attributes[i]);
if (ret)
- return ret;
+ goto bail;
}
return 0;
+bail:
+ for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
+ device_remove_file(&dev->dev, qib_attributes[i]);
+ return ret;
}
/*
@@ -704,7 +828,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i];
- kobject_put(&ppd->pport_kobj);
+ if (qib_cc_table_size &&
+ ppd->congestion_entries_shadow) {
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ kobject_put(&ppd->pport_cc_kobj);
+ }
kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->pport_kobj);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
index 6f31ca5039d..647f7beb1b0 100644
--- a/drivers/infiniband/hw/qib/qib_twsi.c
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,7 +42,7 @@
* QLogic_IB "Two Wire Serial Interface" driver.
* Originally written for a not-quite-i2c serial eeprom, which is
* still used on some supported boards. Later boards have added a
- * variety of other uses, most board-specific, so teh bit-boffing
+ * variety of other uses, most board-specific, so the bit-boffing
* part has been split off to this file, while the other parts
* have been moved to chip-specific files.
*
@@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
goto failed_write;
ret = qib_twsi_wr(dd, addr, 0);
if (ret) {
- qib_dev_err(dd, "Failed to write interface"
- " write addr %02X\n", addr);
+ qib_dev_err(dd,
+ "Failed to write interface write addr %02X\n",
+ addr);
goto failed_write;
}
}
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 7f36454c225..31d3561400a 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -36,6 +36,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
#include "qib.h"
@@ -294,6 +295,7 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
nbufs = last - first + 1; /* number in range to check */
if (dd->upd_pio_shadow) {
+update_shadow:
/*
* Minor optimization. If we had no buffers on last call,
* start out by doing the update; continue and do scan even
@@ -303,37 +305,39 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
updated++;
}
i = first;
-rescan:
/*
* While test_and_set_bit() is atomic, we do that and then the
* change_bit(), and the pair is not. See if this is the cause
* of the remaining armlaunch errors.
*/
spin_lock_irqsave(&dd->pioavail_lock, flags);
+ if (dd->last_pio >= first && dd->last_pio <= last)
+ i = dd->last_pio + 1;
+ if (!first)
+ /* adjust to min possible */
+ nbufs = last - dd->min_kernel_pio + 1;
for (j = 0; j < nbufs; j++, i++) {
if (i > last)
- i = first;
+ i = !first ? dd->min_kernel_pio : first;
if (__test_and_set_bit((2 * i) + 1, shadow))
continue;
/* flip generation bit */
__change_bit(2 * i, shadow);
/* remember that the buffer can be written to now */
__set_bit(i, dd->pio_writing);
+ if (!first && first != last) /* first == last on VL15, avoid */
+ dd->last_pio = i;
break;
}
spin_unlock_irqrestore(&dd->pioavail_lock, flags);
if (j == nbufs) {
- if (!updated) {
+ if (!updated)
/*
* First time through; shadow exhausted, but may be
* buffers available, try an update and then rescan.
*/
- update_send_bufs(dd);
- updated++;
- i = first;
- goto rescan;
- }
+ goto update_shadow;
no_send_bufs(dd);
buf = NULL;
} else {
@@ -421,14 +425,20 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
__clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
+ start, dd->pioavailshadow);
__set_bit(start, dd->pioavailkernel);
+ if ((start >> 1) < dd->min_kernel_pio)
+ dd->min_kernel_pio = start >> 1;
} else {
__set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
dd->pioavailshadow);
__clear_bit(start, dd->pioavailkernel);
+ if ((start >> 1) > dd->min_kernel_pio)
+ dd->min_kernel_pio = start >> 1;
}
start += 2;
}
+ if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1)
+ dd->last_pio = dd->min_kernel_pio - 1;
spin_unlock_irqrestore(&dd->pioavail_lock, flags);
dd->f_txchk_change(dd, ostart, len, avail, rcd);
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 32ccf3c824c..aa3a8035bb6 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp)
u32 hwords;
u32 bth0;
u32 len;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ u32 pmtu = qp->pmtu;
int ret = 0;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp)
goto done;
}
- ohdr = &qp->s_hdr.u.oth;
+ ohdr = &qp->s_hdr->u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr.u.l.oth;
+ ohdr = &qp->s_hdr->u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
@@ -243,13 +243,12 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct qib_qp *qp)
{
struct qib_other_headers *ohdr;
- unsigned long flags;
u32 opcode;
u32 hdrsize;
u32 psn;
u32 pad;
struct ib_wc wc;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ u32 pmtu = qp->pmtu;
struct ib_reth *reth;
int ret;
@@ -263,14 +262,11 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
}
opcode = be32_to_cpu(ohdr->bth[0]);
- spin_lock_irqsave(&qp->s_lock, flags);
if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
- goto sunlock;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
psn = be32_to_cpu(ohdr->bth[2]);
opcode >>= 24;
- memset(&wc, 0, sizeof wc);
/* Compare the PSN verses the expected PSN. */
if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
@@ -285,11 +281,7 @@ inv:
set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
} else
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
@@ -370,7 +362,7 @@ send_first:
}
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
- goto send_last;
+ goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
/* FALLTHROUGH */
@@ -389,8 +381,11 @@ send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
- /* FALLTHROUGH */
+ goto send_last;
case OP(SEND_LAST):
+no_immediate_data:
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -404,20 +399,20 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
-last_imm:
qib_copy_sge(&qp->r_sge, data, tlen, 0);
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
+last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
+ /* zero fields that are N/A */
+ wc.vendor_err = 0;
+ wc.pkey_index = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -489,13 +484,7 @@ rdma_last_imm:
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->
- refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
else {
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
@@ -505,6 +494,8 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ qib_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
@@ -520,11 +511,7 @@ rdma_last:
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
break;
default:
@@ -546,6 +533,4 @@ op_err:
qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
-sunlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index e1b3da2a1f8..aaf7039f8ed 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
struct qib_sge *sge;
struct ib_wc wc;
u32 length;
+ enum ib_qp_type sqptype, dqptype;
qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
}
- if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+
+ sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : sqp->ibqp.qp_type;
+ dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : qp->ibqp.qp_type;
+
+ if (dqptype != sqptype ||
!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;
@@ -116,7 +123,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
}
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
length = swqe->length;
@@ -194,11 +201,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
}
length -= len;
}
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
@@ -277,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
- ibp->n_multicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
/*
@@ -321,11 +324,11 @@ int qib_make_ud_req(struct qib_qp *qp)
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
- qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
&ah_attr->grh,
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
- ohdr = &qp->s_hdr.u.l.oth;
+ ohdr = &qp->s_hdr->u.l.oth;
/*
* Don't worry about sending to locally attached multicast
* QPs. It is unspecified by the spec. what happens.
@@ -333,7 +336,7 @@ int qib_make_ud_req(struct qib_qp *qp)
} else {
/* Header size in 32-bit words. */
lrh0 = QIB_LRH_BTH;
- ohdr = &qp->s_hdr.u.oth;
+ ohdr = &qp->s_hdr->u.oth;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
@@ -346,15 +349,15 @@ int qib_make_ud_req(struct qib_qp *qp)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
else
lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
- qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
lid = ppd->lid;
if (lid) {
lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
- qp->s_hdr.lrh[3] = cpu_to_be16(lid);
+ qp->s_hdr->lrh[3] = cpu_to_be16(lid);
} else
- qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
+ qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
@@ -445,13 +448,14 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
- /* Get the number of bytes the message was padded by. */
+ /*
+ * Get the number of bytes the message was padded by
+ * and drop incomplete packets.
+ */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- if (unlikely(tlen < (hdrsize + pad + 4))) {
- /* Drop incomplete packets. */
- ibp->n_pkt_drops++;
- goto bail;
- }
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+
tlen -= hdrsize + pad + 4;
/*
@@ -460,10 +464,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
*/
if (qp->ibqp.qp_num) {
if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
- hdr->lrh[3] == IB_LID_PERMISSIVE)) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ hdr->lrh[3] == IB_LID_PERMISSIVE))
+ goto drop;
if (qp->ibqp.qp_num > 1) {
u16 pkey1, pkey2;
@@ -476,7 +478,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
- goto bail;
+ return;
}
}
if (unlikely(qkey != qp->qkey)) {
@@ -484,30 +486,24 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
- goto bail;
+ return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen != 256 ||
- (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+ goto drop;
} else {
struct ib_smp *smp;
/* Drop invalid MAD packets (see 13.5.3.1). */
- if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)
+ goto drop;
smp = (struct ib_smp *) data;
if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE) &&
- smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ goto drop;
}
/*
@@ -519,17 +515,15 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
- hdrsize += sizeof(u32);
+ tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
- } else {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ } else
+ goto drop;
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
@@ -556,8 +550,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_flags |= QIB_R_REUSE_SGE;
- ibp->n_pkt_drops++;
- return;
+ goto drop;
}
if (has_grh) {
qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
@@ -566,11 +559,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
@@ -594,5 +583,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
-bail:;
+ return;
+
+drop:
+ ibp->n_pkt_drops++;
}
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index d7a26c1d4f3..2bc1d2b9629 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -51,8 +51,8 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages,
/*
* Call with current->mm->mmap_sem held.
*/
-static int __get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
@@ -74,7 +74,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
goto bail_release;
}
- current->mm->locked_vm += num_pages;
+ current->mm->pinned_vm += num_pages;
ret = 0;
goto bail;
@@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __get_user_pages(start_page, num_pages, p, NULL);
+ ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
@@ -151,7 +151,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
__qib_release_user_pages(p, num_pages, 1);
if (current->mm) {
- current->mm->locked_vm -= num_pages;
+ current->mm->pinned_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 4c19e06b5e8..d2806cae234 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -52,21 +52,48 @@
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+/*
+ * track how many times a process open this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+ struct rb_node node;
+ int refcount;
+ pid_t pid;
+};
+
struct qib_user_sdma_pkt {
- u8 naddr; /* dimension of addr (1..3) ... */
+ struct list_head list; /* list element */
+
+ u8 tiddma; /* if this is NEW tid-sdma */
+ u8 largepkt; /* this is large pkt from kmalloc */
+ u16 frag_size; /* frag size used by PSM */
+ u16 index; /* last header index or push index */
+ u16 naddr; /* dimension of addr (1..3) ... */
+ u16 addrlimit; /* addr array size */
+ u16 tidsmidx; /* current tidsm index */
+ u16 tidsmcount; /* tidsm array item count */
+ u16 payload_size; /* payload size so far for header */
+ u32 bytes_togo; /* bytes for processing */
u32 counter; /* sdma pkts queued counter for this entry */
+ struct qib_tid_session_member *tidsm; /* tid session member array */
+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
u64 added; /* global descq number of entries */
struct {
- u32 offset; /* offset for kvaddr, addr */
- u32 length; /* length in page */
- u8 put_page; /* should we put_page? */
- u8 dma_mapped; /* is page dma_mapped? */
+ u16 offset; /* offset for kvaddr, addr */
+ u16 length; /* length in page */
+ u16 first_desc; /* first desc */
+ u16 last_desc; /* last desc */
+ u16 put_page; /* should we put_page? */
+ u16 dma_mapped; /* is page dma_mapped? */
+ u16 dma_length; /* for dma_unmap_page() */
+ u16 padding;
struct page *page; /* may be NULL (coherent mem) */
void *kvaddr; /* FIXME: only for pio hack */
dma_addr_t addr;
} addr[4]; /* max pages, any more and we coalesce */
- struct list_head list; /* list element */
};
struct qib_user_sdma_queue {
@@ -77,6 +104,12 @@ struct qib_user_sdma_queue {
*/
struct list_head sent;
+ /*
+ * Because above list will be accessed by both process and
+ * signal handler, we need a spinlock for it.
+ */
+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
+
/* headers with expected length are allocated from here... */
char header_cache_name[64];
struct dma_pool *header_cache;
@@ -88,27 +121,83 @@ struct qib_user_sdma_queue {
/* as packets go on the queued queue, they are counted... */
u32 counter;
u32 sent_counter;
+ /* pending packets, not sending yet */
+ u32 num_pending;
+ /* sending packets, not complete yet */
+ u32 num_sending;
+ /* global descq number of entry of last sending packet */
+ u64 added;
/* dma page table */
struct rb_root dma_pages_root;
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
/* protect everything above... */
struct mutex lock;
};
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ sdma_rb_node = container_of(node,
+ struct qib_user_sdma_rb_node, node);
+ if (pid < sdma_rb_node->pid)
+ node = node->rb_left;
+ else if (pid > sdma_rb_node->pid)
+ node = node->rb_right;
+ else
+ return sdma_rb_node;
+ }
+ return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct qib_user_sdma_rb_node *got;
+
+ while (*node) {
+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ parent = *node;
+ if (new->pid < got->pid)
+ node = &((*node)->rb_left);
+ else if (new->pid > got->pid)
+ node = &((*node)->rb_right);
+ else
+ return 0;
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, root);
+ return 1;
+}
+
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
struct qib_user_sdma_queue *pq =
kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+ struct qib_user_sdma_rb_node *sdma_rb_node;
if (!pq)
goto done;
pq->counter = 0;
pq->sent_counter = 0;
- INIT_LIST_HEAD(&pq->sent);
+ pq->num_pending = 0;
+ pq->num_sending = 0;
+ pq->added = 0;
+ pq->sdma_rb_node = NULL;
+ INIT_LIST_HEAD(&pq->sent);
+ spin_lock_init(&pq->sent_lock);
mutex_init(&pq->lock);
snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
@@ -131,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->dma_pages_root = RB_ROOT;
+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+ current->pid);
+ if (sdma_rb_node) {
+ sdma_rb_node->refcount++;
+ } else {
+ int ret;
+ sdma_rb_node = kmalloc(sizeof(
+ struct qib_user_sdma_rb_node), GFP_KERNEL);
+ if (!sdma_rb_node)
+ goto err_rb;
+
+ sdma_rb_node->refcount = 1;
+ sdma_rb_node->pid = current->pid;
+
+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+ sdma_rb_node);
+ BUG_ON(ret == 0);
+ }
+ pq->sdma_rb_node = sdma_rb_node;
+
goto done;
+err_rb:
+ dma_pool_destroy(pq->header_cache);
err_slab:
kmem_cache_destroy(pq->pkt_slab);
err_kfree:
@@ -144,34 +255,310 @@ done:
}
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- int i, size_t offset, size_t len,
- int put_page, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+ int i, u16 offset, u16 len,
+ u16 first_desc, u16 last_desc,
+ u16 put_page, u16 dma_mapped,
+ struct page *page, void *kvaddr,
+ dma_addr_t dma_addr, u16 dma_length)
{
pkt->addr[i].offset = offset;
pkt->addr[i].length = len;
+ pkt->addr[i].first_desc = first_desc;
+ pkt->addr[i].last_desc = last_desc;
pkt->addr[i].put_page = put_page;
pkt->addr[i].dma_mapped = dma_mapped;
pkt->addr[i].page = page;
pkt->addr[i].kvaddr = kvaddr;
pkt->addr[i].addr = dma_addr;
+ pkt->addr[i].dma_length = dma_length;
+}
+
+static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
+{
+ void *hdr;
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ dma_addr);
+ else
+ hdr = NULL;
+
+ if (!hdr) {
+ hdr = kmalloc(len, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = 0;
+ }
+
+ return hdr;
}
-static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
- u32 counter, size_t offset,
- size_t len, int dma_mapped,
- struct page *page,
- void *kvaddr, dma_addr_t dma_addr)
+static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ struct page *page, u16 put,
+ u16 offset, u16 len, void *kvaddr)
{
- pkt->naddr = 1;
- pkt->counter = counter;
- qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
- kvaddr, dma_addr);
+ __le16 *pbc16;
+ void *pbcvaddr;
+ struct qib_message_header *hdr;
+ u16 newlen, pbclen, lastdesc, dma_mapped;
+ u32 vcto;
+ union qib_seqnum seqnum;
+ dma_addr_t pbcdaddr;
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ page, offset, len, DMA_TO_DEVICE);
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ /*
+ * dma mapping error, pkt has not managed
+ * this page yet, return the page here so
+ * the caller can ignore this page.
+ */
+ if (put) {
+ put_page(page);
+ } else {
+ /* coalesce case */
+ kunmap(page);
+ __free_page(page);
+ }
+ ret = -ENOMEM;
+ goto done;
+ }
+ offset = 0;
+ dma_mapped = 1;
+
+
+next_fragment:
+
+ /*
+ * In tid-sdma, the transfer length is restricted by
+ * receiver side current tid page length.
+ */
+ if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
+ newlen = pkt->tidsm[pkt->tidsmidx].length;
+ else
+ newlen = len;
+
+ /*
+ * Then the transfer length is restricted by MTU.
+ * the last descriptor flag is determined by:
+ * 1. the current packet is at frag size length.
+ * 2. the current tid page is done if tid-sdma.
+ * 3. there is no more byte togo if sdma.
+ */
+ lastdesc = 0;
+ if ((pkt->payload_size + newlen) >= pkt->frag_size) {
+ newlen = pkt->frag_size - pkt->payload_size;
+ lastdesc = 1;
+ } else if (pkt->tiddma) {
+ if (newlen == pkt->tidsm[pkt->tidsmidx].length)
+ lastdesc = 1;
+ } else {
+ if (newlen == pkt->bytes_togo)
+ lastdesc = 1;
+ }
+
+ /* fill the next fragment in this page */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ offset, newlen, /* offset, len */
+ 0, lastdesc, /* first last desc */
+ put, dma_mapped, /* put page, dma mapped */
+ page, kvaddr, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->bytes_togo -= newlen;
+ pkt->payload_size += newlen;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* If there is no more byte togo. (lastdesc==1) */
+ if (pkt->bytes_togo == 0) {
+ /* The packet is done, header is not dma mapped yet.
+ * it should be from kmalloc */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ goto done;
+ }
+
+ /* If tid-sdma, advance tid info. */
+ if (pkt->tiddma) {
+ pkt->tidsm[pkt->tidsmidx].length -= newlen;
+ if (pkt->tidsm[pkt->tidsmidx].length) {
+ pkt->tidsm[pkt->tidsmidx].offset += newlen;
+ } else {
+ pkt->tidsmidx++;
+ if (pkt->tidsmidx == pkt->tidsmcount) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * If this is NOT the last descriptor. (newlen==len)
+ * the current packet is not done yet, but the current
+ * send side page is done.
+ */
+ if (lastdesc == 0)
+ goto done;
+
+ /*
+ * If running this driver under PSM with message size
+ * fitting into one transfer unit, it is not possible
+ * to pass this line. otherwise, it is a buggggg.
+ */
+
+ /*
+ * Since the current packet is done, and there are more
+ * bytes togo, we need to create a new sdma header, copying
+ * from previous sdma header and modify both.
+ */
+ pbclen = pkt->addr[pkt->index].length;
+ pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
+ if (!pbcvaddr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Copy the previous sdma header to new sdma header */
+ pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
+ memcpy(pbcvaddr, pbc16, pbclen);
+
+ /* Modify the previous sdma header */
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* turn on the header suppression */
+ hdr->iph.pkt_flags =
+ cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
+ /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
+ hdr->flags &= ~(0x04|0x20);
+ } else {
+ /* turn off extra bytes: 20-21 bits */
+ hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
+ /* turn off ACK_REQ: 0x04 */
+ hdr->flags &= ~(0x04);
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* The packet is done, header is not dma mapped yet.
+ * it should be from kmalloc */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ /* Modify the new sdma header */
+ pbc16 = (__le16 *)pbcvaddr;
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* Set new tid and offset for new sdma header */
+ hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
+ (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
+ (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
+ (pkt->tidsm[pkt->tidsmidx].offset>>2));
+ } else {
+ /* Middle protocol new packet offset */
+ hdr->uwords[2] += pkt->payload_size;
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* Next sequence number in new sdma header */
+ seqnum.val = be32_to_cpu(hdr->bth[2]);
+ if (pkt->tiddma)
+ seqnum.seq++;
+ else
+ seqnum.pkt++;
+ hdr->bth[2] = cpu_to_be32(seqnum.val);
+
+ /* Init new sdma header. */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ 0, pbclen, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbcvaddr, /* struct page, virt addr */
+ pbcdaddr, pbclen); /* dma addr, dma length */
+ pkt->index = pkt->naddr;
+ pkt->payload_size = 0;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* Prepare for next fragment in this page */
+ if (newlen != len) {
+ if (dma_mapped) {
+ put = 0;
+ dma_mapped = 0;
+ page = NULL;
+ kvaddr = NULL;
+ }
+ len -= newlen;
+ offset += newlen;
+
+ goto next_fragment;
+ }
+
+done:
+ return ret;
}
/* we've too many pages in the iovec, coalesce to a single page */
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
@@ -182,7 +569,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
char *mpage;
int i;
int len = 0;
- dma_addr_t dma_addr;
if (!page) {
ret = -ENOMEM;
@@ -205,17 +591,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
len += iov[i].iov_len;
}
- dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
- ret = -ENOMEM;
- goto free_unmap;
- }
-
- qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
- dma_addr);
- pkt->naddr = 2;
-
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ page, 0, 0, len, mpage_save);
goto done;
free_unmap:
@@ -238,16 +615,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/*
- * Truncate length to page boundry.
- */
-static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
- const unsigned long offset = addr & ~PAGE_MASK;
-
- return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
static void qib_user_sdma_free_pkt_frag(struct device *dev,
struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
@@ -256,10 +623,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
const int i = frag;
if (pkt->addr[i].page) {
+ /* only user data has page */
if (pkt->addr[i].dma_mapped)
dma_unmap_page(dev,
pkt->addr[i].addr,
- pkt->addr[i].length,
+ pkt->addr[i].dma_length,
DMA_TO_DEVICE);
if (pkt->addr[i].kvaddr)
@@ -269,55 +637,80 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
put_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
- } else if (pkt->addr[i].kvaddr)
- /* free coherent mem from cache... */
- dma_pool_free(pq->header_cache,
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+ /* from kmalloc & dma mapped */
+ dma_unmap_single(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+ kfree(pkt->addr[i].kvaddr);
+ } else if (pkt->addr[i].addr) {
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
pkt->addr[i].kvaddr, pkt->addr[i].addr);
+ } else {
+ /* from kmalloc but not dma mapped */
+ kfree(pkt->addr[i].kvaddr);
+ }
+ }
}
/* return number of pages pinned... */
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
struct qib_user_sdma_pkt *pkt,
unsigned long addr, int tlen, int npages)
{
- struct page *pages[2];
- int j;
- int ret;
-
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
- if (ret != npages) {
- int i;
-
- for (i = 0; i < ret; i++)
- put_page(pages[i]);
-
- ret = -ENOMEM;
- goto done;
- }
+ struct page *pages[8];
+ int i, j;
+ int ret = 0;
- for (j = 0; j < npages; j++) {
- /* map the pages... */
- const int flen = qib_user_sdma_page_length(addr, tlen);
- dma_addr_t dma_addr =
- dma_map_page(&dd->pcidev->dev,
- pages[j], 0, flen, DMA_TO_DEVICE);
- unsigned long fofs = addr & ~PAGE_MASK;
+ while (npages) {
+ if (npages > 8)
+ j = 8;
+ else
+ j = npages;
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = get_user_pages_fast(addr, j, 0, pages);
+ if (ret != j) {
+ i = 0;
+ j = ret;
ret = -ENOMEM;
- goto done;
+ goto free_pages;
}
- qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
- pages[j], kmap(pages[j]), dma_addr);
+ for (i = 0; i < j; i++) {
+ /* map the pages... */
+ unsigned long fofs = addr & ~PAGE_MASK;
+ int flen = ((fofs + tlen) > PAGE_SIZE) ?
+ (PAGE_SIZE - fofs) : tlen;
+
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ pages[i], 1, fofs, flen, NULL);
+ if (ret < 0) {
+ /* current page has beed taken
+ * care of inside above call.
+ */
+ i++;
+ goto free_pages;
+ }
+
+ addr += flen;
+ tlen -= flen;
+ }
- pkt->naddr++;
- addr += flen;
- tlen -= flen;
+ npages -= j;
}
+ goto done;
+
+ /* if error, return all pages not managed by pkt */
+free_pages:
+ while (i < j)
+ put_page(pages[i++]);
+
done:
return ret;
}
@@ -335,7 +728,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
const int npages = qib_user_sdma_num_pages(iov + idx);
const unsigned long addr = (unsigned long) iov[idx].iov_base;
- ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+ ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
iov[idx].iov_len, npages);
if (ret < 0)
goto free_pkt;
@@ -344,9 +737,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
goto done;
free_pkt:
- for (idx = 0; idx < pkt->naddr; idx++)
+ /* we need to ignore the first entry here */
+ for (idx = 1; idx < pkt->naddr; idx++)
qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+ /* need to dma unmap the first entry, this is to restore to
+ * the original state so that caller can free the memory in
+ * error condition. Caller does not know if dma mapped or not*/
+ if (pkt->addr[0].dma_mapped) {
+ dma_unmap_single(&dd->pcidev->dev,
+ pkt->addr[0].addr,
+ pkt->addr[0].dma_length,
+ DMA_TO_DEVICE);
+ pkt->addr[0].addr = 0;
+ pkt->addr[0].dma_mapped = 0;
+ }
+
done:
return ret;
}
@@ -359,8 +765,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
{
int ret = 0;
- if (npages >= ARRAY_SIZE(pkt->addr))
- ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+ if (pkt->frag_size == pkt->bytes_togo &&
+ npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
else
ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
@@ -380,8 +787,12 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
for (i = 0; i < pkt->naddr; i++)
qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
- kmem_cache_free(pq->pkt_slab, pkt);
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
}
+ INIT_LIST_HEAD(list);
}
/*
@@ -392,63 +803,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
* as, if there is an error we clean it...
*/
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
- struct list_head *list,
const struct iovec *iov,
unsigned long niov,
- int maxpkts)
+ struct list_head *list,
+ int *maxpkts, int *ndesc)
{
unsigned long idx = 0;
int ret = 0;
int npkts = 0;
- struct page *page = NULL;
__le32 *pbc;
dma_addr_t dma_addr;
struct qib_user_sdma_pkt *pkt = NULL;
size_t len;
size_t nw;
u32 counter = pq->counter;
- int dma_mapped = 0;
+ u16 frag_size;
- while (idx < niov && npkts < maxpkts) {
+ while (idx < niov && npkts < *maxpkts) {
const unsigned long addr = (unsigned long) iov[idx].iov_base;
const unsigned long idx_save = idx;
unsigned pktnw;
unsigned pktnwc;
int nfrags = 0;
int npages = 0;
+ int bytes_togo = 0;
+ int tiddma = 0;
int cfur;
- dma_mapped = 0;
len = iov[idx].iov_len;
nw = len >> 2;
- page = NULL;
-
- pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
- if (!pkt) {
- ret = -ENOMEM;
- goto free_list;
- }
if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
len > PAGE_SIZE || len & 3 || addr & 3) {
ret = -EINVAL;
- goto free_pkt;
+ goto free_list;
}
- if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
- pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
- &dma_addr);
- else
- pbc = NULL;
-
+ pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
if (!pbc) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto free_pkt;
- }
- pbc = kmap(page);
+ ret = -ENOMEM;
+ goto free_list;
}
cfur = copy_from_user(pbc, iov[idx].iov_base, len);
@@ -473,8 +869,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
* we can verify that the packet is consistent with the
* iovec lengths.
*/
- pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
- if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ pktnw = le32_to_cpu(*pbc) & 0xFFFF;
+ if (pktnw < pktnwc) {
ret = -EINVAL;
goto free_pbc;
}
@@ -485,17 +881,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
const unsigned long faddr =
(unsigned long) iov[idx].iov_base;
- if (slen & 3 || faddr & 3 || !slen ||
- slen > PAGE_SIZE) {
+ if (slen & 3 || faddr & 3 || !slen) {
ret = -EINVAL;
goto free_pbc;
}
- npages++;
- if ((faddr & PAGE_MASK) !=
- ((faddr + slen - 1) & PAGE_MASK))
- npages++;
+ npages += qib_user_sdma_num_pages(&iov[idx]);
+ bytes_togo += slen;
pktnwc += slen >> 2;
idx++;
nfrags++;
@@ -506,48 +899,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
goto free_pbc;
}
- if (page) {
- dma_addr = dma_map_page(&dd->pcidev->dev,
- page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
+ if (((frag_size ? frag_size : bytes_togo) + len) >
+ ppd->ibmaxlen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (frag_size) {
+ int pktsize, tidsmsize, n;
+
+ n = npages*((2*PAGE_SIZE/frag_size)+1);
+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+
+ /*
+ * Determine if this is tid-sdma or just sdma.
+ */
+ tiddma = (((le32_to_cpu(pbc[7])>>
+ QLOGIC_IB_I_TID_SHIFT)&
+ QLOGIC_IB_I_TID_MASK) !=
+ QLOGIC_IB_I_TID_MASK);
+
+ if (tiddma)
+ tidsmsize = iov[idx].iov_len;
+ else
+ tidsmsize = 0;
+
+ pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+ if (!pkt) {
ret = -ENOMEM;
goto free_pbc;
}
+ pkt->largepkt = 1;
+ pkt->frag_size = frag_size;
+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
+
+ if (tiddma) {
+ char *tidsm = (char *)pkt + pktsize;
+ cfur = copy_from_user(tidsm,
+ iov[idx].iov_base, tidsmsize);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pkt;
+ }
+ pkt->tidsm =
+ (struct qib_tid_session_member *)tidsm;
+ pkt->tidsmcount = tidsmsize/
+ sizeof(struct qib_tid_session_member);
+ pkt->tidsmidx = 0;
+ idx++;
+ }
- dma_mapped = 1;
+ /*
+ * pbc 'fill1' field is borrowed to pass frag size,
+ * we need to clear it after picking frag size, the
+ * hardware requires this field to be zero.
+ */
+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
+ } else {
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 0;
+ pkt->frag_size = bytes_togo;
+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
}
-
- qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
- page, pbc, dma_addr);
+ pkt->bytes_togo = bytes_togo;
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
+
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+ 0, len, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbc, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->index = 0;
+ pkt->naddr = 1;
if (nfrags) {
ret = qib_user_sdma_init_payload(dd, pq, pkt,
iov + idx_save + 1,
nfrags, npages);
if (ret < 0)
- goto free_pbc_dma;
+ goto free_pkt;
+ } else {
+ /* since there is no payload, mark the
+ * header as the last desc. */
+ pkt->addr[0].last_desc = 1;
+
+ if (dma_addr == 0) {
+ /*
+ * the header is not dma mapped yet.
+ * it should be from kmalloc.
+ */
+ dma_addr = dma_map_single(&dd->pcidev->dev,
+ pbc, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pkt->addr[0].addr = dma_addr;
+ pkt->addr[0].dma_mapped = 1;
+ }
}
counter++;
npkts++;
+ pkt->pq = pq;
+ pkt->index = 0; /* reset index for push on hw */
+ *ndesc += pkt->naddr;
list_add_tail(&pkt->list, list);
}
+ *maxpkts = npkts;
ret = idx;
goto done;
-free_pbc_dma:
- if (dma_mapped)
- dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pkt:
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
- if (page) {
- kunmap(page);
- __free_page(page);
- } else
+ if (dma_addr)
dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
- kmem_cache_free(pq->pkt_slab, pkt);
+ else
+ kfree(pbc);
free_list:
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
@@ -568,10 +1052,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
struct list_head free_list;
struct qib_user_sdma_pkt *pkt;
struct qib_user_sdma_pkt *pkt_prev;
+ unsigned long flags;
int ret = 0;
+ if (!pq->num_sending)
+ return 0;
+
INIT_LIST_HEAD(&free_list);
+ /*
+ * We need this spin lock here because interrupt handler
+ * might modify this list in qib_user_sdma_send_desc(), also
+ * we can not get interrupted, otherwise it is a deadlock.
+ */
+ spin_lock_irqsave(&pq->sent_lock, flags);
list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
s64 descd = ppd->sdma_descq_removed - pkt->added;
@@ -582,7 +1076,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
/* one more packet cleaned */
ret++;
+ pq->num_sending--;
}
+ spin_unlock_irqrestore(&pq->sent_lock, flags);
if (!list_empty(&free_list)) {
u32 counter;
@@ -603,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
if (!pq)
return;
- kmem_cache_destroy(pq->pkt_slab);
+ pq->sdma_rb_node->refcount--;
+ if (pq->sdma_rb_node->refcount == 0) {
+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+ kfree(pq->sdma_rb_node);
+ }
dma_pool_destroy(pq->header_cache);
+ kmem_cache_destroy(pq->pkt_slab);
kfree(pq);
}
@@ -626,6 +1127,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq)
{
struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
int i;
if (!pq)
@@ -633,7 +1135,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
mutex_lock(&pq->lock);
- if (list_empty(&pq->sent)) {
+ if (!pq->num_pending && !pq->num_sending) {
mutex_unlock(&pq->lock);
break;
}
@@ -643,29 +1145,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
msleep(10);
}
- if (!list_empty(&pq->sent)) {
+ if (pq->num_pending || pq->num_sending) {
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
struct list_head free_list;
+ mutex_lock(&pq->lock);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Since we hold sdma_lock, it is safe without sent_lock.
+ */
+ if (pq->num_pending) {
+ list_for_each_entry_safe(pkt, pkt_prev,
+ &ppd->sdma_userpending, list) {
+ if (pkt->pq == pq) {
+ list_move_tail(&pkt->list, &pq->sent);
+ pq->num_pending--;
+ pq->num_sending++;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
INIT_LIST_HEAD(&free_list);
- mutex_lock(&pq->lock);
list_splice_init(&pq->sent, &free_list);
+ pq->num_sending = 0;
qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
mutex_unlock(&pq->lock);
}
}
-static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+static inline __le64 qib_sdma_make_desc0(u8 gen,
u64 addr, u64 dwlen, u64 dwoffset)
{
- u8 tmpgen;
-
- tmpgen = ppd->sdma_generation;
-
return cpu_to_le64(/* SDmaPhyAddr[31:0] */
((addr & 0xfffffffcULL) << 32) |
/* SDmaGeneration[1:0] */
- ((tmpgen & 3ULL) << 30) |
+ ((gen & 3ULL) << 30) |
/* SDmaDwordCount[10:0] */
((dwlen & 0x7ffULL) << 16) |
/* SDmaBufOffset[12:2] */
@@ -691,7 +1208,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr)
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
struct qib_user_sdma_pkt *pkt, int idx,
- unsigned ofs, u16 tail)
+ unsigned ofs, u16 tail, u8 gen)
{
const u64 addr = (u64) pkt->addr[idx].addr +
(u64) pkt->addr[idx].offset;
@@ -701,105 +1218,159 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
descqp = &ppd->sdma_descq[tail].qw[0];
- descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
- if (idx == 0)
+ descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
+ if (pkt->addr[idx].first_desc)
descq0 = qib_sdma_make_first_desc0(descq0);
- if (idx == pkt->naddr - 1)
+ if (pkt->addr[idx].last_desc) {
descq0 = qib_sdma_make_last_desc0(descq0);
+ if (ppd->sdma_intrequest) {
+ descq0 |= cpu_to_le64(1ULL << 15);
+ ppd->sdma_intrequest = 0;
+ }
+ }
descqp[0] = descq0;
descqp[1] = qib_sdma_make_desc1(addr);
}
-/* pq->lock must be held, get packets on the wire... */
-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
- struct qib_user_sdma_queue *pq,
- struct list_head *pktlist)
+void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
+ struct list_head *pktlist)
{
struct qib_devdata *dd = ppd->dd;
- int ret = 0;
- unsigned long flags;
- u16 tail;
- u8 generation;
- u64 descq_added;
-
- if (list_empty(pktlist))
- return 0;
-
- if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
- return -ECOMM;
-
- spin_lock_irqsave(&ppd->sdma_lock, flags);
+ u16 nfree, nsent;
+ u16 tail, tail_c;
+ u8 gen, gen_c;
- /* keep a copy for restoring purposes in case of problems */
- generation = ppd->sdma_generation;
- descq_added = ppd->sdma_descq_added;
-
- if (unlikely(!__qib_sdma_running(ppd))) {
- ret = -ECOMM;
- goto unlock;
- }
+ nfree = qib_sdma_descq_freecnt(ppd);
+ if (!nfree)
+ return;
- tail = ppd->sdma_descq_tail;
+retry:
+ nsent = 0;
+ tail_c = tail = ppd->sdma_descq_tail;
+ gen_c = gen = ppd->sdma_generation;
while (!list_empty(pktlist)) {
struct qib_user_sdma_pkt *pkt =
list_entry(pktlist->next, struct qib_user_sdma_pkt,
list);
- int i;
+ int i, j, c = 0;
unsigned ofs = 0;
u16 dtail = tail;
- if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
- goto unlock_check_tail;
-
- for (i = 0; i < pkt->naddr; i++) {
- qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
+ for (i = pkt->index; i < pkt->naddr && nfree; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
ofs += pkt->addr[i].length >> 2;
if (++tail == ppd->sdma_descq_cnt) {
tail = 0;
- ++ppd->sdma_generation;
+ ++gen;
+ ppd->sdma_intrequest = 1;
+ } else if (tail == (ppd->sdma_descq_cnt>>1)) {
+ ppd->sdma_intrequest = 1;
}
- }
-
- if ((ofs << 2) > ppd->ibmaxlen) {
- ret = -EMSGSIZE;
- goto unlock;
- }
+ nfree--;
+ if (pkt->addr[i].last_desc == 0)
+ continue;
- /*
- * If the packet is >= 2KB mtu equivalent, we have to use
- * the large buffers, and have to mark each descriptor as
- * part of a large buffer packet.
- */
- if (ofs > dd->piosize2kmax_dwords) {
- for (i = 0; i < pkt->naddr; i++) {
- ppd->sdma_descq[dtail].qw[0] |=
- cpu_to_le64(1ULL << 14);
- if (++dtail == ppd->sdma_descq_cnt)
- dtail = 0;
+ /*
+ * If the packet is >= 2KB mtu equivalent, we
+ * have to use the large buffers, and have to
+ * mark each descriptor as part of a large
+ * buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (j = pkt->index; j <= i; j++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
}
+ c += i + 1 - pkt->index;
+ pkt->index = i + 1; /* index for next first */
+ tail_c = dtail = tail;
+ gen_c = gen;
+ ofs = 0; /* reset for next packet */
}
- ppd->sdma_descq_added += pkt->naddr;
- pkt->added = ppd->sdma_descq_added;
- list_move_tail(&pkt->list, &pq->sent);
- ret++;
+ ppd->sdma_descq_added += c;
+ nsent += c;
+ if (pkt->index == pkt->naddr) {
+ pkt->added = ppd->sdma_descq_added;
+ pkt->pq->added = pkt->added;
+ pkt->pq->num_pending--;
+ spin_lock(&pkt->pq->sent_lock);
+ pkt->pq->num_sending++;
+ list_move_tail(&pkt->list, &pkt->pq->sent);
+ spin_unlock(&pkt->pq->sent_lock);
+ }
+ if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
+ break;
}
-unlock_check_tail:
/* advance the tail on the chip if necessary */
- if (ppd->sdma_descq_tail != tail)
- dd->f_sdma_update_tail(ppd, tail);
+ if (ppd->sdma_descq_tail != tail_c) {
+ ppd->sdma_generation = gen_c;
+ dd->f_sdma_update_tail(ppd, tail_c);
+ }
+
+ if (nfree && !list_empty(pktlist))
+ goto retry;
+
+ return;
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist, int count)
+{
+ unsigned long flags;
+
+ if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+ return -ECOMM;
-unlock:
- if (unlikely(ret < 0)) {
- ppd->sdma_generation = generation;
- ppd->sdma_descq_added = descq_added;
+ /* non-blocking mode */
+ if (pq->sdma_rb_node->refcount > 1) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return 0;
}
- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
- return ret;
+ /* In this case, descriptors from this process are not
+ * linked to ppd pending queue, interrupt handler
+ * won't update this process, it is OK to directly
+ * modify without sdma lock.
+ */
+
+
+ pq->num_pending += count;
+ /*
+ * Blocking mode for single rail process, we must
+ * release/regain sdma_lock to give other process
+ * chance to make progress. This is important for
+ * performance.
+ */
+ do {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ qib_user_sdma_send_desc(ppd, pktlist);
+ if (!list_empty(pktlist))
+ qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ } while (!list_empty(pktlist));
+
+ return 0;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -821,19 +1392,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
if (!qib_sdma_running(ppd))
goto done_unlock;
- if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+ /* if I have packets not complete yet */
+ if (pq->added > ppd->sdma_descq_removed)
qib_user_sdma_hwqueue_clean(ppd);
+ /* if I have complete packets to be freed */
+ if (pq->num_sending)
qib_user_sdma_queue_clean(ppd, pq);
- }
while (dim) {
- const int mxp = 8;
+ int mxp = 1;
+ int ndesc = 0;
- down_write(&current->mm->mmap_sem);
- ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
- if (ret <= 0)
+ ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
+ iov, dim, &list, &mxp, &ndesc);
+ if (ret < 0)
goto done_unlock;
else {
dim -= ret;
@@ -843,24 +1415,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
/* force packets onto the sdma hw queue... */
if (!list_empty(&list)) {
/*
- * Lazily clean hw queue. the 4 is a guess of about
- * how many sdma descriptors a packet will take (it
- * doesn't have to be perfect).
+ * Lazily clean hw queue.
*/
- if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+ if (qib_sdma_descq_freecnt(ppd) < ndesc) {
qib_user_sdma_hwqueue_clean(ppd);
- qib_user_sdma_queue_clean(ppd, pq);
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
}
- ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
if (ret < 0)
goto done_unlock;
else {
- npkts += ret;
- pq->counter += ret;
-
- if (!list_empty(&list))
- goto done_unlock;
+ npkts += mxp;
+ pq->counter += mxp;
}
}
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 9fab4048885..9bcfbd84298 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,14 +35,16 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
+#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
+#include <linux/random.h>
#include "qib.h"
#include "qib_common.h"
-static unsigned int ib_qib_qp_table_size = 251;
+static unsigned int ib_qib_qp_table_size = 256;
module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -181,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -222,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -331,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
* @qp: the QP to post on
* @wr: the work request to send
*/
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
+static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
+ int *scheduled)
{
struct qib_swqe *wqe;
u32 next;
@@ -433,11 +436,17 @@ bail_inval_free:
while (j) {
struct qib_sge *sge = &wqe->sg_list[--j];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
bail_inval:
ret = -EINVAL;
bail:
+ if (!ret && !wr->next &&
+ !qib_sdma_empty(
+ dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
+ qib_schedule_send(qp);
+ *scheduled = 1;
+ }
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -455,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct qib_qp *qp = to_iqp(ibqp);
int err = 0;
+ int scheduled = 0;
for (; wr; wr = wr->next) {
- err = qib_post_one_send(qp, wr);
+ err = qib_post_one_send(qp, wr, &scheduled);
if (err) {
*bad_wr = wr;
goto bail;
@@ -465,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
/* Try to do the send work in the caller's context. */
- qib_do_send(&qp->s_work);
+ if (!scheduled)
+ qib_do_send(&qp->s_work);
bail:
return err;
@@ -634,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
} else
goto drop;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- ibp->opstats[opcode & 0x7f].n_bytes += tlen;
- ibp->opstats[opcode & 0x7f].n_packets++;
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
+#ifdef CONFIG_DEBUG_FS
+ rcd->opstats->stats[opcode].n_bytes += tlen;
+ rcd->opstats->stats[opcode].n_packets++;
+#endif
/* Get the destination QP number. */
qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
@@ -649,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
if (mcast == NULL)
goto drop;
- ibp->n_multicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_multicast_rcv);
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
/*
@@ -659,17 +672,25 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
- qp = qib_lookup_qpn(ibp, qp_num);
- if (!qp)
- goto drop;
- ibp->n_unicast_rcv++;
+ if (rcd->lookaside_qp) {
+ if (rcd->lookaside_qpn != qp_num) {
+ if (atomic_dec_and_test(
+ &rcd->lookaside_qp->refcount))
+ wake_up(
+ &rcd->lookaside_qp->wait);
+ rcd->lookaside_qp = NULL;
+ }
+ }
+ if (!rcd->lookaside_qp) {
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+ rcd->lookaside_qp = qp;
+ rcd->lookaside_qpn = qp_num;
+ } else
+ qp = rcd->lookaside_qp;
+ this_cpu_inc(ibp->pmastats->n_unicast_rcv);
qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
- /*
- * Notify qib_destroy_qp() if it is waiting
- * for us to finish.
- */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
}
return;
@@ -903,8 +924,8 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
__raw_writel(last, piobuf);
}
-static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
- struct qib_qp *qp, int *retp)
+static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
+ struct qib_qp *qp)
{
struct qib_verbs_txreq *tx;
unsigned long flags;
@@ -916,8 +937,9 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
struct list_head *l = dev->txreq_free.next;
list_del(l);
+ spin_unlock(&dev->pending_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
- *retp = 0;
} else {
if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
list_empty(&qp->iowait)) {
@@ -925,14 +947,33 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
qp->s_flags |= QIB_S_WAIT_TX;
list_add_tail(&qp->iowait, &dev->txwait);
}
- tx = NULL;
qp->s_flags &= ~QIB_S_BUSY;
- *retp = -EBUSY;
+ spin_unlock(&dev->pending_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ tx = ERR_PTR(-EBUSY);
}
+ return tx;
+}
- spin_unlock(&dev->pending_lock);
- spin_unlock_irqrestore(&qp->s_lock, flags);
+static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
+ struct qib_qp *qp)
+{
+ struct qib_verbs_txreq *tx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ /* assume the list non empty */
+ if (likely(!list_empty(&dev->txreq_free))) {
+ struct list_head *l = dev->txreq_free.next;
+ list_del(l);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ } else {
+ /* call slow path to get the extra lock */
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ tx = __get_txreq(dev, qp);
+ }
return tx;
}
@@ -948,7 +989,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
if (tx->mr) {
- atomic_dec(&tx->mr->refcount);
+ qib_put_mr(tx->mr);
tx->mr = NULL;
}
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1112,9 +1153,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
goto bail;
}
- tx = get_txreq(dev, qp, &ret);
- if (!tx)
- goto bail;
+ tx = get_txreq(dev, qp);
+ if (IS_ERR(tx))
+ goto bail_tx;
control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
be16_to_cpu(hdr->lrh[0]) >> 12);
@@ -1185,6 +1226,9 @@ unaligned:
ibp->n_unaligned++;
bail:
return ret;
+bail_tx:
+ ret = PTR_ERR(tx);
+ goto bail;
}
/*
@@ -1303,7 +1347,7 @@ done:
}
qib_sendbuf_done(dd, pbufn);
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_wqe) {
@@ -1812,6 +1856,23 @@ bail:
return ret;
}
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
+{
+ struct ib_ah_attr attr;
+ struct ib_ah *ah = ERR_PTR(-EINVAL);
+ struct qib_qp *qp0;
+
+ memset(&attr, 0, sizeof attr);
+ attr.dlid = dlid;
+ attr.port_num = ppd_from_ibp(ibp)->port;
+ rcu_read_lock();
+ qp0 = rcu_dereference(ibp->qp0);
+ if (qp0)
+ ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ rcu_read_unlock();
+ return ah;
+}
+
/**
* qib_destroy_ah - destroy an address handle
* @ibah: the AH to destroy
@@ -1974,6 +2035,8 @@ static void init_ibport(struct qib_pportdata *ppd)
ibp->z_excessive_buffer_overrun_errors =
cntrs.excessive_buffer_overrun_errors;
ibp->z_vl15_dropped = cntrs.vl15_dropped;
+ RCU_INIT_POINTER(ibp->qp0, NULL);
+ RCU_INIT_POINTER(ibp->qp1, NULL);
}
/**
@@ -1990,12 +2053,15 @@ int qib_register_ib_device(struct qib_devdata *dd)
int ret;
dev->qp_table_size = ib_qib_qp_table_size;
- dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
+ get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
+ dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table,
GFP_KERNEL);
if (!dev->qp_table) {
ret = -ENOMEM;
goto err_qpt;
}
+ for (i = 0; i < dev->qp_table_size; i++)
+ RCU_INIT_POINTER(dev->qp_table[i], NULL);
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
@@ -2022,13 +2088,15 @@ int qib_register_ib_device(struct qib_devdata *dd)
spin_lock_init(&dev->lk_table.lock);
dev->lk_table.max = 1 << ib_qib_lkey_table_size;
lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
- dev->lk_table.table = (struct qib_mregion **)
+ dev->lk_table.table = (struct qib_mregion __rcu **)
__get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
if (dev->lk_table.table == NULL) {
ret = -ENOMEM;
goto err_lk;
}
- memset(dev->lk_table.table, 0, lk_tab_size);
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ for (i = 0; i < dev->lk_table.max; i++)
+ RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
INIT_LIST_HEAD(&dev->pending_mmaps);
spin_lock_init(&dev->pending_lock);
dev->mmap_offset = PAGE_SIZE;
@@ -2158,7 +2226,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->dma_ops = &qib_dma_mapping_ops;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
- QIB_IDSTR " %s", init_utsname()->nodename);
+ "Intel Infiniband HCA %s", init_utsname()->nodename);
ret = ib_register_device(ibdev, qib_create_port_files);
if (ret)
@@ -2168,7 +2236,8 @@ int qib_register_ib_device(struct qib_devdata *dd)
if (ret)
goto err_agents;
- if (qib_verbs_register_sysfs(dd))
+ ret = qib_verbs_register_sysfs(dd);
+ if (ret)
goto err_class;
goto bail;
@@ -2251,3 +2320,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
get_order(lk_tab_size));
kfree(dev->qp_table);
}
+
+/*
+ * This must be called with s_lock held.
+ */
+void qib_schedule_send(struct qib_qp *qp)
+{
+ if (qib_send_ok(qp)) {
+ struct qib_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ queue_work(ppd->qib_wq, &qp->s_work);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index bd57c127322..bfc8948fdd3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,6 +41,8 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
@@ -148,14 +150,14 @@ struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
-} __attribute__ ((packed));
+} __packed;
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
struct qib_other_headers {
__be32 bth[3];
@@ -176,7 +178,7 @@ struct qib_other_headers {
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
-} __attribute__ ((packed));
+} __packed;
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -193,12 +195,12 @@ struct qib_ib_header {
} l;
struct qib_other_headers oth;
} u;
-} __attribute__ ((packed));
+} __packed;
struct qib_pio_header {
__le32 pbc[2];
struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
/*
* There is one struct qib_mcast for each multicast GID.
@@ -266,7 +268,8 @@ struct qib_cq_wc {
*/
struct qib_cq {
struct ib_cq ibcq;
- struct work_struct comptask;
+ struct kthread_work comptask;
+ struct qib_devdata *dd;
spinlock_t lock; /* protect changes in this struct */
u8 notify;
u8 triggered;
@@ -301,6 +304,10 @@ struct qib_mregion {
int access_flags;
u32 max_segs; /* number of qib_segs in all the arrays */
u32 mapsz; /* size of the map array */
+ u8 page_shift; /* 0 - non unform/non powerof2 sizes */
+ u8 lkey_published; /* in global table */
+ struct completion comp; /* complete when refcount goes to zero */
+ struct rcu_head list;
atomic_t refcount;
struct qib_segarray *map[0]; /* the segments */
};
@@ -366,9 +373,10 @@ struct qib_rwq {
struct qib_rq {
struct qib_rwq *wq;
- spinlock_t lock; /* protect changes in this struct */
u32 size; /* size of RWQE array */
u8 max_sge;
+ spinlock_t lock /* protect changes in this struct */
+ ____cacheline_aligned_in_smp;
};
struct qib_srq {
@@ -411,32 +419,75 @@ struct qib_ack_entry {
*/
struct qib_qp {
struct ib_qp ibqp;
- struct qib_qp *next; /* link list for QPN hash table */
- struct qib_qp *timer_next; /* link list for qib_ib_timer() */
- struct list_head iowait; /* link for wait PIO buf */
- struct list_head rspwait; /* link for waititing to respond */
+ /* read mostly fields above and below */
struct ib_ah_attr remote_ah_attr;
struct ib_ah_attr alt_ah_attr;
- struct qib_ib_header s_hdr; /* next packet header to send */
- atomic_t refcount;
- wait_queue_head_t wait;
- wait_queue_head_t wait_dma;
- struct timer_list s_timer;
- struct work_struct s_work;
+ struct qib_qp __rcu *next; /* link list for QPN hash table */
+ struct qib_swqe *s_wq; /* send work queue */
struct qib_mmap_info *ip;
+ struct qib_ib_header *s_hdr; /* next packet header to send */
+ unsigned long timeout_jiffies; /* computed from timeout */
+
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 pmtu; /* decoded from path_mtu */
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+
+ u8 state; /* QP state */
+ u8 qp_access_flags;
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 timeout; /* Timeout for this QP */
+ u8 s_srate;
+ u8 s_mig_state;
+ u8 port_num;
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_draining;
+
+ /* start of read/write fields */
+
+ atomic_t refcount ____cacheline_aligned_in_smp;
+ wait_queue_head_t wait;
+
+
+ struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
+ ____cacheline_aligned_in_smp;
+ struct qib_sge_state s_rdma_read_sge;
+
+ spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
+ unsigned long r_aflags;
+ u64 r_wr_id; /* ID for current receive WQE */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+
+ u8 r_state; /* opcode of last packet received */
+ u8 r_flags;
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+
+ struct list_head rspwait; /* link for waititing to respond */
+
+ struct qib_sge_state r_sge; /* current receive data */
+ struct qib_rq r_rq; /* receive work queue */
+
+ spinlock_t s_lock ____cacheline_aligned_in_smp;
struct qib_sge_state *s_cur_sge;
+ u32 s_flags;
struct qib_verbs_txreq *s_tx;
- struct qib_mregion *s_rdma_mr;
+ struct qib_swqe *s_wqe;
struct qib_sge_state s_sge; /* current send request data */
- struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
- struct qib_sge_state s_ack_rdma_sge;
- struct qib_sge_state s_rdma_read_sge;
- struct qib_sge_state r_sge; /* current receive data */
- spinlock_t r_lock; /* used for APM */
- spinlock_t s_lock;
+ struct qib_mregion *s_rdma_mr;
atomic_t s_dma_busy;
- unsigned processor_id; /* Processor ID QP is bound to */
- u32 s_flags;
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
@@ -447,58 +498,34 @@ struct qib_qp {
u32 s_psn; /* current packet sequence number */
u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
- u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
- u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
- u64 r_wr_id; /* ID for current receive WQE */
- unsigned long r_aflags;
- u32 r_len; /* total length of r_sge */
- u32 r_rcv_len; /* receive data len processed */
- u32 r_psn; /* expected rcv packet sequence number */
- u32 r_msn; /* message sequence number */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u16 s_rdma_ack_cnt;
- u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
- u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
- u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 r_flags;
- u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
- u8 r_head_ack_queue; /* index into s_ack_queue[] */
- u8 qp_access_flags;
- u8 s_max_sge; /* size of s_wq->sg_list */
- u8 s_retry_cnt; /* number of times to retry */
- u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_pkey_index; /* PKEY index to use */
- u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
- u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
- u8 s_srate;
- u8 s_draining;
- u8 s_mig_state;
- u8 timeout; /* Timeout for this QP */
- u8 alt_timeout; /* Alternate path timeout for this QP */
- u8 port_num;
- enum ib_mtu path_mtu;
- u32 remote_qpn;
- u32 qkey; /* QKEY for this QP (for UD or RD) */
- u32 s_size; /* send work queue size */
- u32 s_head; /* new entries added here */
- u32 s_tail; /* next entry to process */
- u32 s_cur; /* current work queue entry */
- u32 s_acked; /* last un-ACK'ed entry */
- u32 s_last; /* last completed entry */
- u32 s_ssn; /* SSN of tail entry */
- u32 s_lsn; /* limit sequence number (credit) */
- struct qib_swqe *s_wq; /* send work queue */
- struct qib_swqe *s_wqe;
- struct qib_rq r_rq; /* receive work queue */
- struct qib_sge r_sg_list[0]; /* verified SGEs */
+
+ struct qib_sge_state s_ack_rdma_sge;
+ struct timer_list s_timer;
+ struct list_head iowait; /* link for wait PIO buf */
+
+ struct work_struct s_work;
+
+ wait_queue_head_t wait_dma;
+
+ struct qib_sge r_sg_list[0] /* verified SGEs */
+ ____cacheline_aligned_in_smp;
};
/*
@@ -625,7 +652,7 @@ struct qib_lkey_table {
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */
- struct qib_mregion **table;
+ struct qib_mregion __rcu **table;
};
struct qib_opcode_stats {
@@ -633,9 +660,20 @@ struct qib_opcode_stats {
u64 n_bytes; /* total number of bytes */
};
+struct qib_opcode_stats_perctx {
+ struct qib_opcode_stats stats[128];
+};
+
+struct qib_pma_counters {
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+};
+
struct qib_ibport {
- struct qib_qp *qp0;
- struct qib_qp *qp1;
+ struct qib_qp __rcu *qp0;
+ struct qib_qp __rcu *qp1;
struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
struct qib_ah *sm_ah;
struct qib_ah *smi_ah;
@@ -649,10 +687,11 @@ struct qib_ibport {
__be64 mkey;
__be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
u64 tid; /* TID for traps */
- u64 n_unicast_xmit; /* total unicast packets sent */
- u64 n_unicast_rcv; /* total unicast packets received */
- u64 n_multicast_xmit; /* total multicast packets sent */
- u64 n_multicast_rcv; /* total multicast packets received */
+ struct qib_pma_counters __percpu *pmastats;
+ u64 z_unicast_xmit; /* starting count for PMA */
+ u64 z_unicast_rcv; /* starting count for PMA */
+ u64 z_multicast_xmit; /* starting count for PMA */
+ u64 z_multicast_rcv; /* starting count for PMA */
u64 z_symbol_error_counter; /* starting count for PMA */
u64 z_link_error_recovery_counter; /* starting count for PMA */
u64 z_link_downed_counter; /* starting count for PMA */
@@ -699,15 +738,15 @@ struct qib_ibport {
u8 vl_high_limit;
u8 sl_to_vl[16];
- struct qib_opcode_stats opstats[128];
};
+
struct qib_ibdev {
struct ib_device ibdev;
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock; /* protect mmap_offset */
u32 mmap_offset;
- struct qib_mregion *dma_mr;
+ struct qib_mregion __rcu *dma_mr;
/* QP numbers are shared by all IB ports */
struct qib_qpn_table qpn_table;
@@ -718,12 +757,13 @@ struct qib_ibdev {
struct list_head memwait; /* list for wait kernel memory */
struct list_head txreq_free;
struct timer_list mem_timer;
- struct qib_qp **qp_table;
+ struct qib_qp __rcu **qp_table;
struct qib_pio_header *pio_hdrs;
dma_addr_t pio_hdrs_phys;
/* list of QPs waiting for RNR timer */
spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
- unsigned qp_table_size; /* size of the hash table */
+ u32 qp_table_size; /* size of the hash table */
+ u32 qp_rnd; /* random bytes for hash */
spinlock_t qpt_lock;
u32 n_piowait;
@@ -741,6 +781,10 @@ struct qib_ibdev {
spinlock_t n_srqs_lock;
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
+#ifdef CONFIG_DEBUG_FS
+ /* per HCA debugfs */
+ struct dentry *qib_ibdev_dbg;
+#endif
};
struct qib_verbs_counters {
@@ -805,22 +849,10 @@ static inline int qib_send_ok(struct qib_qp *qp)
!(qp->s_flags & QIB_S_ANY_WAIT_SEND));
}
-extern struct workqueue_struct *qib_wq;
-extern struct workqueue_struct *qib_cq_wq;
-
/*
* This must be called with s_lock held.
*/
-static inline void qib_schedule_send(struct qib_qp *qp)
-{
- if (qib_send_ok(qp)) {
- if (qp->processor_id == smp_processor_id())
- queue_work(qib_wq, &qp->s_work);
- else
- queue_work_on(qp->processor_id,
- qib_wq, &qp->s_work);
- }
-}
+void qib_schedule_send(struct qib_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
{
@@ -893,6 +925,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
void qib_free_qpn_table(struct qib_qpn_table *qpt);
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter;
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
+
+int qib_qp_iter_next(struct qib_qp_iter *iter);
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
+
+#endif
+
void qib_get_credit(struct qib_qp *qp, u32 aeth);
unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -917,6 +961,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
+
void qib_rc_rnr_retry(unsigned long arg);
void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
@@ -928,9 +974,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+void qib_free_lkey(struct qib_mregion *mr);
int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_sge *isge, struct ib_sge *sge, int acc);
@@ -953,6 +999,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int qib_destroy_srq(struct ib_srq *ibsrq);
+int qib_cq_init(struct qib_devdata *dd);
+
+void qib_cq_exit(struct qib_devdata *dd);
+
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
@@ -998,6 +1048,29 @@ int qib_unmap_fmr(struct list_head *fmr_list);
int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+static inline void qib_get_mr(struct qib_mregion *mr)
+{
+ atomic_inc(&mr->refcount);
+}
+
+void mr_rcu_callback(struct rcu_head *list);
+
+static inline void qib_put_mr(struct qib_mregion *mr)
+{
+ if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ call_rcu(&mr->list, mr_rcu_callback);
+}
+
+static inline void qib_put_ss(struct qib_sge_state *ss)
+{
+ while (ss->num_sge) {
+ qib_put_mr(ss->sge.mr);
+ if (--ss->num_sge)
+ ss->sge = *ss->sg_list++;
+ }
+}
+
+
void qib_release_mmap_info(struct kref *ref);
struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index 561b8bca406..1d7281c5a02 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd)
u64 atmp;
atmp = pioaddr & ~(piolen - 1);
if (atmp < addr || (atmp + piolen) > (addr + len)) {
- qib_dev_err(dd, "No way to align address/size "
- "(%llx/%llx), no WC mtrr\n",
- (unsigned long long) atmp,
- (unsigned long long) piolen << 1);
+ qib_dev_err(dd,
+ "No way to align address/size (%llx/%llx), no WC mtrr\n",
+ (unsigned long long) atmp,
+ (unsigned long long) piolen << 1);
ret = -ENODEV;
} else {
pioaddr = atmp;
@@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd)
if (cookie < 0) {
{
qib_devinfo(dd->pcidev,
- "mtrr_add() WC for PIO bufs "
- "failed (%d)\n",
+ "mtrr_add() WC for PIO bufs failed (%d)\n",
cookie);
ret = -EINVAL;
}