aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/ipath
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--drivers/infiniband/hw/ipath/Kconfig8
-rw-r--r--drivers/infiniband/hw/ipath/Makefile10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h178
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c196
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c184
-rw-r--r--drivers/infiniband/hw/ipath/ipath_dma.c179
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c2042
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c749
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c1490
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c290
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c899
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c1264
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c541
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c1141
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h738
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c43
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c366
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c553
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mmap.c70
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c78
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c561
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c1345
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h255
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c624
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c818
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c131
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c135
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c586
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c186
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c456
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c116
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.c875
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.h (renamed from drivers/infiniband/hw/ipath/ipath_layer.h)51
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c1013
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h249
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_ppc64.c22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c42
39 files changed, 11907 insertions, 6609 deletions
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 574a678e7fd..1d9bb115cbf 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,9 +1,11 @@
config INFINIBAND_IPATH
- tristate "QLogic InfiniPath Driver"
- depends on PCI_MSI && 64BIT && INFINIBAND
+ tristate "QLogic HTX HCA support"
+ depends on 64BIT && NET && HT_IRQ
---help---
- This is a driver for QLogic InfiniPath host channel adapters,
+ This is a driver for the obsolete QLogic Hyper-Transport
+ IB host channel adapter (model QHT7140),
including InfiniBand verbs support. This driver allows these
devices to be used with both kernel upper level protocols such
as IP-over-InfiniBand as well as with userspace applications
(in conjunction with InfiniBand userspace access).
+ For QLogic PCIe QLE based cards, use the QIB driver instead.
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index 5e29cb0095e..4496f2820c9 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,4 +1,4 @@
-EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
+ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-DIPATH_KERN_TYPE=0
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
@@ -6,30 +6,32 @@ obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
ib_ipath-y := \
ipath_cq.o \
ipath_diag.o \
+ ipath_dma.o \
ipath_driver.o \
ipath_eeprom.o \
ipath_file_ops.o \
ipath_fs.o \
- ipath_iba6110.o \
- ipath_iba6120.o \
ipath_init_chip.o \
ipath_intr.o \
ipath_keys.o \
- ipath_layer.o \
ipath_mad.o \
ipath_mmap.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
ipath_ruc.o \
+ ipath_sdma.o \
ipath_srq.o \
ipath_stats.o \
ipath_sysfs.o \
ipath_uc.o \
ipath_ud.o \
ipath_user_pages.o \
+ ipath_user_sdma.o \
ipath_verbs_mcast.o \
ipath_verbs.o
+ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
+
ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index f577905e3ac..28cfe97cf1e 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -75,9 +75,23 @@
#define IPATH_IB_LINKDOWN 0
#define IPATH_IB_LINKARM 1
#define IPATH_IB_LINKACTIVE 2
-#define IPATH_IB_LINKINIT 3
+#define IPATH_IB_LINKDOWN_ONLY 3
#define IPATH_IB_LINKDOWN_SLEEP 4
#define IPATH_IB_LINKDOWN_DISABLE 5
+#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
+#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
+#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */
+
+/*
+ * These 3 values (SDR and DDR may be ORed for auto-speed
+ * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
+ * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
+ * are also the the possible values for ipath_link_speed_enabled and active
+ * The values were chosen to match values used within the IB spec.
+ */
+#define IPATH_IB_SDR 1
+#define IPATH_IB_DDR 2
/*
* stats maintained by the driver. For now, at least, this is global
@@ -98,8 +112,7 @@ struct infinipath_stats {
__u64 sps_hwerrs;
/* number of times IB link changed state unexpectedly */
__u64 sps_iblink;
- /* kernel receive interrupts that didn't read intstat */
- __u64 sps_fastrcvint;
+ __u64 sps_unused; /* was fastrcvint, no longer implemented */
/* number of kernel (port0) packets received */
__u64 sps_port0pkts;
/* number of "ethernet" packets sent by driver */
@@ -141,8 +154,9 @@ struct infinipath_stats {
* packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
+ __u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
- __u64 __sps_pad[46];
+ __u64 __sps_pad[45];
};
/*
@@ -185,6 +199,11 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
+#define IPATH_RUNTIME_MASTER 0x10
+#define IPATH_RUNTIME_NODMA_RTAIL 0x80
+#define IPATH_RUNTIME_SDMA 0x200
+#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
+#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
/*
* This structure is returned by ipath_userinit() immediately after
@@ -202,7 +221,8 @@ struct ipath_base_info {
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
- __u32 spi_port;
+ __u16 spi_port;
+ __u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -218,7 +238,7 @@ struct ipath_base_info {
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
- /* size of a single receive header queue entry. */
+ /* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
@@ -310,6 +330,18 @@ struct ipath_base_info {
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
+
+ /* shared memory pages for subports if port is shared */
+ __u64 spi_subport_uregbase;
+ __u64 spi_subport_rcvegrbuf;
+ __u64 spi_subport_rcvhdr_base;
+
+ /* shared memory page for hardware port if it is shared */
+ __u64 spi_port_uregbase;
+ __u64 spi_port_rcvegrbuf;
+ __u64 spi_port_rcvhdr_base;
+ __u64 spi_port_rcvhdr_tailaddr;
+
} __attribute__ ((aligned(8)));
@@ -328,12 +360,12 @@ struct ipath_base_info {
/*
* Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * a within a major version, however if user software is larger
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 6
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -379,7 +411,16 @@ struct ipath_user_info {
*/
__u32 spu_rcvhdrsize;
- __u64 spu_unused; /* kept for compatible layout */
+ /*
+ * If two or more processes wish to share a port, each process
+ * must set the spu_subport_cnt and spu_subport_id to the same
+ * values. The only restriction on the spu_subport_id is that
+ * it be unique for a given node.
+ */
+ __u16 spu_subport_cnt;
+ __u16 spu_subport_id;
+
+ __u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
@@ -392,19 +433,37 @@ struct ipath_user_info {
#define IPATH_CMD_MIN 16
-#define IPATH_CMD_USER_INIT 16 /* set up userspace */
+#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
+#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
+#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 24 /* set up userspace */
+#define IPATH_CMD_UNUSED_1 25
+#define IPATH_CMD_UNUSED_2 26
+#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
+#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
+#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
+/* 30 is unused */
+#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */
+#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */
-#define IPATH_CMD_MAX 21
+/*
+ * Poll types
+ */
+#define IPATH_POLL_TYPE_URGENT 0x01
+#define IPATH_POLL_TYPE_OVERFLOW 0x02
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
- __u32 port; /* port on unit assigned to caller */
+ __u16 port; /* port on unit assigned to caller */
+ __u16 subport; /* subport on unit assigned to caller */
+ __u16 num_ports; /* number of ports available on unit */
+ __u16 num_subports; /* number of subports opened on port */
};
struct ipath_tid_info {
@@ -428,13 +487,30 @@ struct ipath_cmd {
union {
struct ipath_tid_info tid_info;
struct ipath_user_info user_info;
+
+ /*
+ * address in userspace where we should put the sdma
+ * inflight counter
+ */
+ __u64 sdma_inflight;
+ /*
+ * address in userspace where we should put the sdma
+ * completion counter
+ */
+ __u64 sdma_complete;
/* address in userspace of struct ipath_port_info to
write result to */
__u64 port_info;
/* enable/disable receipt of packets */
__u32 recv_ctrl;
+ /* enable/disable armlaunch errors (non-zero to enable) */
+ __u32 armlaunch_ctrl;
/* partition key to set */
__u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
+ /* type of polling we want */
+ __u16 poll_type;
} cmd;
};
@@ -463,13 +539,30 @@ struct __ipath_sendpkt {
struct ipath_iovec sps_iov[4];
};
-/* Passed into diag data special file's ->write method. */
+/*
+ * diagnostics can send a packet by "writing" one of the following
+ * two structs to diag data special file
+ * The first is the legacy version for backward compatibility
+ */
struct ipath_diag_pkt {
__u32 unit;
__u64 data;
__u32 len;
};
+/* The second diag_pkt struct is the expanded version that allows
+ * more control over the packet, specifically, by allowing a custom
+ * pbc (+ static rate) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+struct ipath_diag_xpkt {
+ __u64 data;
+ __u64 pbc_wd;
+ __u32 unit;
+ __u32 len;
+};
+
/*
* Data layout in I2C flash (for GUID, etc.)
* All fields are little-endian binary unless otherwise stated
@@ -514,7 +607,7 @@ struct ipath_flash {
struct infinipath_counters {
__u64 LBIntCnt;
__u64 LBFlowStallCnt;
- __u64 Reserved1;
+ __u64 TxSDmaDescCnt; /* was Reserved1 */
__u64 TxUnsupVLErrCnt;
__u64 TxDataPktCnt;
__u64 TxFlowPktCnt;
@@ -550,12 +643,26 @@ struct infinipath_counters {
__u64 RxP6HdrEgrOvflCnt;
__u64 RxP7HdrEgrOvflCnt;
__u64 RxP8HdrEgrOvflCnt;
- __u64 Reserved6;
- __u64 Reserved7;
+ __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
+ __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
+ __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
__u64 IBStatusChangeCnt;
__u64 IBLinkErrRecoveryCnt;
__u64 IBLinkDownedCnt;
__u64 IBSymbolErrCnt;
+ /* The following are new for IBA7220 */
+ __u64 RxVL15DroppedPktCnt;
+ __u64 RxOtherLocalPhyErrCnt;
+ __u64 PcieRetryBufDiagQwordCnt;
+ __u64 ExcessBufferOvflCnt;
+ __u64 LocalLinkIntegrityErrCnt;
+ __u64 RxVlErrCnt;
+ __u64 RxDlidFltrCnt;
};
/*
@@ -570,8 +677,12 @@ struct infinipath_counters {
#define INFINIPATH_RHF_LENGTH_SHIFT 0
#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0x7FF
+#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
+#define INFINIPATH_RHF_SEQ_MASK 0xF
+#define INFINIPATH_RHF_SEQ_SHIFT 0
+#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
+#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
#define INFINIPATH_RHF_H_ICRCERR 0x80000000
#define INFINIPATH_RHF_H_VCRCERR 0x40000000
#define INFINIPATH_RHF_H_PARITYERR 0x20000000
@@ -581,6 +692,8 @@ struct infinipath_counters {
#define INFINIPATH_RHF_H_TIDERR 0x02000000
#define INFINIPATH_RHF_H_MKERR 0x01000000
#define INFINIPATH_RHF_H_IBERR 0x00800000
+#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000
+#define INFINIPATH_RHF_L_USE_EGR 0x80000000
#define INFINIPATH_RHF_L_SWA 0x00008000
#define INFINIPATH_RHF_L_SWB 0x00004000
@@ -596,10 +709,15 @@ struct infinipath_counters {
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
#define INFINIPATH_SP_TEST 0x40
#define INFINIPATH_SP_TESTEBP 0x20
+#define INFINIPATH_SP_TRIGGER_SHIFT 15
/* SendPIOAvail bits */
#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
@@ -610,7 +728,7 @@ struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
- * Port 3, TID 11, offset 14.
+ * Port 4, TID 11, offset 13.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
@@ -666,6 +784,7 @@ struct ether_header {
#define IPATH_MSN_MASK 0xFFFFFF
#define IPATH_QPN_MASK 0xFFFFFF
#define IPATH_MULTICAST_LID_BASE 0xC000
+#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
#define IPATH_MULTICAST_QPN 0xFFFFFF
/* Receive Header Queue: receive type (from infinipath) */
@@ -685,7 +804,7 @@ struct ether_header {
*/
static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
{
- return __le32_to_cpu(rbuf[1]);
+ return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
}
static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
@@ -706,6 +825,23 @@ static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
& INFINIPATH_RHF_EGRINDEX_MASK;
}
+static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
+ & INFINIPATH_RHF_SEQ_MASK;
+}
+
+static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
+ & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
+}
+
+static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
+{
+ return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
+}
+
static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
{
return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 049221bc590..0416c6c0e12 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,6 +32,7 @@
*/
#include <linux/err.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
@@ -46,7 +47,7 @@
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
- struct ipath_cq_wc *wc = cq->queue;
+ struct ipath_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
@@ -57,6 +58,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
+ wc = cq->queue;
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
@@ -75,7 +77,25 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
- wc->queue[head] = *entry;
+ if (cq->ip) {
+ wc->uqueue[head].wr_id = entry->wr_id;
+ wc->uqueue[head].status = entry->status;
+ wc->uqueue[head].opcode = entry->opcode;
+ wc->uqueue[head].vendor_err = entry->vendor_err;
+ wc->uqueue[head].byte_len = entry->byte_len;
+ wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
+ wc->uqueue[head].qp_num = entry->qp->qp_num;
+ wc->uqueue[head].src_qp = entry->src_qp;
+ wc->uqueue[head].wc_flags = entry->wc_flags;
+ wc->uqueue[head].pkey_index = entry->pkey_index;
+ wc->uqueue[head].slid = entry->slid;
+ wc->uqueue[head].sl = entry->sl;
+ wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+ wc->uqueue[head].port_num = entry->port_num;
+ /* Make sure entry is written before the head index. */
+ smp_wmb();
+ } else
+ wc->kqueue[head] = *entry;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -109,24 +129,38 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ipath_cq_wc *wc = cq->queue;
+ struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
+ u32 tail;
+
+ /* The kernel can only poll a kernel completion queue */
+ if (cq->ip) {
+ npolled = -EINVAL;
+ goto bail;
+ }
spin_lock_irqsave(&cq->lock, flags);
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (wc->tail == wc->head)
+ if (tail == wc->head)
break;
- *entry = wc->queue[wc->tail];
- if (wc->tail >= cq->ibcq.cqe)
- wc->tail = 0;
+ /* The kernel doesn't need a RMB since it has the lock. */
+ *entry = wc->kqueue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
else
- wc->tail++;
+ tail++;
}
+ wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
+bail:
return npolled;
}
@@ -163,7 +197,7 @@ static void send_complete(unsigned long data)
*
* Called by ib_create_cq() in the generic verbs code.
*/
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -171,17 +205,13 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
struct ipath_cq *cq;
struct ipath_cq_wc *wc;
struct ib_cq *ret;
+ u32 sz;
if (entries < 1 || entries > ib_ipath_max_cqes) {
ret = ERR_PTR(-EINVAL);
goto done;
}
- if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
-
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
@@ -196,7 +226,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
- wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
+ else
+ sz += sizeof(struct ib_wc) * (entries + 1);
+ wc = vmalloc_user(sz);
if (!wc) {
ret = ERR_PTR(-ENOMEM);
goto bail_cq;
@@ -207,36 +242,39 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) wc;
int err;
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
+ cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
+ if (!cq->ip) {
+ ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wc;
+ err = ib_copy_to_udata(udata, &cq->ip->offset,
+ sizeof(cq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
}
- cq->ip = ip;
- ip->context = context;
- ip->obj = wc;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(*wc) +
- sizeof(struct ib_wc) * entries);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
} else
cq->ip = NULL;
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
+ if (cq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -253,15 +291,14 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
ret = &cq->ibcq;
- dev->n_cqs_allocated++;
goto done;
+bail_ip:
+ kfree(cq->ip);
bail_wc:
vfree(wc);
-
bail_cq:
kfree(cq);
-
done:
return ret;
}
@@ -280,7 +317,9 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
+ spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
+ spin_unlock(&dev->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, ipath_release_mmap_info);
else
@@ -293,17 +332,18 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
/**
* ipath_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
- * @notify: the type of notification to request
+ * @notify_flags: the type of notification to request
*
* Returns 0 for success.
*
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
*/
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct ipath_cq *cq = to_icq(ibcq);
unsigned long flags;
+ int ret = 0;
spin_lock_irqsave(&cq->lock, flags);
/*
@@ -311,18 +351,31 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
- cq->notify = notify;
+ cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->queue->head != cq->queue->tail)
+ ret = 1;
+
spin_unlock_irqrestore(&cq->lock, flags);
- return 0;
+
+ return ret;
}
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ipath_cq_wc *old_wc = cq->queue;
+ struct ipath_cq_wc *old_wc;
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;
+ u32 sz;
if (cqe < 1 || cqe > ib_ipath_max_cqes) {
ret = -EINVAL;
@@ -332,22 +385,24 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ else
+ sz += sizeof(struct ib_wc) * (cqe + 1);
+ wc = vmalloc_user(sz);
if (!wc) {
ret = -ENOMEM;
goto bail;
}
- /*
- * Return the address of the WC as the offset to mmap.
- * See ipath_mmap() for details.
- */
+ /* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
- __u64 offset = (__u64) wc;
+ __u64 offset = 0;
ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (ret)
- goto bail;
+ goto bail_free;
}
spin_lock_irq(&cq->lock);
@@ -355,6 +410,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
+ old_wc = cq->queue;
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
@@ -366,13 +422,14 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
else
n = head - tail;
if (unlikely((u32)cqe < n)) {
- spin_unlock_irq(&cq->lock);
- vfree(wc);
- ret = -EOVERFLOW;
- goto bail;
+ ret = -EINVAL;
+ goto bail_unlock;
}
for (n = 0; tail != head; n++) {
- wc->queue[n] = old_wc->queue[tail];
+ if (cq->ip)
+ wc->uqueue[n] = old_wc->uqueue[tail];
+ else
+ wc->kqueue[n] = old_wc->kqueue[tail];
if (tail == (u32) cq->ibcq.cqe)
tail = 0;
else
@@ -390,17 +447,32 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_mmap_info *ip = cq->ip;
- ip->obj = wc;
- ip->size = PAGE_ALIGN(sizeof(*wc) +
- sizeof(struct ib_wc) * cqe);
+ ipath_update_mmap_info(dev, ip, sz, wc);
+
+ /*
+ * Return the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
ret = 0;
+ goto bail;
+bail_unlock:
+ spin_unlock_irq(&cq->lock);
+bail_free:
+ vfree(wc);
bail:
return ret;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8..65926cd3575 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -55,8 +55,9 @@
#define __IPATH_PKTDBG 0x80 /* print packet data */
/* print process startup (init)/exit messages */
#define __IPATH_PROCDBG 0x100
-/* print mmap/nopage stuff, not using VDBG any more */
+/* print mmap/fault stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x200
+#define __IPATH_ERRPKTDBG 0x400
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
@@ -65,6 +66,7 @@
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
+#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
#else /* _IPATH_DEBUGGING */
@@ -80,7 +82,7 @@
#define __IPATH_VERBDBG 0x0 /* very verbose debug */
#define __IPATH_PKTDBG 0x0 /* print packet data */
#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */
-/* print mmap/nopage stuff, not using VDBG any more */
+/* print mmap/fault stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
@@ -88,6 +90,7 @@
#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */
#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on */
+#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */
#endif /* _IPATH_DEBUGGING */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 29958b6e021..45802e97332 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -44,6 +44,8 @@
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/export.h>
#include <asm/uaccess.h>
#include "ipath_kernel.h"
@@ -59,28 +61,65 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off);
-static struct file_operations diag_file_ops = {
+static const struct file_operations diag_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diag_write,
.read = ipath_diag_read,
.open = ipath_diag_open,
- .release = ipath_diag_release
+ .release = ipath_diag_release,
+ .llseek = default_llseek,
};
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off);
+
+static const struct file_operations diagpkt_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_diagpkt_write,
+ .llseek = noop_llseek,
+};
+
+static atomic_t diagpkt_count = ATOMIC_INIT(0);
+static struct cdev *diagpkt_cdev;
+static struct device *diagpkt_dev;
+
int ipath_diag_add(struct ipath_devdata *dd)
{
char name[16];
+ int ret = 0;
+
+ if (atomic_inc_return(&diagpkt_count) == 1) {
+ ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+ "ipath_diagpkt", &diagpkt_file_ops,
+ &diagpkt_cdev, &diagpkt_dev);
+
+ if (ret) {
+ ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
+ "device: %d", ret);
+ goto done;
+ }
+ }
snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
- return ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
- &diag_file_ops, &dd->diag_cdev,
- &dd->diag_class_dev);
+ ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
+ &diag_file_ops, &dd->diag_cdev,
+ &dd->diag_dev);
+ if (ret)
+ ipath_dev_err(dd, "Couldn't create %s device: %d",
+ name, ret);
+
+done:
+ return ret;
}
void ipath_diag_remove(struct ipath_devdata *dd)
{
- ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_class_dev);
+ if (atomic_dec_and_test(&diagpkt_count))
+ ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
+
+ ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
}
/**
@@ -261,7 +300,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
}
fp->private_data = dd;
- ipath_diag_inuse = 1;
+ ipath_diag_inuse = -2;
diag_set_link = 0;
ret = 0;
@@ -275,30 +314,6 @@ bail:
return ret;
}
-static ssize_t ipath_diagpkt_write(struct file *fp,
- const char __user *data,
- size_t count, loff_t *off);
-
-static struct file_operations diagpkt_file_ops = {
- .owner = THIS_MODULE,
- .write = ipath_diagpkt_write,
-};
-
-static struct cdev *diagpkt_cdev;
-static struct class_device *diagpkt_class_dev;
-
-int __init ipath_diagpkt_add(void)
-{
- return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
- "ipath_diagpkt", &diagpkt_file_ops,
- &diagpkt_cdev, &diagpkt_class_dev);
-}
-
-void __exit ipath_diagpkt_remove(void)
-{
- ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
-}
-
/**
* ipath_diagpkt_write - write an IB packet
* @fp: the diag data device file pointer
@@ -311,20 +326,32 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
- struct ipath_diag_pkt dp;
+ u32 plen, pbufn, maxlen_reserve;
+ struct ipath_diag_pkt odp;
+ struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct ipath_devdata *dd;
ssize_t ret = 0;
u64 val;
+ u32 l_state, lt_state; /* LinkState, LinkTrainingState */
- if (count < sizeof(dp)) {
- ret = -EINVAL;
- goto bail;
- }
- if (copy_from_user(&dp, data, sizeof(dp))) {
- ret = -EFAULT;
+ if (count == sizeof(dp)) {
+ if (copy_from_user(&dp, data, sizeof(dp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ } else if (count == sizeof(odp)) {
+ if (copy_from_user(&odp, data, sizeof(odp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ dp.len = odp.len;
+ dp.unit = odp.unit;
+ dp.data = odp.data;
+ dp.pbc_wd = 0;
+ } else {
+ ret = -EINVAL;
goto bail;
}
@@ -334,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- clen = dp.len >> 2;
+ plen = dp.len >> 2;
dd = ipath_lookup(dp.unit);
if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -360,25 +387,39 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -ENODEV;
goto bail;
}
- val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
- if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
- val != IPATH_IBSTATE_ACTIVE) {
+ /*
+ * Want to skip check for l_state if using custom PBC,
+ * because we might be trying to force an SM packet out.
+ * first-cut, skip _all_ state checking in that case.
+ */
+ val = ipath_ib_state(dd, dd->ipath_lastibcstat);
+ lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+ l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+ if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
+ (val != dd->ib_init && val != dd->ib_arm &&
+ val != dd->ib_active))) {
ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
dd->ipath_unit, (unsigned long long) val);
ret = -EINVAL;
goto bail;
}
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
-
- if ((plen + 4) > dd->ipath_ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
- plen - 4, dd->ipath_ibmaxlen);
+ dp.len, dd->ipath_ibmaxlen);
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -394,30 +435,38 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- piobuf = ipath_getpiobuf(dd, &pbufn);
+ plen >>= 2; /* in dwords */
+
+ piobuf = ipath_getpiobuf(dd, plen, &pbufn);
if (!piobuf) {
ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
dd->ipath_unit);
ret = -EBUSY;
goto bail;
}
-
- plen >>= 2; /* in dwords */
+ /* disarm it just to be extra sure */
+ ipath_disarm_piobufs(dd, pbufn, 1);
if (ipath_debug & __IPATH_PKTDBG)
ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
dd->ipath_unit, plen - 1, pbufn);
- /* we have to flush after the PBC for correctness on some cpus
- * or WC buffer can be written out of order */
- writeq(plen, piobuf);
- ipath_flush_wc();
- /* copy all by the trigger word, then flush, so it's written
+ if (dp.pbc_wd == 0)
+ dp.pbc_wd = plen;
+ writeq(dp.pbc_wd, piobuf);
+ /*
+ * Copy all by the trigger word, then flush, so it's written
* to chip before trigger word, then write trigger word, then
- * flush again, so packet is sent. */
- __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
- ipath_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ * flush again, so packet is sent.
+ */
+ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
+ ipath_flush_wc();
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
+ } else
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen);
+
ipath_flush_wc();
ret = sizeof(dp);
@@ -450,6 +499,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if (ipath_diag_inuse < 1 && (*off || count != 8))
+ ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit reads */
ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -459,6 +510,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -2)
+ ipath_diag_inuse++;
}
return ret;
@@ -478,6 +531,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
+ ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
+ ret = -EINVAL; /* before any other write allowed */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit writes */
ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -487,6 +543,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -1)
+ ipath_diag_inuse = 1; /* all read/write OK now */
}
return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
new file mode 100644
index 00000000000..123a8c05353
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/gfp.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_verbs.h"
+
+#define BAD_DMA_ADDRESS ((u64) 0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 ipath_dma_map_single(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ return (u64) cpu_addr;
+}
+
+static void ipath_dma_unmap_single(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 ipath_dma_map_page(struct ib_device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ if (offset + size > PAGE_SIZE) {
+ addr = BAD_DMA_ADDRESS;
+ goto done;
+ }
+
+ addr = (u64) page_address(page);
+ if (addr)
+ addr += offset;
+ /* TODO: handle highmem pages */
+
+done:
+ return addr;
+}
+
+static void ipath_dma_unmap_page(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ u64 addr;
+ int i;
+ int ret = nents;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ for_each_sg(sgl, sg, nents, i) {
+ addr = (u64) page_address(sg_page(sg));
+ /* TODO: handle highmem pages */
+ if (!addr) {
+ ret = 0;
+ break;
+ }
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ }
+ return ret;
+}
+
+static void ipath_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static void ipath_sync_single_for_cpu(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void ipath_sync_single_for_device(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p)
+ addr = page_address(p);
+ if (dma_handle)
+ *dma_handle = (u64) addr;
+ return addr;
+}
+
+static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ free_pages((unsigned long) cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
+ .mapping_error = ipath_mapping_error,
+ .map_single = ipath_dma_map_single,
+ .unmap_single = ipath_dma_unmap_single,
+ .map_page = ipath_dma_map_page,
+ .unmap_page = ipath_dma_unmap_page,
+ .map_sg = ipath_map_sg,
+ .unmap_sg = ipath_unmap_sg,
+ .sync_single_for_cpu = ipath_sync_single_for_cpu,
+ .sync_single_for_device = ipath_sync_single_for_device,
+ .alloc_coherent = ipath_dma_alloc_coherent,
+ .free_coherent = ipath_dma_free_coherent
+};
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 2108466c7e3..bd0caedafe9 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,16 +31,20 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
+#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
-#include "ipath_common.h"
static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -72,10 +76,27 @@ module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);
+unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
+module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
+MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
+
+static unsigned ipath_hol_timeout_ms = 13000;
+module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+ "duration of user app suspension after link failure");
+
+unsigned ipath_linkrecovery = 1;
+module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
+
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
+MODULE_AUTHOR("QLogic <support@qlogic.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");
+/*
+ * Table to translate the LINKTRAININGSTATE portion of
+ * IBCStatus to a human-readable form.
+ */
const char *ipath_ibcstatus_str[] = {
"Disabled",
"LinkUp",
@@ -90,33 +111,34 @@ const char *ipath_ibcstatus_str[] = {
"CfgWaitRmt",
"CfgIdle",
"RecovRetrain",
- "LState0xD", /* unused */
+ "CfgTxRevLane", /* unused before IBA7220 */
"RecovWaitRmt",
"RecovIdle",
+ /* below were added for IBA7220 */
+ "CfgEnhanced",
+ "CfgTest",
+ "CfgWaitRmtTest",
+ "CfgWaitCfgEnhanced",
+ "SendTS_T",
+ "SendTstIdles",
+ "RcvTS_T",
+ "SendTst_TS1s",
+ "LTState18", "LTState19", "LTState1A", "LTState1B",
+ "LTState1C", "LTState1D", "LTState1E", "LTState1F"
};
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-static void __devexit ipath_remove_one(struct pci_dev *);
-static int __devinit ipath_init_one(struct pci_dev *,
- const struct pci_device_id *);
+static void ipath_remove_one(struct pci_dev *);
+static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+
+/* Number of seconds before our card status check... */
+#define STATUS_TIMEOUT 60
static const struct pci_device_id ipath_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
- { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
{ 0, }
};
@@ -125,11 +147,13 @@ MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
static struct pci_driver ipath_driver = {
.name = IPATH_DRV_NAME,
.probe = ipath_init_one,
- .remove = __devexit_p(ipath_remove_one),
+ .remove = ipath_remove_one,
.id_table = ipath_pci_tbl,
+ .driver = {
+ .groups = ipath_driver_attr_groups,
+ },
};
-
static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
u32 *bar0, u32 *bar1)
{
@@ -170,22 +194,17 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
struct ipath_devdata *dd;
int ret;
- if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dd = vmalloc(sizeof(*dd));
+ dd = vzalloc(sizeof(*dd));
if (!dd) {
dd = ERR_PTR(-ENOMEM);
goto bail;
}
- memset(dd, 0, sizeof(*dd));
dd->ipath_unit = -1;
+ idr_preload(GFP_KERNEL);
spin_lock_irqsave(&ipath_devs_lock, flags);
- ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
+ ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate unit ID: error %d\n", -ret);
@@ -193,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
dd = ERR_PTR(ret);
goto bail_unlock;
}
+ dd->ipath_unit = ret;
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
@@ -201,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
bail_unlock:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
+ idr_preload_end();
bail:
return dd;
}
@@ -223,12 +243,12 @@ struct ipath_devdata *ipath_lookup(int unit)
return dd;
}
-int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
{
int nunits, npresent, nup;
struct ipath_devdata *dd;
unsigned long flags;
- u32 maxports;
+ int maxports;
nunits = npresent = nup = maxports = 0;
@@ -273,14 +293,107 @@ void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
-static int __devinit ipath_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+/*
+ * Perform a PIO buffer bandwidth write test, to verify proper system
+ * configuration. Even when all the setup calls work, occasionally
+ * BIOS or other issues can prevent write combining from working, or
+ * can cause other bandwidth problems to the chip.
+ *
+ * This test simply writes the same buffer over and over again, and
+ * measures close to the peak bandwidth to the chip (not testing
+ * data bandwidth to the wire). On chips that use an address-based
+ * trigger to send packets to the wire, this is easy. On chips that
+ * use a count to trigger, we want to make sure that the packet doesn't
+ * go out on the wire, or trigger flow control checks.
+ */
+static void ipath_verify_pioperf(struct ipath_devdata *dd)
+{
+ u32 pbnum, cnt, lcnt;
+ u32 __iomem *piobuf;
+ u32 *addr;
+ u64 msecs, emsecs;
+
+ piobuf = ipath_getpiobuf(dd, 0, &pbnum);
+ if (!piobuf) {
+ dev_info(&dd->pcidev->dev,
+ "No PIObufs for checking perf, skipping\n");
+ return;
+ }
+
+ /*
+ * Enough to give us a reasonable test, less than piobuf size, and
+ * likely multiple of store buffer length.
+ */
+ cnt = 1024;
+
+ addr = vmalloc(cnt);
+ if (!addr) {
+ dev_info(&dd->pcidev->dev,
+ "Couldn't get memory for checking PIO perf,"
+ " skipping\n");
+ goto done;
+ }
+
+ preempt_disable(); /* we want reasonably accurate elapsed time */
+ msecs = 1 + jiffies_to_msecs(jiffies);
+ for (lcnt = 0; lcnt < 10000U; lcnt++) {
+ /* wait until we cross msec boundary */
+ if (jiffies_to_msecs(jiffies) >= msecs)
+ break;
+ udelay(1);
+ }
+
+ ipath_disable_armlaunch(dd);
+
+ /*
+ * length 0, no dwords actually sent, and mark as VL15
+ * on chips where that may matter (due to IB flowcontrol)
+ */
+ if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
+ writeq(1UL << 63, piobuf);
+ else
+ writeq(0, piobuf);
+ ipath_flush_wc();
+
+ /*
+ * this is only roughly accurate, since even with preempt we
+ * still take interrupts that could take a while. Running for
+ * >= 5 msec seems to get us "close enough" to accurate values
+ */
+ msecs = jiffies_to_msecs(jiffies);
+ for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
+ __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
+ emsecs = jiffies_to_msecs(jiffies) - msecs;
+ }
+
+ /* 1 GiB/sec, slightly over IB SDR line rate */
+ if (lcnt < (emsecs * 1024U))
+ ipath_dev_err(dd,
+ "Performance problem: bandwidth to PIO buffers is "
+ "only %u MiB/sec\n",
+ lcnt / (u32) emsecs);
+ else
+ ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
+ lcnt / (u32) emsecs);
+
+ preempt_enable();
+
+ vfree(addr);
+
+done:
+ /* disarm piobuf, so it's available again */
+ ipath_disarm_piobufs(dd, pbnum, 1);
+ ipath_enable_armlaunch(dd);
+}
+
+static void cleanup_device(struct ipath_devdata *dd);
+
+static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret, len, j;
struct ipath_devdata *dd;
unsigned long long addr;
u32 bar0 = 0, bar1 = 0;
- u8 rev;
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
@@ -292,8 +405,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
- read_bars(dd, pdev, &bar0, &bar1);
-
ret = pci_enable_device(pdev);
if (ret) {
/* This can happen iff:
@@ -314,7 +425,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
- ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x "
+ ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
"driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
ent->device, ent->driver_data);
@@ -353,14 +464,14 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail_disable;
}
- ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret) {
/*
* if the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
- ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
dev_info(&pdev->dev,
"Unable to set DMA mask for unit %u: %d\n",
@@ -369,7 +480,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
else {
ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
- ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
@@ -379,7 +490,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
}
else {
- ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
@@ -403,9 +514,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
case PCI_DEVICE_ID_INFINIPATH_HT:
ipath_init_iba6110_funcs(dd);
break;
- case PCI_DEVICE_ID_INFINIPATH_PE800:
- ipath_init_iba6120_funcs(dd);
- break;
+
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
"failing\n", ent->device);
@@ -415,9 +524,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
for (j = 0; j < 6; j++) {
if (!pdev->resource[j].start)
continue;
- ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
- j, (unsigned long long)pdev->resource[j].start,
- (unsigned long long)pdev->resource[j].end,
+ ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
+ j, &pdev->resource[j],
(unsigned long long)pci_resource_len(pdev, j));
}
@@ -427,16 +535,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail_regions;
}
- dd->ipath_deviceid = ent->device; /* save for later use */
- dd->ipath_vendorid = ent->vendor;
-
- ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
- if (ret) {
- ipath_dev_err(dd, "Failed to read PCI revision ID unit "
- "%u: err %d\n", dd->ipath_unit, -ret);
- goto bail_regions; /* shouldn't ever happen */
- }
- dd->ipath_pcirev = rev;
+ dd->ipath_pcirev = pdev->revision;
#if defined(__powerpc__)
/* There isn't a generic way to specify writethrough mappings */
@@ -459,14 +558,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
addr, dd->ipath_kregbase);
- /*
- * clear ipath_flags here instead of in ipath_init_chip as it is set
- * by ipath_setup_htconfig.
- */
- dd->ipath_flags = 0;
- dd->ipath_lli_counter = 0;
- dd->ipath_lli_errors = 0;
-
if (dd->ipath_f_bus(dd, pdev))
ipath_dev_err(dd, "Failed to setup config space; "
"continuing anyway\n");
@@ -477,22 +568,22 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
* check 0 irq after we return from chip-specific bus setup, since
* that can affect this due to setup
*/
- if (!pdev->irq)
+ if (!dd->ipath_irq)
ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
"work\n");
else {
- ret = request_irq(pdev->irq, ipath_intr, IRQF_SHARED,
+ ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
IPATH_DRV_NAME, dd);
if (ret) {
ipath_dev_err(dd, "Couldn't setup irq handler, "
- "irq=%u: %d\n", pdev->irq, ret);
+ "irq=%d: %d\n", dd->ipath_irq, ret);
goto bail_iounmap;
}
}
ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
if (ret)
- goto bail_iounmap;
+ goto bail_irqsetup;
ret = ipath_enable_wc(dd);
@@ -503,6 +594,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ret = 0;
}
+ ipath_verify_pioperf(dd);
+
ipath_device_create_group(&pdev->dev, dd);
ipathfs_add_device(dd);
ipath_user_add(dd);
@@ -511,6 +604,15 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail;
+bail_irqsetup:
+ cleanup_device(dd);
+
+ if (dd->ipath_irq)
+ dd->ipath_f_free_irq(dd);
+
+ if (dd->ipath_f_cleanup)
+ dd->ipath_f_cleanup(dd);
+
bail_iounmap:
iounmap((volatile void __iomem *) dd->ipath_kregbase);
@@ -527,28 +629,161 @@ bail:
return ret;
}
-static void __devexit ipath_remove_one(struct pci_dev *pdev)
+static void cleanup_device(struct ipath_devdata *dd)
{
- struct ipath_devdata *dd;
+ int port;
+ struct ipath_portdata **tmp;
+ unsigned long flags;
- ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
- if (!pdev)
- return;
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these are
+ * to ensure any register reads/writes "fail" until
+ * re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ if (dd->ipath_spectriggerhit)
+ dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
+ dd->ipath_spectriggerhit);
+
+ if (dd->ipath_pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->ipath_pioavailregs_dma,
+ dd->ipath_pioavailregs_phys);
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+ if (dd->ipath_dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->ipath_pd[0]->port_rcvhdrq_size,
+ dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+ dd->ipath_dummy_hdrq = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ dma_addr_t *tmpd = dd->ipath_physshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ tmpp = dd->ipath_pageshadow;
+ dd->ipath_pageshadow = NULL;
+ vfree(tmpp);
+
+ dd->ipath_egrtidbase = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+ * at unload; we do for portcnt, because that's what we allocate.
+ * We acquire lock to be really paranoid that ipath_pd isn't being
+ * accessed from some interrupt-related code (that should not happen,
+ * but best to be sure).
+ */
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+ tmp = dd->ipath_pd;
+ dd->ipath_pd = NULL;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+ for (port = 0; port < dd->ipath_portcnt; port++) {
+ struct ipath_portdata *pd = tmp[port];
+ tmp[port] = NULL; /* debugging paranoia */
+ ipath_free_pddata(dd, pd);
+ }
+ kfree(tmp);
+}
+
+static void ipath_remove_one(struct pci_dev *pdev)
+{
+ struct ipath_devdata *dd = pci_get_drvdata(pdev);
+
+ ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+
+ /*
+ * disable the IB link early, to be sure no new packets arrive, which
+ * complicates the shutdown process
+ */
+ ipath_shutdown_device(dd);
+
+ flush_workqueue(ib_wq);
+
+ if (dd->verbs_dev)
+ ipath_unregister_ib_device(dd->verbs_dev);
- dd = pci_get_drvdata(pdev);
- ipath_unregister_ib_device(dd->verbs_dev);
ipath_diag_remove(dd);
ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
+
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
- if (dd->ipath_kregbase) {
- ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
- dd->ipath_kregbase);
- iounmap((volatile void __iomem *) dd->ipath_kregbase);
- dd->ipath_kregbase = NULL;
- }
+
+ cleanup_device(dd);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ if (dd->ipath_irq) {
+ ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
+ dd->ipath_unit, dd->ipath_irq);
+ dd->ipath_f_free_irq(dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", dd->ipath_unit);
+ /*
+ * we check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
@@ -576,31 +811,25 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
unsigned cnt)
{
unsigned i, last = first + cnt;
- u64 sendctrl, sendorig;
+ unsigned long flags;
ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
- sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
for (i = first; i < last; i++) {
- sendctrl = sendorig |
- (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ /*
+ * The disarm-related bits are write-only, so it
+ * is ok to OR them in with our copy of sendctrl
+ * while we hold the lock.
+ */
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- sendctrl);
+ dd->ipath_sendctrl | INFINIPATH_S_DISARM |
+ (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
+ /* can't disarm bufs back-to-back per iba7220 spec */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
-
- /*
- * Write it again with current value, in case ipath_sendctrl changed
- * while we were looping; no critical bits that would require
- * locking.
- *
- * Write a 0, and then the original value, reading scratch in
- * between. This seems to avoid a chip timing race that causes
- * pioavail updates to memory to stop.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- 0);
- sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
+ /* on some older chips, update may not happen after cancel */
+ ipath_force_pio_avail_update(dd);
}
/**
@@ -615,8 +844,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
* -ETIMEDOUT state can have multiple states set, for any of several
* transitions.
*/
-static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
- int msecs)
+int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
{
dd->ipath_state_wanted = state;
wait_event_interruptible_timeout(ipath_state_wait,
@@ -638,14 +866,81 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
(unsigned long long) ipath_read_kreg64(
dd, dd->ipath_kregs->kr_ibcctrl),
(unsigned long long) val,
- ipath_ibcstatus_str[val & 0xf]);
+ ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
}
return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
+ char *buf, size_t blen)
+{
+ static const struct {
+ ipath_err_t err;
+ const char *msg;
+ } errs[] = {
+ { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
+ { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
+ { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
+ { INFINIPATH_E_SDMABASE, "SDmaBase" },
+ { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
+ { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
+ { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
+ { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
+ { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
+ { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
+ { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
+ { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
+ };
+ int i;
+ int expected;
+ size_t bidx = 0;
+
+ for (i = 0; i < ARRAY_SIZE(errs); i++) {
+ expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
+ test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ if ((err & errs[i].err) && !expected)
+ bidx += snprintf(buf + bidx, blen - bidx,
+ "%s ", errs[i].msg);
+ }
+}
+
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print * it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+ ipath_err_t err)
{
+ int iserr = 1;
*buf = '\0';
+ if (err & INFINIPATH_E_PKTERRS) {
+ if (!(err & ~INFINIPATH_E_PKTERRS))
+ iserr = 0; // if only packet errors.
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ if (err & INFINIPATH_E_REBP)
+ strlcat(buf, "EBP ", blen);
+ if (err & INFINIPATH_E_RVCRC)
+ strlcat(buf, "VCRC ", blen);
+ if (err & INFINIPATH_E_RICRC) {
+ strlcat(buf, "CRC ", blen);
+ // clear for check below, so only once
+ err &= INFINIPATH_E_RICRC;
+ }
+ if (err & INFINIPATH_E_RSHORTPKTLEN)
+ strlcat(buf, "rshortpktlen ", blen);
+ if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+ strlcat(buf, "sdroppeddatapkt ", blen);
+ if (err & INFINIPATH_E_SPKTLEN)
+ strlcat(buf, "spktlen ", blen);
+ }
+ if ((err & INFINIPATH_E_RICRC) &&
+ !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
if (err & INFINIPATH_E_RHDRLEN)
strlcat(buf, "rhdrlen ", blen);
if (err & INFINIPATH_E_RBADTID)
@@ -654,14 +949,16 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "rbadversion ", blen);
if (err & INFINIPATH_E_RHDR)
strlcat(buf, "rhdr ", blen);
+ if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
+ strlcat(buf, "sendspecialtrigger ", blen);
if (err & INFINIPATH_E_RLONGPKTLEN)
strlcat(buf, "rlongpktlen ", blen);
- if (err & INFINIPATH_E_RSHORTPKTLEN)
- strlcat(buf, "rshortpktlen ", blen);
if (err & INFINIPATH_E_RMAXPKTLEN)
strlcat(buf, "rmaxpktlen ", blen);
if (err & INFINIPATH_E_RMINPKTLEN)
strlcat(buf, "rminpktlen ", blen);
+ if (err & INFINIPATH_E_SMINPKTLEN)
+ strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_RFORMATERR)
strlcat(buf, "rformaterr ", blen);
if (err & INFINIPATH_E_RUNSUPVL)
@@ -670,32 +967,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "runexpchar ", blen);
if (err & INFINIPATH_E_RIBFLOW)
strlcat(buf, "ribflow ", blen);
- if (err & INFINIPATH_E_REBP)
- strlcat(buf, "EBP ", blen);
if (err & INFINIPATH_E_SUNDERRUN)
strlcat(buf, "sunderrun ", blen);
if (err & INFINIPATH_E_SPIOARMLAUNCH)
strlcat(buf, "spioarmlaunch ", blen);
if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
strlcat(buf, "sunexperrpktnum ", blen);
- if (err & INFINIPATH_E_SDROPPEDDATAPKT)
- strlcat(buf, "sdroppeddatapkt ", blen);
if (err & INFINIPATH_E_SDROPPEDSMPPKT)
strlcat(buf, "sdroppedsmppkt ", blen);
if (err & INFINIPATH_E_SMAXPKTLEN)
strlcat(buf, "smaxpktlen ", blen);
- if (err & INFINIPATH_E_SMINPKTLEN)
- strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_SUNSUPVL)
strlcat(buf, "sunsupVL ", blen);
- if (err & INFINIPATH_E_SPKTLEN)
- strlcat(buf, "spktlen ", blen);
if (err & INFINIPATH_E_INVALIDADDR)
strlcat(buf, "invalidaddr ", blen);
- if (err & INFINIPATH_E_RICRC)
- strlcat(buf, "CRC ", blen);
- if (err & INFINIPATH_E_RVCRC)
- strlcat(buf, "VCRC ", blen);
if (err & INFINIPATH_E_RRCVEGRFULL)
strlcat(buf, "rcvegrfull ", blen);
if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -708,6 +993,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "hardware ", blen);
if (err & INFINIPATH_E_RESET)
strlcat(buf, "reset ", blen);
+ if (err & INFINIPATH_E_SDMAERRS)
+ decode_sdma_errs(dd, err, buf, blen);
+ if (err & INFINIPATH_E_INVALIDEEPCMD)
+ strlcat(buf, "invalideepromcmd ", blen);
+done:
+ return iserr;
}
/**
@@ -753,15 +1044,13 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
* ipath_get_egrbuf - get an eager buffer
* @dd: the infinipath device
* @bufnum: the eager buffer to get
- * @err: unused
*
* must only be called if ipath_pd[port] is known to be allocated
*/
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
- int err)
+static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
- return dd->ipath_port0_skbs ?
- (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+ return dd->ipath_port0_skbinfo ?
+ (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
@@ -783,31 +1072,34 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
*/
/*
- * We need 4 extra bytes for unaligned transfer copying
+ * We need 2 extra bytes for ipath_ether data sent in the
+ * key header. In order to keep everything dword aligned,
+ * we'll reserve 4 bytes.
*/
+ len = dd->ipath_ibmaxlen + 4;
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- /* we need a 4KB multiple alignment, and there is no way
+ /* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
- len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+ len += 2047;
}
- else
- len = dd->ipath_ibmaxlen + 4;
+
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
+
+ skb_reserve(skb, 4);
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+ u32 una = (unsigned long)skb->data & 2047;
if (una)
- skb_reserve(skb, 4 + (1 << 11) - una);
- else
- skb_reserve(skb, 4);
- } else
- skb_reserve(skb, 4);
+ skb_reserve(skb, 2048 - una);
+ }
bail:
return skb;
@@ -817,18 +1109,17 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
u32 eflags,
u32 l,
u32 etail,
- u64 *rc)
+ __le32 *rhf_addr,
+ struct ipath_message_header *hdr)
{
char emsg[128];
- struct ipath_message_header *hdr;
get_rhf_errstring(eflags, emsg, sizeof emsg);
- hdr = (struct ipath_message_header *)&rc[1];
ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
"tlen=%x opcode=%x egridx=%x: %s\n",
eflags, l,
- ipath_hdrget_rcv_type((__le32 *) rc),
- ipath_hdrget_length_in_bytes((__le32 *) rc),
+ ipath_hdrget_rcv_type(rhf_addr),
+ ipath_hdrget_length_in_bytes(rhf_addr),
be32_to_cpu(hdr->bth[0]) >> 24,
etail, emsg);
@@ -847,67 +1138,61 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
/*
* ipath_kreceive - receive a packet
- * @dd: the infinipath device
+ * @pd: the infinipath port
*
* called from interrupt handler for errors or receive interrupt
*/
-void ipath_kreceive(struct ipath_devdata *dd)
+void ipath_kreceive(struct ipath_portdata *pd)
{
- u64 *rc;
+ struct ipath_devdata *dd = pd->port_dd;
+ __le32 *rhf_addr;
void *ebuf;
const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
u32 etail = -1, l, hdrqtail;
struct ipath_message_header *hdr;
- u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
+ u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
static u64 totcalls; /* stats, may eventually remove */
+ int last;
- if (!dd->ipath_hdrqtailptr) {
- ipath_dev_err(dd,
- "hdrqtailptr not set, can't do receives\n");
- goto bail;
- }
+ l = pd->port_head;
+ rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+ u32 seq = ipath_hdrget_seq(rhf_addr);
- /* There is already a thread processing this queue. */
- if (test_and_set_bit(0, &dd->ipath_rcv_pending))
- goto bail;
-
- l = dd->ipath_port0head;
- hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
- if (l == hdrqtail)
- goto done;
+ if (seq != pd->port_seq_cnt)
+ goto bail;
+ hdrqtail = 0;
+ } else {
+ hdrqtail = ipath_get_rcvhdrtail(pd);
+ if (l == hdrqtail)
+ goto bail;
+ smp_rmb();
+ }
reloop:
- for (i = 0; l != hdrqtail; i++) {
- u32 qp;
- u8 *bthbytes;
-
- rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
- hdr = (struct ipath_message_header *)&rc[1];
- /*
- * could make a network order version of IPATH_KD_QP, and
- * do the obvious shift before masking to speed this up.
- */
- qp = ntohl(hdr->bth[1]) & 0xffffff;
- bthbytes = (u8 *) hdr->bth;
-
- eflags = ipath_hdrget_err_flags((__le32 *) rc);
- etype = ipath_hdrget_rcv_type((__le32 *) rc);
+ for (last = 0, i = 1; !last; i += !last) {
+ hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
+ eflags = ipath_hdrget_err_flags(rhf_addr);
+ etype = ipath_hdrget_rcv_type(rhf_addr);
/* total length */
- tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
+ tlen = ipath_hdrget_length_in_bytes(rhf_addr);
ebuf = NULL;
- if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
+ if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
+ ipath_hdrget_use_egr_buf(rhf_addr) :
+ (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
/*
- * it turns out that the chips uses an eager buffer
+ * It turns out that the chip uses an eager buffer
* for all non-expected packets, whether it "needs"
* one or not. So always get the index, but don't
* set ebuf (so we try to copy data) unless the
* length requires it.
*/
- etail = ipath_hdrget_index((__le32 *) rc);
+ etail = ipath_hdrget_index(rhf_addr);
+ updegr = 1;
if (tlen > sizeof(*hdr) ||
etype == RCVHQ_RCV_TYPE_NON_KD)
- ebuf = ipath_get_egrbuf(dd, etail, 0);
+ ebuf = ipath_get_egrbuf(dd, etail);
}
/*
@@ -915,75 +1200,91 @@ reloop:
* packets; only ipathhdrerr should be set.
*/
- if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
- RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
- hdr->iph.ver_port_tid_offset) !=
- IPS_PROTO_VERSION) {
+ if (etype != RCVHQ_RCV_TYPE_NON_KD &&
+ etype != RCVHQ_RCV_TYPE_ERROR &&
+ ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
+ IPS_PROTO_VERSION)
ipath_cdbg(PKT, "Bad InfiniPath protocol version "
"%x\n", etype);
- }
if (unlikely(eflags))
- ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
+ ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
- ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+ ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
if (dd->ipath_lli_counter)
dd->ipath_lli_counter--;
+ } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
+ u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
+ u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
"qp=%x), len %x; ignored\n",
- etype, bthbytes[0], qp, tlen);
+ etype, opcode, qp, tlen);
}
- else if (etype == RCVHQ_RCV_TYPE_EAGER)
- ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
- "qp=%x), len %x; ignored\n",
- etype, bthbytes[0], qp, tlen);
else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
- be32_to_cpu(hdr->bth[0]) & 0xff);
+ be32_to_cpu(hdr->bth[0]) >> 24);
else {
/*
- * error packet, type of error unknown.
+ * error packet, type of error unknown.
* Probably type 3, but we don't know, so don't
* even try to print the opcode, etc.
+ * Usually caused by a "bad packet", that has no
+ * BTH, when the LRH says it should.
*/
- ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
- "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
- "hdr %llx %llx %llx %llx %llx\n",
- etail, tlen, (unsigned long) rc, l,
- (unsigned long long) rc[0],
- (unsigned long long) rc[1],
- (unsigned long long) rc[2],
- (unsigned long long) rc[3],
- (unsigned long long) rc[4],
- (unsigned long long) rc[5]);
+ ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
+ " %x, len %x hdrq+%x rhf: %Lx\n",
+ etail, tlen, l, (unsigned long long)
+ le64_to_cpu(*(__le64 *) rhf_addr));
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ u32 j, *d, dw = rsize-2;
+ if (rsize > (tlen>>2))
+ dw = tlen>>2;
+ d = (u32 *)hdr;
+ printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
+ dw);
+ for (j = 0; j < dw; j++)
+ printk(KERN_DEBUG "%8x%s", d[j],
+ (j%8) == 7 ? "\n" : " ");
+ printk(KERN_DEBUG ".\n");
+ }
}
l += rsize;
if (l >= maxcnt)
l = 0;
- if (etype != RCVHQ_RCV_TYPE_EXPECTED)
- updegr = 1;
+ rhf_addr = (__le32 *) pd->port_rcvhdrq +
+ l + dd->ipath_rhf_offset;
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+ u32 seq = ipath_hdrget_seq(rhf_addr);
+
+ if (++pd->port_seq_cnt > 13)
+ pd->port_seq_cnt = 1;
+ if (seq != pd->port_seq_cnt)
+ last = 1;
+ } else if (l == hdrqtail)
+ last = 1;
/*
* update head regs on last packet, and every 16 packets.
* Reduce bus traffic, while still trying to prevent
* rcvhdrq overflows, for when the queue is nearly full
*/
- if (l == hdrqtail || (i && !(i&0xf))) {
- u64 lval;
- if (l == hdrqtail)
- /* request IBA6120 interrupt only on last */
- lval = dd->ipath_rhdrhead_intr_off | l;
- else
- lval = l;
- (void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
+ if (last || !(i & 0xf)) {
+ u64 lval = l;
+
+ /* request IBA6120 and 7220 interrupt only on last */
+ if (last)
+ lval |= dd->ipath_rhdrhead_intr_off;
+ ipath_write_ureg(dd, ur_rcvhdrhead, lval,
+ pd->port_port);
if (updegr) {
- (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
- etail, 0);
+ ipath_write_ureg(dd, ur_rcvegrindexhead,
+ etail, pd->port_port);
updegr = 0;
}
}
}
- if (!dd->ipath_rhdrhead_intr_off && !reloop) {
+ if (!dd->ipath_rhdrhead_intr_off && !reloop &&
+ !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
/* IBA6110 workaround; we can have a race clearing chip
* interrupt with another interrupt about to be delivered,
* and can clear it before it is delivered on the GPIO
@@ -992,7 +1293,7 @@ reloop:
* earlier packets, we "almost" guarantee we have covered
* that case.
*/
- u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
+ u32 hqtail = ipath_get_rcvhdrtail(pd);
if (hqtail != hdrqtail) {
hdrqtail = hqtail;
reloop = 1; /* loop 1 extra time at most */
@@ -1002,7 +1303,7 @@ reloop:
pkttot += i;
- dd->ipath_port0head = l;
+ pd->port_head = l;
if (pkttot > ipath_stats.sps_maxpkts_call)
ipath_stats.sps_maxpkts_call = pkttot;
@@ -1010,10 +1311,6 @@ reloop:
ipath_stats.sps_avgpkts_call =
ipath_stats.sps_port0pkts / ++totcalls;
-done:
- clear_bit(0, &dd->ipath_rcv_pending);
- smp_mb__after_clear_bit();
-
bail:;
}
@@ -1049,7 +1346,6 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
* happens when all buffers are in use, so only cpu overhead, not
* latency or bandwidth is affected.
*/
-#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
if (!dd->ipath_pioavailregs_dma) {
ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
return;
@@ -1090,16 +1386,11 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
/*
* Chip Errata: bug 6641; even and odd qwords>3 are swapped
*/
- if (i > 3) {
- if (i & 1)
- piov = le64_to_cpu(
- dd->ipath_pioavailregs_dma[i - 1]);
- else
- piov = le64_to_cpu(
- dd->ipath_pioavailregs_dma[i + 1]);
- } else
+ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
+ piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
+ else
piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
- pchg = _IPATH_ALL_CHECKBITS &
+ pchg = dd->ipath_pioavailkernel[i] &
~(dd->ipath_pioavailshadow[i] ^ piov);
pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
@@ -1111,6 +1402,41 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
+/*
+ * used to force update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exitting freeze mode after recovering
+ * from errors. Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+ int i, im;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ u64 val, oldval;
+ /* deal with 6110 chip bug on high register #s */
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+ /*
+ * busy out the buffers not in the kernel avail list,
+ * without changing the generation bits.
+ */
+ oldval = dd->ipath_pioavailshadow[i];
+ dd->ipath_pioavailshadow[i] = val |
+ ((~dd->ipath_pioavailkernel[i] <<
+ INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+ 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+ if (oldval != dd->ipath_pioavailshadow[i])
+ ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+ i, (unsigned long long) oldval,
+ dd->ipath_pioavailshadow[i]);
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
/**
* ipath_setrcvhdrsize - set the receive header size
* @dd: the infinipath device
@@ -1150,27 +1476,69 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
return ret;
}
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @pbufnum: the buffer number is placed here
+/*
+ * debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_pio_bufs(struct ipath_devdata *dd)
+{
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+ __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
+
+ dd->ipath_upd_pio_shadow = 1;
+
+ /*
+ * not atomic, but if we lose a stat count in a while, that's OK
+ */
+ ipath_stats.sps_nopiobufs++;
+ if (!(++dd->ipath_consec_nopiobuf % 100000)) {
+ ipath_force_pio_avail_update(dd); /* at start */
+ ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
+ dd->ipath_consec_nopiobuf,
+ (unsigned long)get_cycles(),
+ (unsigned long long) le64_to_cpu(dma[0]),
+ (unsigned long long) le64_to_cpu(dma[1]),
+ (unsigned long long) le64_to_cpu(dma[2]),
+ (unsigned long long) le64_to_cpu(dma[3]),
+ shadow[0], shadow[1], shadow[2], shadow[3]);
+ /*
+ * 4 buffers per byte, 4 registers above, cover rest
+ * below
+ */
+ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
+ (sizeof(shadow[0]) * 4 * 4))
+ ipath_dbg("2nd group: dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
+ (unsigned long long)le64_to_cpu(dma[4]),
+ (unsigned long long)le64_to_cpu(dma[5]),
+ (unsigned long long)le64_to_cpu(dma[6]),
+ (unsigned long long)le64_to_cpu(dma[7]),
+ shadow[4], shadow[5], shadow[6], shadow[7]);
+
+ /* at end, so update likely happened */
+ ipath_reset_availshadow(dd);
+ }
+}
+
+/*
+ * common code for normal driver pio buffer allocation, and reserved
+ * allocation.
*
* do appropriate marking as busy, etc.
* returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_layer_send
*/
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
+static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
+ u32 *pbufnum, u32 first, u32 last, u32 firsti)
{
- int i, j, starti, updated = 0;
- unsigned piobcnt, iter;
+ int i, j, updated = 0;
+ unsigned piobcnt;
unsigned long flags;
unsigned long *shadow = dd->ipath_pioavailshadow;
u32 __iomem *buf;
- piobcnt = (unsigned)(dd->ipath_piobcnt2k
- + dd->ipath_piobcnt4k);
- starti = dd->ipath_lastport_piobuf;
- iter = piobcnt - starti;
+ piobcnt = last - first;
if (dd->ipath_upd_pio_shadow) {
/*
* Minor optimization. If we had no buffers on last call,
@@ -1178,12 +1546,10 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
* if no buffers were updated, to be paranoid
*/
ipath_update_pio_bufs(dd);
- /* we scanned here, don't do it at end of scan */
- updated = 1;
- i = starti;
+ updated++;
+ i = first;
} else
- i = dd->ipath_lastpioindex;
-
+ i = firsti;
rescan:
/*
* while test_and_set_bit() is atomic, we do that and then the
@@ -1191,101 +1557,197 @@ rescan:
* of the remaining armlaunch errors.
*/
spin_lock_irqsave(&ipath_pioavail_lock, flags);
- for (j = 0; j < iter; j++, i++) {
- if (i >= piobcnt)
- i = starti;
- /*
- * To avoid bus lock overhead, we first find a candidate
- * buffer, then do the test and set, and continue if that
- * fails.
- */
- if (test_bit((2 * i) + 1, shadow) ||
- test_and_set_bit((2 * i) + 1, shadow))
+ for (j = 0; j < piobcnt; j++, i++) {
+ if (i >= last)
+ i = first;
+ if (__test_and_set_bit((2 * i) + 1, shadow))
continue;
/* flip generation bit */
- change_bit(2 * i, shadow);
+ __change_bit(2 * i, shadow);
break;
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
- if (j == iter) {
- volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-
- /*
- * first time through; shadow exhausted, but may be real
- * buffers available, so go see; if any updated, rescan
- * (once)
- */
+ if (j == piobcnt) {
if (!updated) {
+ /*
+ * first time through; shadow exhausted, but may be
+ * buffers available, try an update and then rescan.
+ */
+ ipath_update_pio_bufs(dd);
+ updated++;
+ i = first;
+ goto rescan;
+ } else if (updated == 1 && piobcnt <=
+ ((dd->ipath_sendctrl
+ >> INFINIPATH_S_UPDTHRESH_SHIFT) &
+ INFINIPATH_S_UPDTHRESH_MASK)) {
+ /*
+ * for chips supporting and using the update
+ * threshold we need to force an update of the
+ * in-memory copy if the count is less than the
+ * thershold, then check one more time.
+ */
+ ipath_force_pio_avail_update(dd);
ipath_update_pio_bufs(dd);
- updated = 1;
- i = starti;
+ updated++;
+ i = first;
goto rescan;
}
- dd->ipath_upd_pio_shadow = 1;
+
+ no_pio_bufs(dd);
+ buf = NULL;
+ } else {
+ if (i < dd->ipath_piobcnt2k)
+ buf = (u32 __iomem *) (dd->ipath_pio2kbase +
+ i * dd->ipath_palign);
+ else
+ buf = (u32 __iomem *)
+ (dd->ipath_pio4kbase +
+ (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+ if (pbufnum)
+ *pbufnum = i;
+ }
+
+ return buf;
+}
+
+/**
+ * ipath_getpiobuf - find an available pio buffer
+ * @dd: the infinipath device
+ * @plen: the size of the PIO buffer needed in 32-bit words
+ * @pbufnum: the buffer number is placed here
+ */
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
+{
+ u32 __iomem *buf;
+ u32 pnum, nbufs;
+ u32 first, lasti;
+
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
+ first = dd->ipath_piobcnt2k;
+ lasti = dd->ipath_lastpioindexl;
+ } else {
+ first = 0;
+ lasti = dd->ipath_lastpioindex;
+ }
+ nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
+
+ if (buf) {
/*
- * not atomic, but if we lose one once in a while, that's OK
+ * Set next starting place. It's just an optimization,
+ * it doesn't matter who wins on this, so no locking
*/
- ipath_stats.sps_nopiobufs++;
- if (!(++dd->ipath_consec_nopiobuf % 100000)) {
- ipath_dbg(
- "%u pio sends with no bufavail; dmacopy: "
- "%llx %llx %llx %llx; shadow: "
- "%lx %lx %lx %lx\n",
- dd->ipath_consec_nopiobuf,
- (unsigned long long) le64_to_cpu(dma[0]),
- (unsigned long long) le64_to_cpu(dma[1]),
- (unsigned long long) le64_to_cpu(dma[2]),
- (unsigned long long) le64_to_cpu(dma[3]),
- shadow[0], shadow[1], shadow[2],
- shadow[3]);
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+ dd->ipath_lastpioindexl = pnum + 1;
+ else
+ dd->ipath_lastpioindex = pnum + 1;
+ if (dd->ipath_upd_pio_shadow)
+ dd->ipath_upd_pio_shadow = 0;
+ if (dd->ipath_consec_nopiobuf)
+ dd->ipath_consec_nopiobuf = 0;
+ ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
+ pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
+ if (pbufnum)
+ *pbufnum = pnum;
+
+ }
+ return buf;
+}
+
+/**
+ * ipath_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the infinipath device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ */
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ unsigned len, int avail)
+{
+ unsigned long flags;
+ unsigned end, cnt = 0;
+
+ /* There are two bits per send buffer (busy and generation) */
+ start *= 2;
+ end = start + len * 2;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
+ while (start < end) {
+ if (avail) {
+ unsigned long dma;
+ int i, im;
/*
- * 4 buffers per byte, 4 registers above, cover rest
- * below
+ * the BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
*/
- if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
- (sizeof(shadow[0]) * 4 * 4))
- ipath_dbg("2nd group: dmacopy: %llx %llx "
- "%llx %llx; shadow: %lx %lx "
- "%lx %lx\n",
- (unsigned long long)
- le64_to_cpu(dma[4]),
- (unsigned long long)
- le64_to_cpu(dma[5]),
- (unsigned long long)
- le64_to_cpu(dma[6]),
- (unsigned long long)
- le64_to_cpu(dma[7]),
- shadow[4], shadow[5],
- shadow[6], shadow[7]);
+ /* deal with 6110 chip bug on high register #s */
+ i = start / BITS_PER_LONG;
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ dma = (unsigned long) le64_to_cpu(
+ dd->ipath_pioavailregs_dma[im]);
+ if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start) % BITS_PER_LONG, &dma))
+ __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ else
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ __set_bit(start, dd->ipath_pioavailkernel);
+ } else {
+ __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+ dd->ipath_pioavailshadow);
+ __clear_bit(start, dd->ipath_pioavailkernel);
}
- buf = NULL;
- goto bail;
+ start += 2;
}
+ if (dd->ipath_pioupd_thresh) {
+ end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
/*
- * set next starting place. Since it's just an optimization,
- * it doesn't matter who wins on this, so no locking
+ * When moving buffers from kernel to user, if number assigned to
+ * the user is less than the pio update threshold, and threshold
+ * is supported (cnt was computed > 0), drop the update threshold
+ * so we update at least once per allocated number of buffers.
+ * In any case, if the kernel buffers are less than the threshold,
+ * drop the threshold. We don't bother increasing it, having once
+ * decreased it, since it would typically just cycle back and forth.
+ * If we don't decrease below buffers in use, we can wait a long
+ * time for an update, until some other context uses PIO buffers.
*/
- dd->ipath_lastpioindex = i + 1;
- if (dd->ipath_upd_pio_shadow)
- dd->ipath_upd_pio_shadow = 0;
- if (dd->ipath_consec_nopiobuf)
- dd->ipath_consec_nopiobuf = 0;
- if (i < dd->ipath_piobcnt2k)
- buf = (u32 __iomem *) (dd->ipath_pio2kbase +
- i * dd->ipath_palign);
- else
- buf = (u32 __iomem *)
- (dd->ipath_pio4kbase +
- (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
- ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
- i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
- if (pbufnum)
- *pbufnum = i;
-
-bail:
- return buf;
+ if (!avail && len < cnt)
+ cnt = len;
+ if (cnt < dd->ipath_pioupd_thresh) {
+ dd->ipath_pioupd_thresh = cnt;
+ ipath_dbg("Decreased pio update threshold to %u\n",
+ dd->ipath_pioupd_thresh);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+ << INFINIPATH_S_UPDTHRESH_SHIFT);
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
}
/**
@@ -1319,16 +1781,27 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ret = -ENOMEM;
goto bail;
}
- pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
- if (!pd->port_rcvhdrtail_kvaddr) {
- ipath_dev_err(dd, "attempt to allocate 1 page "
- "for port %u rcvhdrqtailaddr failed\n",
- pd->port_port);
- ret = -ENOMEM;
- goto bail;
+
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+ pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ GFP_KERNEL);
+ if (!pd->port_rcvhdrtail_kvaddr) {
+ ipath_dev_err(dd, "attempt to allocate 1 page "
+ "for port %u rcvhdrqtailaddr "
+ "failed\n", pd->port_port);
+ ret = -ENOMEM;
+ dma_free_coherent(&dd->pcidev->dev, amt,
+ pd->port_rcvhdrq,
+ pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
+ goto bail;
+ }
+ pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
+ ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
+ "physical\n", pd->port_port,
+ (unsigned long long) phys_hdrqtail);
}
- pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
pd->port_rcvhdrq_size = amt;
@@ -1338,21 +1811,19 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
(unsigned long) pd->port_rcvhdrq_phys,
(unsigned long) pd->port_rcvhdrq_size,
pd->port_port);
-
- ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
- pd->port_port,
- (unsigned long long) phys_hdrqtail);
}
else
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
- pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
- (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
+ (unsigned long long) pd->port_rcvhdrq_phys,
+ pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+ pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
- memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+ if (pd->port_rcvhdrtail_kvaddr)
+ memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
@@ -1363,114 +1834,167 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, pd->port_rcvhdrq_phys);
- ret = 0;
bail:
return ret;
}
-int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
- u64 bits_to_wait_for, u64 * valp)
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent. Used whenever we need to be
+ * sure the send side is idle. Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo. The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if normal send had happened.
+ */
+void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
{
- unsigned long timeout;
- u64 lastval, val;
- int ret;
+ unsigned long flags;
- lastval = ipath_read_kreg64(dd, reg_id);
- /* wait a ridiculously long time */
- timeout = jiffies + msecs_to_jiffies(5);
- do {
- val = ipath_read_kreg64(dd, reg_id);
- /* set so they have something, even on failures. */
- *valp = val;
- if ((val & bits_to_wait_for) == bits_to_wait_for) {
- ret = 0;
- break;
- }
- if (val != lastval)
- ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
- "waiting for %llx bits\n",
- (unsigned long long) lastval,
- (unsigned long long) val,
- (unsigned long long) bits_to_wait_for);
- cond_resched();
- if (time_after(jiffies, timeout)) {
- ipath_dbg("Didn't get bits %llx in register 0x%x, "
- "got %llx\n",
- (unsigned long long) bits_to_wait_for,
- reg_id, (unsigned long long) *valp);
- ret = -ENODEV;
- break;
- }
- } while (1);
+ if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
+ ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
+ goto bail;
+ }
+ /*
+ * If we have SDMA, and it's not disabled, we have to kick off the
+ * abort state machine, provided we aren't already aborting.
+ * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
+ * we skip the rest of this routine. It is already "in progress"
+ */
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
+ int skip_cancel;
+ unsigned long *statp = &dd->ipath_sdma_status;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ skip_cancel =
+ test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+ && !test_bit(IPATH_SDMA_DISABLED, statp);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (skip_cancel)
+ goto bail;
+ }
- return ret;
+ ipath_dbg("Cancelling all in-progress send buffers\n");
+
+ /* skip armlaunch errs for a while */
+ dd->ipath_lastcancel = jiffies + HZ / 2;
+
+ /*
+ * The abort bit is auto-clearing. We also don't want pioavail
+ * update happening during this, and we don't want any other
+ * sends going out, so turn those off for the duration. We read
+ * the scratch register to be sure that cancels and the abort
+ * have taken effect in the chip. Otherwise two parts are same
+ * as ipath_force_pio_avail_update()
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
+ | INFINIPATH_S_PIOENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl | INFINIPATH_S_ABORT);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* disarm all send buffers */
+ ipath_disarm_piobufs(dd, 0,
+ dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
+ if (restore_sendctrl) {
+ /* else done by caller later if needed */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
+ INFINIPATH_S_PIOENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ /* and again, be sure all have hit the chip */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
+
+ if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
+ !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
+ test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ /* only wait so long for intr */
+ dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
+ dd->ipath_sdma_reset_wait = 200;
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ }
+bail:;
}
-/**
- * ipath_waitfor_mdio_cmdready - wait for last command to complete
- * @dd: the infinipath device
- *
- * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
- * away indicating the last command has completed. It doesn't return data
+/*
+ * Force an update of in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons. We read the scratch register
+ * to make it highly likely that the update will have happened by the
+ * time we return. If already off (as in cancel_sends above), this
+ * routine is a nop, on the assumption that the caller will "do the
+ * right thing".
*/
-int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
+void ipath_force_pio_avail_update(struct ipath_devdata *dd)
{
- unsigned long timeout;
- u64 val;
- int ret;
-
- /* wait a ridiculously long time */
- timeout = jiffies + msecs_to_jiffies(5);
- do {
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
- if (!(val & IPATH_MDIO_CMDVALID)) {
- ret = 0;
- break;
- }
- cond_resched();
- if (time_after(jiffies, timeout)) {
- ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
- (unsigned long long) val);
- ret = -ENODEV;
- break;
- }
- } while (1);
+ unsigned long flags;
- return ret;
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
+ int linitcmd)
{
+ u64 mod_wd;
static const char *what[4] = {
- [0] = "DOWN",
- [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT",
+ [0] = "NOP",
+ [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
};
- int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
- INFINIPATH_IBCC_LINKCMD_MASK;
-
- ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
- "is %s\n", dd->ipath_unit,
- what[linkcmd],
- ipath_ibcstatus_str[
- (ipath_read_kreg64
- (dd, dd->ipath_kregs->kr_ibcstatus) >>
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
- INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
- /* flush all queued sends when going to DOWN or INIT, to be sure that
- * they don't block MAD packets */
- if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- (unsigned)(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) -
- dd->ipath_lastport_piobuf);
+
+ if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
+ /*
+ * If we are told to disable, note that so link-recovery
+ * code does not attempt to bring us back up.
+ */
+ preempt_disable();
+ dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
+ preempt_enable();
+ } else if (linitcmd) {
+ /*
+ * Any other linkinitcmd will lead to LINKDOWN and then
+ * to INIT (if all is well), so clear flag to let
+ * link-recovery code attempt to bring us back up.
+ */
+ preempt_disable();
+ dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+ preempt_enable();
}
+ mod_wd = (linkcmd << dd->ibcc_lc_shift) |
+ (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_cdbg(VERBOSE,
+ "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
+ dd->ipath_unit, what[linkcmd], linitcmd,
+ ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
+
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl | which);
+ dd->ipath_ibcctrl | mod_wd);
+ /* read from chip so write is flushed */
+ (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
}
int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
@@ -1479,38 +2003,33 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
int ret;
switch (newstate) {
+ case IPATH_IB_LINKDOWN_ONLY:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
case IPATH_IB_LINKDOWN:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_POLL);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKDOWN_SLEEP:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_SLEEP);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKDOWN_DISABLE:
- ipath_set_ib_lstate(dd,
- INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_DISABLE);
/* don't wait */
ret = 0;
goto bail;
- case IPATH_IB_LINKINIT:
- if (dd->ipath_flags & IPATH_LINKINIT) {
- ret = 0;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- lstate = IPATH_LINKINIT;
- break;
-
case IPATH_IB_LINKARM:
if (dd->ipath_flags & IPATH_LINKARMED) {
ret = 0;
@@ -1521,8 +2040,8 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
ret = -EINVAL;
goto bail;
}
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
+
/*
* Since the port can transition to ACTIVE by receiving
* a non VL 15 packet, wait for either state.
@@ -1539,11 +2058,51 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
ret = -EINVAL;
goto bail;
}
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
lstate = IPATH_LINKACTIVE;
break;
+ case IPATH_IB_LINK_LOOPBACK:
+ dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+
+ /* turn heartbeat off, as it causes loopback to fail */
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_OFF);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINK_EXTERNAL:
+ dev_info(&dd->pcidev->dev,
+ "Disabling IB local loopback (normal)\n");
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_ON);
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ /*
+ * Heartbeat can be explicitly enabled by the user via
+ * "hrtbt_enable" "file", and if disabled, trying to enable here
+ * will have no effect. Implicit changes (heartbeat off when
+ * loopback on, and vice versa) are included to ease testing.
+ */
+ case IPATH_IB_LINK_HRTBT:
+ ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_ON);
+ goto bail;
+
+ case IPATH_IB_LINK_NO_HRTBT:
+ ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_OFF);
+ goto bail;
+
default:
ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
ret = -EINVAL;
@@ -1565,7 +2124,7 @@ bail:
* sanity checking on this, and we don't deal with what happens to
* programs that are already running when the size changes.
* NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
+ * link INIT state...
*/
int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
{
@@ -1580,7 +2139,7 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
* piosize). We check that it's one of the valid IB sizes.
*/
if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
- arg != 4096) {
+ (arg != 4096 || !ipath_mtu4096)) {
ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
ret = -EINVAL;
goto bail;
@@ -1596,6 +2155,8 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
/* Only if it's not the initial value (or reset to it) */
if (piosize != dd->ipath_init_ibmaxlen) {
+ if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
+ piosize = dd->ipath_init_ibmaxlen;
dd->ipath_ibmaxlen = piosize;
changed = 1;
}
@@ -1609,24 +2170,17 @@ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
}
if (changed) {
+ u64 ibc = dd->ipath_ibcctrl, ibdw;
/*
- * set the IBC maxpktlength to the size of our pio
- * buffers in words
+ * update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords
*/
- u64 ibc = dd->ipath_ibcctrl;
+ dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
+ ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
- INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
- piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
- dd->ipath_ibmaxlen = piosize;
- piosize /= sizeof(u32); /* in words */
- /*
- * for ICRC, which we only send in diag test pkt mode, and
- * we don't need to worry about that for mtu
- */
- piosize += 1;
-
- ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+ dd->ibcc_mpl_shift);
+ ibc |= ibdw << dd->ibcc_mpl_shift;
dd->ipath_ibcctrl = ibc;
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl);
@@ -1639,38 +2193,20 @@ bail:
return ret;
}
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
{
- dd->ipath_lid = arg;
+ dd->ipath_lid = lid;
dd->ipath_lmc = lmc;
- return 0;
-}
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
+ (~((1U << lmc) - 1)) << 16);
-/**
- * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to read
- * @port: the port containing the register
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
- unsigned port)
-{
- u16 where;
-
- if (port < dd->ipath_portcnt &&
- (regno == dd->ipath_kregs->kr_rcvhdraddr ||
- regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
- where = regno + port;
- else
- where = -1;
+ dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
- return ipath_read_kreg64(dd, where);
+ return 0;
}
+
/**
* ipath_write_kreg_port - write a device's per-port 64-bit kernel register
* @dd: the infinipath device
@@ -1696,6 +2232,84 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
ipath_write_kreg(dd, where, value);
}
+/*
+ * Following deal with the "obviously simple" task of overriding the state
+ * of the LEDS, which normally indicate link physical and logical status.
+ * The complications arise in dealing with different hardware mappings
+ * and the board-dependent routine being called from interrupts.
+ * and then there's the requirement to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+static void ipath_run_led_override(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+ int timeoff;
+ int pidx;
+ u64 lstate, ltstate, val;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ pidx = dd->ipath_led_override_phase++ & 1;
+ dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
+ timeoff = dd->ipath_led_override_timeoff;
+
+ /*
+ * below potentially restores the LED values per current status,
+ * should also possibly setup the traffic-blink register,
+ * but leave that to per-chip functions.
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ltstate = ipath_ib_linktrstate(dd, val);
+ lstate = ipath_ib_linkstate(dd, val);
+
+ dd->ipath_f_setextled(dd, lstate, ltstate);
+ mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
+}
+
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
+{
+ int timeoff, freq;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ /* First check if we are blinking. If not, use 1HZ polling */
+ timeoff = HZ;
+ freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+ if (freq) {
+ /* For blink, set each phase from one nybble of val */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
+ timeoff = (HZ << 4)/freq;
+ } else {
+ /* Non-blink set both phases the same. */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = val & 0xF;
+ }
+ dd->ipath_led_override_timeoff = timeoff;
+
+ /*
+ * If the timer has not already been started, do so. Use a "quick"
+ * timeout so the function will be called soon, to look at our request.
+ */
+ if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
+ /* Need to start timer */
+ init_timer(&dd->ipath_led_override_timer);
+ dd->ipath_led_override_timer.function =
+ ipath_run_led_override;
+ dd->ipath_led_override_timer.data = (unsigned long) dd;
+ dd->ipath_led_override_timer.expires = jiffies + 1;
+ add_timer(&dd->ipath_led_override_timer);
+ } else
+ atomic_dec(&dd->ipath_led_override_timer_active);
+}
+
/**
* ipath_shutdown_device - shut down a device
* @dd: the infinipath device
@@ -1707,10 +2321,12 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
*/
void ipath_shutdown_device(struct ipath_devdata *dd)
{
- u64 val;
+ unsigned long flags;
ipath_dbg("Shutting down the device\n");
+ ipath_hol_up(dd); /* make sure user processes aren't suspended */
+
dd->ipath_flags |= IPATH_LINKUNK;
dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
IPATH_LINKINIT | IPATH_LINKARMED |
@@ -1725,30 +2341,37 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ teardown_sdma(dd);
+
/*
* gracefully stop all sends allowing any in progress to trickle out
* first.
*/
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl = 0;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
/* flush it */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
/*
* enough for anything that's going to trickle out to have actually
* done so.
*/
udelay(5);
+ dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
+
+ ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
+ ipath_cancel_sends(dd, 0);
+
/*
- * abort any armed or launched PIO buffers that didn't go. (self
- * clearing). Will cause any packet currently being transmitted to
- * go out with an EBP, and may also cause a short packet error on
- * the receiver.
+ * we are shutting down, so tell components that care. We don't do
+ * this on just a link state change, much like ethernet, a cable
+ * unplug, etc. doesn't change driver state
*/
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
-
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
@@ -1758,15 +2381,24 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
/*
* clear SerdesEnable and turn the leds off; do this here because
* we are unloading, so don't count on interrupts to move along
- * Turn the LEDs off explictly for the same reason.
+ * Turn the LEDs off explicitly for the same reason.
*/
dd->ipath_f_quiet_serdes(dd);
- dd->ipath_f_setextled(dd, 0, 0);
+ /* stop all the timers that might still be running */
+ del_timer_sync(&dd->ipath_hol_timer);
if (dd->ipath_stats_timer_active) {
del_timer_sync(&dd->ipath_stats_timer);
dd->ipath_stats_timer_active = 0;
}
+ if (dd->ipath_intrchk_timer.data) {
+ del_timer_sync(&dd->ipath_intrchk_timer);
+ dd->ipath_intrchk_timer.data = 0;
+ }
+ if (atomic_read(&dd->ipath_led_override_timer_active)) {
+ del_timer_sync(&dd->ipath_led_override_timer);
+ atomic_set(&dd->ipath_led_override_timer_active, 0);
+ }
/*
* clear all interrupts and errors, so that the next time the driver
@@ -1777,6 +2409,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+ ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+ ipath_update_eeprom_log(dd);
}
/**
@@ -1805,7 +2440,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)pd->port_rcvhdrtail_kvaddr,
+ pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
@@ -1824,24 +2459,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
dma_free_coherent(&dd->pcidev->dev, size,
base, pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
- } else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
+ } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
- struct sk_buff **skbs = dd->ipath_port0_skbs;
-
- dd->ipath_port0_skbs = NULL;
- ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
- "@ %p\n", pd->port_port, skbs);
- for (e = 0; e < dd->ipath_rcvegrcnt; e++)
- if (skbs[e])
- dev_kfree_skb(skbs[e]);
- vfree(skbs);
+ struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
+
+ dd->ipath_port0_skbinfo = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d "
+ "ipath_port0_skbinfo @ %p\n", pd->port_port,
+ skbinfo);
+ for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
+ if (skbinfo[e].skb) {
+ pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+ dd->ipath_ibmaxlen,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(skbinfo[e].skb);
+ }
+ vfree(skbinfo);
}
kfree(pd->port_tid_pg_list);
+ vfree(pd->subport_uregbase);
+ vfree(pd->subport_rcvegrbuf);
+ vfree(pd->subport_rcvhdr_base);
kfree(pd);
}
@@ -1849,17 +2492,14 @@ static int __init infinipath_init(void)
{
int ret;
- ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+ if (ipath_debug & __IPATH_DBG)
+ printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
idr_init(&unit_table);
- if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto bail;
- }
ret = pci_register_driver(&ipath_driver);
if (ret < 0) {
@@ -1868,35 +2508,15 @@ static int __init infinipath_init(void)
goto bail_unit;
}
- ret = ipath_driver_create_group(&ipath_driver.driver);
- if (ret < 0) {
- printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver "
- "sysfs entries: error %d\n", -ret);
- goto bail_pci;
- }
-
ret = ipath_init_ipathfs();
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
"ipathfs: error %d\n", -ret);
- goto bail_group;
- }
-
- ret = ipath_diagpkt_add();
- if (ret < 0) {
- printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
- "diag data device: error %d\n", -ret);
- goto bail_ipathfs;
+ goto bail_pci;
}
goto bail;
-bail_ipathfs:
- ipath_exit_ipathfs();
-
-bail_group:
- ipath_driver_remove_group(&ipath_driver.driver);
-
bail_pci:
pci_unregister_driver(&ipath_driver);
@@ -1907,150 +2527,10 @@ bail:
return ret;
}
-static void cleanup_device(struct ipath_devdata *dd)
-{
- int port;
-
- ipath_shutdown_device(dd);
-
- if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
- /* can't do anything more with chip; needs re-init */
- *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
- if (dd->ipath_kregbase) {
- /*
- * if we haven't already cleaned up before these are
- * to ensure any register reads/writes "fail" until
- * re-init
- */
- dd->ipath_kregbase = NULL;
- dd->ipath_uregbase = 0;
- dd->ipath_sregbase = 0;
- dd->ipath_cregbase = 0;
- dd->ipath_kregsize = 0;
- }
- ipath_disable_wc(dd);
- }
-
- if (dd->ipath_pioavailregs_dma) {
- dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *) dd->ipath_pioavailregs_dma,
- dd->ipath_pioavailregs_phys);
- dd->ipath_pioavailregs_dma = NULL;
- }
- if (dd->ipath_dummy_hdrq) {
- dma_free_coherent(&dd->pcidev->dev,
- dd->ipath_pd[0]->port_rcvhdrq_size,
- dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
- dd->ipath_dummy_hdrq = NULL;
- }
-
- if (dd->ipath_pageshadow) {
- struct page **tmpp = dd->ipath_pageshadow;
- int i, cnt = 0;
-
- ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
- "locked\n");
- for (port = 0; port < dd->ipath_cfgports; port++) {
- int port_tidbase = port * dd->ipath_rcvtidcnt;
- int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
- for (i = port_tidbase; i < maxtid; i++) {
- if (!tmpp[i])
- continue;
- ipath_release_user_pages(&tmpp[i], 1);
- tmpp[i] = NULL;
- cnt++;
- }
- }
- if (cnt) {
- ipath_stats.sps_pageunlocks += cnt;
- ipath_cdbg(VERBOSE, "There were still %u expTID "
- "entries locked\n", cnt);
- }
- if (ipath_stats.sps_pagelocks ||
- ipath_stats.sps_pageunlocks)
- ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
- "unlocked via ipath_m{un}lock\n",
- (unsigned long long)
- ipath_stats.sps_pagelocks,
- (unsigned long long)
- ipath_stats.sps_pageunlocks);
-
- ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
- dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
- dd->ipath_pageshadow = NULL;
- }
-
- /*
- * free any resources still in use (usually just kernel ports)
- * at unload; we do for portcnt, not cfgports, because cfgports
- * could have changed while we were loaded.
- */
- for (port = 0; port < dd->ipath_portcnt; port++) {
- struct ipath_portdata *pd = dd->ipath_pd[port];
- dd->ipath_pd[port] = NULL;
- ipath_free_pddata(dd, pd);
- }
- kfree(dd->ipath_pd);
- /*
- * debuggability, in case some cleanup path tries to use it
- * after this
- */
- dd->ipath_pd = NULL;
-}
-
static void __exit infinipath_cleanup(void)
{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- ipath_diagpkt_remove();
-
ipath_exit_ipathfs();
- ipath_driver_remove_group(&ipath_driver.driver);
-
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- /*
- * turn off rcv, send, and interrupts for all ports, all drivers
- * should also hard reset the chip here?
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
- * for all versions of the driver, if they were allocated
- */
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- if (dd->ipath_kregbase)
- cleanup_device(dd);
-
- if (dd->pcidev) {
- if (dd->pcidev->irq) {
- ipath_cdbg(VERBOSE,
- "unit %u free_irq of irq %x\n",
- dd->ipath_unit, dd->pcidev->irq);
- free_irq(dd->pcidev->irq, dd);
- } else
- ipath_dbg("irq is 0, not doing free_irq "
- "for unit %u\n", dd->ipath_unit);
-
- /*
- * we check for NULL here, because it's outside
- * the kregbase check, and we need to call it
- * after the free_irq. Thus it's possible that
- * the function pointers were never initialized.
- */
- if (dd->ipath_f_cleanup)
- /* clean up chip-specific stuff */
- dd->ipath_f_cleanup(dd);
-
- dd->pcidev = NULL;
- }
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
@@ -2070,12 +2550,23 @@ int ipath_reset_device(int unit)
{
int ret, i;
struct ipath_devdata *dd = ipath_lookup(unit);
+ unsigned long flags;
if (!dd) {
ret = -ENODEV;
goto bail;
}
+ if (atomic_read(&dd->ipath_led_override_timer_active)) {
+ /* Need to stop LED timer, _then_ shut off LEDs */
+ del_timer_sync(&dd->ipath_led_override_timer);
+ atomic_set(&dd->ipath_led_override_timer_active, 0);
+ }
+
+ /* Shut off LEDs after we are sure timer is not running */
+ dd->ipath_led_override = LED_OVER_BOTH_OFF;
+ dd->ipath_f_setextled(dd, 0, 0);
+
dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
@@ -2085,26 +2576,34 @@ int ipath_reset_device(int unit)
goto bail;
}
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
if (dd->ipath_pd)
for (i = 1; i < dd->ipath_cfgports; i++) {
- if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
- ipath_dbg("unit %u port %d is in use "
- "(PID %u cmd %s), can't reset\n",
- unit, i,
- dd->ipath_pd[i]->port_pid,
- dd->ipath_pd[i]->port_comm);
- ret = -EBUSY;
- goto bail;
- }
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+ continue;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+ ipath_dbg("unit %u port %d is in use "
+ "(PID %u cmd %s), can't reset\n",
+ unit, i,
+ pid_nr(dd->ipath_pd[i]->port_pid),
+ dd->ipath_pd[i]->port_comm);
+ ret = -EBUSY;
+ goto bail;
}
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ teardown_sdma(dd);
dd->ipath_flags &= ~IPATH_INITTED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
ret = dd->ipath_f_reset(dd);
- if (ret != 1)
- ipath_dbg("reset was not successful\n");
- ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
- unit);
- ret = ipath_init_chip(dd, 1);
+ if (ret == 1) {
+ ipath_dbg("Reinitializing unit %u after reset attempt\n",
+ unit);
+ ret = ipath_init_chip(dd, 1);
+ } else
+ ret = -EAGAIN;
if (ret)
ipath_dev_err(dd, "Reinitialize unit %u after "
"reset failed with %d\n", unit, ret);
@@ -2116,13 +2615,127 @@ bail:
return ret;
}
+/*
+ * send a signal to all the processes that have the driver open
+ * through the normal interfaces (i.e., everything other than diags
+ * interface). Returns number of signalled processes.
+ */
+static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
+{
+ int i, sub, any = 0;
+ struct pid *pid;
+ unsigned long flags;
+
+ if (!dd->ipath_pd)
+ return 0;
+
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+ continue;
+ pid = dd->ipath_pd[i]->port_pid;
+ if (!pid)
+ continue;
+
+ dev_info(&dd->pcidev->dev, "context %d in use "
+ "(PID %u), sending signal %d\n",
+ i, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
+ any++;
+ for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
+ pid = dd->ipath_pd[i]->port_subpid[sub];
+ if (!pid)
+ continue;
+ dev_info(&dd->pcidev->dev, "sub-context "
+ "%d:%d in use (PID %u), sending "
+ "signal %d\n", i, sub, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
+ any++;
+ }
+ }
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+ return any;
+}
+
+static void ipath_hol_signal_down(struct ipath_devdata *dd)
+{
+ if (ipath_signal_procs(dd, SIGSTOP))
+ ipath_dbg("Stopped some processes\n");
+ ipath_cancel_sends(dd, 1);
+}
+
+
+static void ipath_hol_signal_up(struct ipath_devdata *dd)
+{
+ if (ipath_signal_procs(dd, SIGCONT))
+ ipath_dbg("Continued some processes\n");
+}
+
+/*
+ * link is down, stop any users processes, and flush pending sends
+ * to prevent HoL blocking, then start the HoL timer that
+ * periodically continues, then stop procs, so they can detect
+ * link down if they want, and do something about it.
+ * Timer may already be running, so use mod_timer, not add_timer.
+ */
+void ipath_hol_down(struct ipath_devdata *dd)
+{
+ dd->ipath_hol_state = IPATH_HOL_DOWN;
+ ipath_hol_signal_down(dd);
+ dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+ dd->ipath_hol_timer.expires = jiffies +
+ msecs_to_jiffies(ipath_hol_timeout_ms);
+ mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+}
+
+/*
+ * link is up, continue any user processes, and ensure timer
+ * is a nop, if running. Let timer keep running, if set; it
+ * will nop when it sees the link is up
+ */
+void ipath_hol_up(struct ipath_devdata *dd)
+{
+ ipath_hol_signal_up(dd);
+ dd->ipath_hol_state = IPATH_HOL_UP;
+}
+
+/*
+ * toggle the running/not running state of user proceses
+ * to prevent HoL blocking on chip resources, but still allow
+ * user processes to do link down special case handling.
+ * Should only be called via the timer
+ */
+void ipath_hol_event(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
+ && dd->ipath_hol_state != IPATH_HOL_UP) {
+ dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+ ipath_dbg("Stopping processes\n");
+ ipath_hol_signal_down(dd);
+ } else { /* may do "extra" if also in ipath_hol_up() */
+ dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
+ ipath_dbg("Continuing processes\n");
+ ipath_hol_signal_up(dd);
+ }
+ if (dd->ipath_hol_state == IPATH_HOL_UP)
+ ipath_dbg("link's up, don't resched timer\n");
+ else {
+ dd->ipath_hol_timer.expires = jiffies +
+ msecs_to_jiffies(ipath_hol_timeout_ms);
+ mod_timer(&dd->ipath_hol_timer,
+ dd->ipath_hol_timer.expires);
+ }
+}
+
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
{
u64 val;
- if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+
+ if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
return -1;
- }
- if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+ if (dd->ipath_rx_pol_inv != new_pol_inv) {
dd->ipath_rx_pol_inv = new_pol_inv;
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
@@ -2133,5 +2746,34 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
}
return 0;
}
+
+/*
+ * Disable and enable the armlaunch error. Used for PIO bandwidth testing on
+ * the 7220, which is count-based, rather than trigger-based. Safe for the
+ * driver check, since it's at init. Not completely safe when used for
+ * user-mode checking, since some error checking can be lost, but not
+ * particularly risky, and only has problematic side-effects in the face of
+ * very buggy user code. There is no reference counting, but that's also
+ * fine, given the intended use.
+ */
+void ipath_enable_armlaunch(struct ipath_devdata *dd)
+{
+ dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ INFINIPATH_E_SPIOARMLAUNCH);
+ dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+}
+
+void ipath_disable_armlaunch(struct ipath_devdata *dd)
+{
+ /* so don't re-enable if already set */
+ dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+}
+
module_init(infinipath_init);
module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 3313356ab93..fc7181985e8 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -62,6 +62,33 @@
* accessing eeprom contents from within the kernel, only via sysfs.
*/
+/* Added functionality for IBA7220-based cards */
+#define IPATH_EEPROM_DEV_V1 0xA0
+#define IPATH_EEPROM_DEV_V2 0xA2
+#define IPATH_TEMP_DEV 0x98
+#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
+#define IPATH_NO_DEV (0xFF)
+
+/*
+ * The number of I2C chains is proliferating. Table below brings
+ * some order to the madness. The basic principle is that the
+ * table is scanned from the top, and a "probe" is made to the
+ * device probe_dev. If that succeeds, the chain is considered
+ * to be of that type, and dd->i2c_chain_type is set to the index+1
+ * of the entry.
+ * The +1 is so static initialization can mean "unknown, do probe."
+ */
+static struct i2c_chain_desc {
+ u8 probe_dev; /* If seen at probe, chain is this type */
+ u8 eeprom_dev; /* Dev addr (if any) for EEPROM */
+ u8 temp_dev; /* Dev Addr (if any) for Temp-sense */
+} i2c_chains[] = {
+ { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
+ { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
+ { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
+ { IPATH_NO_DEV }
+};
+
enum i2c_type {
i2c_line_scl = 0,
i2c_line_sda
@@ -75,13 +102,6 @@ enum i2c_state {
#define READ_CMD 1
#define WRITE_CMD 0
-static int eeprom_init;
-
-/*
- * The gpioval manipulation really should be protected by spinlocks
- * or be converted to use atomic operations.
- */
-
/**
* i2c_gpio_set - set a GPIO line
* @dd: the infinipath device
@@ -95,39 +115,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state new_line_state)
{
- u64 read_val, write_val, mask, *gpioval;
+ u64 out_mask, dir_mask, *gpioval;
+ unsigned long flags = 0;
gpioval = &dd->ipath_gpio_out;
- read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
- if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
- else
- mask = ipath_gpio_sda;
- if (new_line_state == i2c_line_high)
+ if (line == i2c_line_scl) {
+ dir_mask = dd->ipath_gpio_scl;
+ out_mask = (1UL << dd->ipath_gpio_scl_num);
+ } else {
+ dir_mask = dd->ipath_gpio_sda;
+ out_mask = (1UL << dd->ipath_gpio_sda_num);
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ if (new_line_state == i2c_line_high) {
/* tri-state the output rather than force high */
- write_val = read_val & ~mask;
- else
+ dd->ipath_extctrl &= ~dir_mask;
+ } else {
/* config line to be an output */
- write_val = read_val | mask;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+ dd->ipath_extctrl |= dir_mask;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
- /* set high and verify */
+ /* set output as well (no real verify) */
if (new_line_state == i2c_line_high)
- write_val = 0x1UL;
+ *gpioval |= out_mask;
else
- write_val = 0x0UL;
+ *gpioval &= ~out_mask;
- if (line == i2c_line_scl) {
- write_val <<= ipath_gpio_scl_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
- *gpioval |= write_val;
- } else {
- write_val <<= ipath_gpio_sda_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
- *gpioval |= write_val;
- }
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
return 0;
}
@@ -145,8 +163,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state *curr_statep)
{
- u64 read_val, write_val, mask;
+ u64 read_val, mask;
int ret;
+ unsigned long flags = 0;
/* check args */
if (curr_statep == NULL) {
@@ -154,15 +173,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
goto bail;
}
- read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
- write_val = read_val & ~mask;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+ mask = dd->ipath_gpio_sda;
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ dd->ipath_extctrl &= ~mask;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+ /*
+ * Below is very unlikely to reflect true input state if Output
+ * Enable actually changed.
+ */
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
if (read_val & mask)
*curr_statep = i2c_line_high;
@@ -187,10 +212,12 @@ bail:
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ rmb();
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
{
+ udelay(1);
i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
i2c_wait_for_writes(dd);
@@ -234,6 +261,27 @@ static int i2c_ackrcv(struct ipath_devdata *dd)
}
/**
+ * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
+ * @dd: the infinipath device
+ *
+ * Returns byte shifted out of device
+ */
+static int rd_byte(struct ipath_devdata *dd)
+{
+ int bit_cntr, data;
+
+ data = 0;
+
+ for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+ data <<= 1;
+ scl_out(dd, i2c_line_high);
+ data |= sda_in(dd, 0);
+ scl_out(dd, i2c_line_low);
+ }
+ return data;
+}
+
+/**
* wr_byte - write a byte, one bit at a time
* @dd: the infinipath device
* @data: the byte to write
@@ -313,9 +361,14 @@ static int eeprom_reset(struct ipath_devdata *dd)
int clock_cycles_left = 9;
u64 *gpioval = &dd->ipath_gpio_out;
int ret;
+ unsigned long flags;
- eeprom_init = 1;
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ /* Make sure shadows are consistent */
+ dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
"is %llx\n", (unsigned long long) *gpioval);
@@ -327,12 +380,17 @@ static int eeprom_reset(struct ipath_devdata *dd)
scl_out(dd, i2c_line_low);
sda_out(dd, i2c_line_high);
+ /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
while (clock_cycles_left--) {
scl_out(dd, i2c_line_high);
+ /* SDA seen high, issue START by dropping it while SCL high */
if (sda_in(dd, 0)) {
sda_out(dd, i2c_line_low);
scl_out(dd, i2c_line_low);
+ /* ATMEL spec says must be followed by STOP. */
+ scl_out(dd, i2c_line_high);
+ sda_out(dd, i2c_line_high);
ret = 0;
goto bail;
}
@@ -346,29 +404,121 @@ bail:
return ret;
}
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
+/*
+ * Probe for I2C device at specified address. Returns 0 for "success"
+ * to match rest of this file.
+ * Leave bus in "reasonable" state for further commands.
*/
+static int i2c_probe(struct ipath_devdata *dd, int devaddr)
+{
+ int ret = 0;
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
- void *buffer, int len)
+ ret = eeprom_reset(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
+ devaddr);
+ return ret;
+ }
+ /*
+ * Reset no longer leaves bus in start condition, so normal
+ * i2c_startcmd() will do.
+ */
+ ret = i2c_startcmd(dd, devaddr | READ_CMD);
+ if (ret)
+ ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
+ devaddr);
+ else {
+ /*
+ * Device did respond. Complete a single-byte read, because some
+ * devices apparently cannot handle STOP immediately after they
+ * ACK the start-cmd.
+ */
+ int data;
+ data = rd_byte(dd);
+ stop_cmd(dd);
+ ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
+ }
+ return ret;
+}
+
+/*
+ * Returns the "i2c type". This is a pointer to a struct that describes
+ * the I2C chain on this board. To minimize impact on struct ipath_devdata,
+ * the (small integer) index into the table is actually memoized, rather
+ * then the pointer.
+ * Memoization is because the type is determined on the first call per chip.
+ * An alternative would be to move type determination to early
+ * init code.
+ */
+static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
{
- /* compiler complains unless initialized */
- u8 single_byte = 0;
- int bit_cntr;
- int ret;
+ int idx;
+
+ /* Get memoized index, from previous successful probes */
+ idx = dd->ipath_i2c_chain_type - 1;
+ if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
+ goto done;
+
+ idx = 0;
+ while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
+ /* if probe succeeds, this is type */
+ if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
+ break;
+ ++idx;
+ }
- if (!eeprom_init)
+ /*
+ * Old EEPROM (first entry) may require a reset after probe,
+ * rather than being able to "start" after "stop"
+ */
+ if (idx == 0)
eeprom_reset(dd);
- eeprom_offset = (eeprom_offset << 1) | READ_CMD;
+ if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
+ idx = -1;
+ else
+ dd->ipath_i2c_chain_type = idx + 1;
+done:
+ return (idx >= 0) ? i2c_chains + idx : NULL;
+}
- if (i2c_startcmd(dd, eeprom_offset)) {
- ipath_dbg("Failed startcmd\n");
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+ u8 eeprom_offset, void *buffer, int len)
+{
+ int ret;
+ struct i2c_chain_desc *icd;
+ u8 *bp = buffer;
+
+ ret = 1;
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->eeprom_dev == IPATH_NO_DEV) {
+ /* legacy not-really-I2C */
+ ipath_cdbg(VERBOSE, "Start command only address\n");
+ eeprom_offset = (eeprom_offset << 1) | READ_CMD;
+ ret = i2c_startcmd(dd, eeprom_offset);
+ } else {
+ /* Actual I2C */
+ ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
+ if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+ ipath_dbg("Failed EEPROM startcmd\n");
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+ ret = wr_byte(dd, eeprom_offset);
+ stop_cmd(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to write EEPROM address\n");
+ ret = 1;
+ goto bail;
+ }
+ ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
+ }
+ if (ret) {
+ ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
stop_cmd(dd);
ret = 1;
goto bail;
@@ -379,22 +529,11 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
* incrementing the address.
*/
while (len-- > 0) {
- /* get data */
- single_byte = 0;
- for (bit_cntr = 8; bit_cntr; bit_cntr--) {
- u8 bit;
- scl_out(dd, i2c_line_high);
- bit = sda_in(dd, 0);
- single_byte |= bit << (bit_cntr - 1);
- scl_out(dd, i2c_line_low);
- }
-
+ /* get and store data */
+ *bp++ = rd_byte(dd);
/* send ack if not the last byte */
if (len)
send_ack(dd);
-
- *((u8 *) buffer) = single_byte;
- buffer++;
}
stop_cmd(dd);
@@ -405,30 +544,40 @@ bail:
return ret;
}
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
- const void *buffer, int len)
+static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buffer, int len)
{
- u8 single_byte;
int sub_len;
const u8 *bp = buffer;
int max_wait_time, i;
int ret;
+ struct i2c_chain_desc *icd;
- if (!eeprom_init)
- eeprom_reset(dd);
+ ret = 1;
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
while (len > 0) {
- if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) {
- ipath_dbg("Failed to start cmd offset %u\n",
- eeprom_offset);
- goto failed_write;
+ if (icd->eeprom_dev == IPATH_NO_DEV) {
+ if (i2c_startcmd(dd,
+ (eeprom_offset << 1) | WRITE_CMD)) {
+ ipath_dbg("Failed to start cmd offset %u\n",
+ eeprom_offset);
+ goto failed_write;
+ }
+ } else {
+ /* Real I2C */
+ if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+ ipath_dbg("Failed EEPROM startcmd\n");
+ goto failed_write;
+ }
+ ret = wr_byte(dd, eeprom_offset);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to write EEPROM "
+ "address\n");
+ goto failed_write;
+ }
}
sub_len = min(len, 4);
@@ -454,9 +603,11 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
* the writes have completed. We do this inline to avoid
* the debug prints that are in the real read routine
* if the startcmd fails.
+ * We also use the proper device address, so it doesn't matter
+ * whether we have real eeprom_dev. legacy likes any address.
*/
max_wait_time = 100;
- while (i2c_startcmd(dd, READ_CMD)) {
+ while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
stop_cmd(dd);
if (!--max_wait_time) {
ipath_dbg("Did not get successful read to "
@@ -464,15 +615,8 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
goto failed_write;
}
}
- /* now read the zero byte */
- for (i = single_byte = 0; i < 8; i++) {
- u8 bit;
- scl_out(dd, i2c_line_high);
- bit = sda_in(dd, 0);
- scl_out(dd, i2c_line_low);
- single_byte <<= 1;
- single_byte |= bit;
- }
+ /* now read (and ignore) the resulting byte */
+ rd_byte(dd);
stop_cmd(dd);
}
@@ -487,12 +631,62 @@ bail:
return ret;
}
+/**
+ * ipath_eeprom_read - receives bytes from the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: address to read from
+ * @buffer: where to store result
+ * @len: number of bytes to receive
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+ void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ return ret;
+}
+
+/**
+ * ipath_eeprom_write - writes data to the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: where to place data
+ * @buffer: data to write
+ * @len: number of bytes to write
+ */
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ return ret;
+}
+
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
u8 *ip = (u8 *) ifp;
u8 csum = 0, len;
- for (len = 0; len < ifp->if_length; len++)
+ /*
+ * Limit length checksummed to max length of actual data.
+ * Checksum of erased eeprom will still be bad, but we avoid
+ * reading past the end of the buffer we were passed.
+ */
+ len = ifp->if_length;
+ if (len > sizeof(struct ipath_flash))
+ len = sizeof(struct ipath_flash);
+ while (len--)
csum += *ip++;
csum -= ifp->if_csum;
csum = ~csum;
@@ -514,13 +708,13 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
void *buf;
struct ipath_flash *ifp;
__be64 guid;
- int len;
+ int len, eep_stat;
u8 csum, *bguid;
int t = dd->ipath_unit;
struct ipath_devdata *dd0 = ipath_lookup(0);
if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
- u8 *bguid, oguid;
+ u8 oguid;
dd->ipath_guid = dd0->ipath_guid;
bguid = (u8 *) & dd->ipath_guid;
@@ -550,7 +744,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
goto bail;
}
- len = offsetof(struct ipath_flash, if_future);
+ /*
+ * read full flash, not just currently used part, since it may have
+ * been written with a newer definition
+ * */
+ len = sizeof(struct ipath_flash);
buf = vmalloc(len);
if (!buf) {
ipath_dev_err(dd, "Couldn't allocate memory to read %u "
@@ -558,7 +756,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
goto bail;
}
- if (ipath_eeprom_read(dd, 0, buf, len)) {
+ mutex_lock(&dd->ipath_eep_lock);
+ eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+
+ if (eep_stat) {
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
goto done;
}
@@ -570,8 +772,8 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
"0x%x, not 0x%x\n", csum, ifp->if_csum);
goto done;
}
- if (*(__be64 *) ifp->if_guid == 0ULL ||
- *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
+ if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
+ *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
ipath_dev_err(dd, "Invalid GUID %llx from flash; "
"ignoring\n",
*(unsigned long long *) ifp->if_guid);
@@ -612,7 +814,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
* elsewhere for backward-compatibility.
*/
char *snp = dd->ipath_serial;
- int len;
memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
snp[sizeof ifp->if_sprefix] = '\0';
len = strlen(snp);
@@ -625,12 +826,358 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
} else
memcpy(dd->ipath_serial, ifp->if_serial,
sizeof ifp->if_serial);
+ if (!strstr(ifp->if_comment, "Tested successfully"))
+ ipath_dev_err(dd, "Board SN %s did not pass functional "
+ "test: %s\n", dd->ipath_serial,
+ ifp->if_comment);
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
+ memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+ /*
+ * Power-on (actually "active") hours are kept as little-endian value
+ * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+ * atomic_t while running.
+ */
+ atomic_set(&dd->ipath_active_time, 0);
+ dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
done:
vfree(buf);
bail:;
}
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+ void *buf;
+ struct ipath_flash *ifp;
+ int len, hi_water;
+ uint32_t new_time, new_hrs;
+ u8 csum;
+ int ret, idx;
+ unsigned long flags;
+
+ /* first, check if we actually need to do anything. */
+ ret = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ if (dd->ipath_eep_st_new_errs[idx]) {
+ ret = 1;
+ break;
+ }
+ }
+ new_time = atomic_read(&dd->ipath_active_time);
+
+ if (ret == 0 && new_time < 3600)
+ return 0;
+
+ /*
+ * The quick-check above determined that there is something worthy
+ * of logging, so get current contents and do a more detailed idea.
+ * read full flash, not just currently used part, since it may have
+ * been written with a newer definition
+ */
+ len = sizeof(struct ipath_flash);
+ buf = vmalloc(len);
+ ret = 1;
+ if (!buf) {
+ ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+ "bytes from eeprom for logging\n", len);
+ goto bail;
+ }
+
+ /* Grab semaphore and read current EEPROM. If we get an
+ * error, let go, but if not, keep it until we finish write.
+ */
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (ret) {
+ ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+ goto free_bail;
+ }
+ ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+ if (ret) {
+ mutex_unlock(&dd->ipath_eep_lock);
+ ipath_dev_err(dd, "Unable read EEPROM for logging\n");
+ goto free_bail;
+ }
+ ifp = (struct ipath_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ mutex_unlock(&dd->ipath_eep_lock);
+ ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+ csum, ifp->if_csum);
+ ret = 1;
+ goto free_bail;
+ }
+ hi_water = 0;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ int new_val = dd->ipath_eep_st_new_errs[idx];
+ if (new_val) {
+ /*
+ * If we have seen any errors, add to EEPROM values
+ * We need to saturate at 0xFF (255) and we also
+ * would need to adjust the checksum if we were
+ * trying to minimize EEPROM traffic
+ * Note that we add to actual current count in EEPROM,
+ * in case it was altered while we were running.
+ */
+ new_val += ifp->if_errcntp[idx];
+ if (new_val > 0xFF)
+ new_val = 0xFF;
+ if (ifp->if_errcntp[idx] != new_val) {
+ ifp->if_errcntp[idx] = new_val;
+ hi_water = offsetof(struct ipath_flash,
+ if_errcntp) + idx;
+ }
+ /*
+ * update our shadow (used to minimize EEPROM
+ * traffic), to match what we are about to write.
+ */
+ dd->ipath_eep_st_errs[idx] = new_val;
+ dd->ipath_eep_st_new_errs[idx] = 0;
+ }
+ }
+ /*
+ * now update active-time. We would like to round to the nearest hour
+ * but unless atomic_t are sure to be proper signed ints we cannot,
+ * because we need to account for what we "transfer" to EEPROM and
+ * if we log an hour at 31 minutes, then we would need to set
+ * active_time to -29 to accurately count the _next_ hour.
+ */
+ if (new_time >= 3600) {
+ new_hrs = new_time / 3600;
+ atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+ new_hrs += dd->ipath_eep_hrs;
+ if (new_hrs > 0xFFFF)
+ new_hrs = 0xFFFF;
+ dd->ipath_eep_hrs = new_hrs;
+ if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+ ifp->if_powerhour[0] = new_hrs & 0xFF;
+ hi_water = offsetof(struct ipath_flash, if_powerhour);
+ }
+ if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+ ifp->if_powerhour[1] = new_hrs >> 8;
+ hi_water = offsetof(struct ipath_flash, if_powerhour)
+ + 1;
+ }
+ }
+ /*
+ * There is a tiny possibility that we could somehow fail to write
+ * the EEPROM after updating our shadows, but problems from holding
+ * the spinlock too long are a much bigger issue.
+ */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ if (hi_water) {
+ /* we made some change to the data, uopdate cksum and write */
+ csum = flash_csum(ifp, 1);
+ ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+ }
+ mutex_unlock(&dd->ipath_eep_lock);
+ if (ret)
+ ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+ vfree(buf);
+bail:
+ return ret;
+
+}
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+ uint new_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+ if (new_val > 255)
+ new_val = 255;
+ dd->ipath_eep_st_new_errs[eidx] = new_val;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ return;
+}
+
+static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
+{
+ int ret;
+ struct i2c_chain_desc *icd;
+
+ ret = -ENOENT;
+
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->temp_dev == IPATH_NO_DEV) {
+ /* tempsense only exists on new, real-I2C boards */
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+ ipath_dbg("Failed tempsense startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, regnum);
+ stop_cmd(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
+ regnum);
+ ret = -ENXIO;
+ goto bail;
+ }
+ if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
+ ipath_dbg("Failed tempsense RD startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ /*
+ * We can only clock out one byte per command, sensibly
+ */
+ ret = rd_byte(dd);
+ stop_cmd(dd);
+
+bail:
+ return ret;
+}
+
+#define VALID_TS_RD_REG_MASK 0xBF
+
+/**
+ * ipath_tempsense_read - read register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to read from
+ *
+ * returns reg contents (0..255) or < 0 for error
+ */
+int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
+{
+ int ret;
+
+ if (regnum > 7)
+ return -EINVAL;
+
+ /* return a bogus value for (the one) register we do not have */
+ if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
+ return 0;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_tempsense_internal_read(dd, regnum);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ /*
+ * There are three possibilities here:
+ * ret is actual value (0..255)
+ * ret is -ENXIO or -EINVAL from code in this file
+ * ret is -EINTR from mutex_lock_interruptible.
+ */
+ return ret;
+}
+
+static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
+ u8 regnum, u8 data)
+{
+ int ret = -ENOENT;
+ struct i2c_chain_desc *icd;
+
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->temp_dev == IPATH_NO_DEV) {
+ /* tempsense only exists on new, real-I2C boards */
+ ret = -ENXIO;
+ goto bail;
+ }
+ if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+ ipath_dbg("Failed tempsense startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, regnum);
+ if (ret) {
+ stop_cmd(dd);
+ ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
+ regnum);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, data);
+ stop_cmd(dd);
+ ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
+ if (ret) {
+ ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
+ regnum);
+ ret = -ENXIO;
+ }
+
+bail:
+ return ret;
+}
+
+#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
+
+/**
+ * ipath_tempsense_write - write register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to write
+ * @data: data to write
+ *
+ * returns 0 for success or < 0 for error
+ */
+int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
+{
+ int ret;
+
+ if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_tempsense_internal_write(dd, regnum, data);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ /*
+ * There are three possibilities here:
+ * ret is 0 for success
+ * ret is -ENXIO or -EINVAL from code in this file
+ * ret is -EINTR from mutex_lock_interruptible.
+ */
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 29930e22318..6d7f453b4d0 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,40 +35,87 @@
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/swap.h>
+#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/aio.h>
+#include <linux/jiffies.h>
+#include <linux/cpu.h>
#include <asm/pgtable.h>
#include "ipath_kernel.h"
#include "ipath_common.h"
+#include "ipath_user_sdma.h"
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
loff_t *);
+static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
+ unsigned long , loff_t);
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);
-static struct file_operations ipath_file_ops = {
+static const struct file_operations ipath_file_ops = {
.owner = THIS_MODULE,
.write = ipath_write,
+ .aio_write = ipath_writev,
.open = ipath_open,
.release = ipath_close,
.poll = ipath_poll,
- .mmap = ipath_mmap
+ .mmap = ipath_mmap,
+ .llseek = noop_llseek,
};
-static int ipath_get_base_info(struct ipath_portdata *pd,
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+ struct page *page;
+ u64 paddr = 0;
+
+ page = vmalloc_to_page(p);
+ if (page)
+ paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+ return paddr;
+}
+
+static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
+ struct ipath_portdata *pd = port_fp(fp);
int ret = 0;
struct ipath_base_info *kinfo = NULL;
struct ipath_devdata *dd = pd->port_dd;
+ unsigned subport_cnt;
+ int shared, master;
+ size_t sz;
+
+ subport_cnt = pd->port_subport_cnt;
+ if (!subport_cnt) {
+ shared = 0;
+ master = 0;
+ subport_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subport_fp(fp);
+ }
- if (ubase_size < sizeof(*kinfo)) {
+ sz = sizeof(*kinfo);
+ /* If port sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 7 * sizeof(u64);
+ if (ubase_size < sz) {
ipath_cdbg(PROC,
- "Base size %lu, need %lu (version mismatch?)\n",
- (unsigned long) ubase_size,
- (unsigned long) sizeof(*kinfo));
+ "Base size %zu, need %zu (version mismatch?)\n",
+ ubase_size, sz);
ret = -EINVAL;
goto bail;
}
@@ -95,7 +142,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
pd->port_rcvegrbuf_chunks;
- kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+ kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
/*
* for this use, may be ipath_cfgports summed over all chips that
* are are configured and present
@@ -118,32 +167,94 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* page_address() macro worked, but in 2.6.11, even that returns the
* full 64 bit address (upper bits all 1's). So far, using the
* physical addresses (or chip offsets, for chip mapping) works, but
- * no doubt some future kernel release will chang that, and we'll be
- * on to yet another method of dealing with this
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
*/
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
- kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
- kinfo->spi_piobufbase = (u64) pd->port_piobufs;
- kinfo->__spi_uregbase =
- dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ if (!shared) {
+ kinfo->spi_piocnt = pd->port_piocnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_ureg_align * pd->port_port;
+ } else if (master) {
+ kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+ (pd->port_piocnt % subport_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign *
+ (pd->port_piocnt - kinfo->spi_piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign * kinfo->spi_piocnt * slave;
+ }
+
+ if (shared) {
+ kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_ureg_align * pd->port_port;
+ kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+ kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+ kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
+
+ kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport_fp(fp));
+
+ kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport_fp(fp));
+ kinfo->spi_rcvhdr_tailaddr = 0;
+ kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+ pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+ subport_fp(fp));
+
+ kinfo->spi_subport_uregbase =
+ cvt_kvaddr(pd->subport_uregbase);
+ kinfo->spi_subport_rcvegrbuf =
+ cvt_kvaddr(pd->subport_rcvegrbuf);
+ kinfo->spi_subport_rcvhdr_base =
+ cvt_kvaddr(pd->subport_rcvhdr_base);
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
+ }
- kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ /*
+ * All user buffers are 2KB buffers. If we ever support
+ * giving 4KB buffers to user processes, this will need some
+ * work.
+ */
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase -
+ (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
- kinfo->spi_piosize = dd->ipath_ibmaxlen;
+ /*
+ * user mode PIO buffers are always 2KB, even when 4KB can
+ * be received, and sent via the kernel; this is ibmaxlen
+ * for 2K MTU.
+ */
+ kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
kinfo->spi_port = pd->port_port;
+ kinfo->spi_subport = subport_fp(fp);
kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
kinfo->spi_hw_version = dd->ipath_revision;
- if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
+ if (master) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+ }
+
+ sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+ if (copy_to_user(ubase, kinfo, sz))
ret = -EFAULT;
bail:
@@ -154,6 +265,7 @@ bail:
/**
* ipath_tid_update - update a port TID
* @pd: the port
+ * @fp: the ipath device file
* @ti: the TID information
*
* The new implementation as of Oct 2004 is that the driver assigns
@@ -176,11 +288,11 @@ bail:
* virtually contiguous pages, that should change to improve
* performance.
*/
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
const struct ipath_tid_info *ti)
{
int ret = 0, ntids;
- u32 tid, porttid, cnt, i, tidcnt;
+ u32 tid, porttid, cnt, i, tidcnt, tidoff;
u16 *tidlist;
struct ipath_devdata *dd = pd->port_dd;
u64 physaddr;
@@ -188,6 +300,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
u64 __iomem *tidbase;
unsigned long tidmap[8];
struct page **pagep = NULL;
+ unsigned subport = subport_fp(fp);
if (!dd->ipath_pageshadow) {
ret = -ENOMEM;
@@ -204,20 +317,34 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -EFAULT;
goto done;
}
- tidcnt = dd->ipath_rcvtidcnt;
- if (cnt >= tidcnt) {
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt) {
+ tidcnt = dd->ipath_rcvtidcnt;
+ tid = pd->port_tidcursor;
+ tidoff = 0;
+ } else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ tidoff = dd->ipath_rcvtidcnt - tidcnt;
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ tidoff = tidcnt * (subport - 1);
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
+ if (cnt > tidcnt) {
/* make sure it all fits in port_tid_pg_list */
dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
"TIDs, only trying max (%u)\n", cnt, tidcnt);
cnt = tidcnt;
}
- pagep = (struct page **)pd->port_tid_pg_list;
- tidlist = (u16 *) (&pagep[cnt]);
+ pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+ tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
memset(tidmap, 0, sizeof(tidmap));
- tid = pd->port_tidcursor;
/* before decrement; chip actual # */
- porttid = pd->port_port * tidcnt;
ntids = tidcnt;
tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
@@ -274,22 +401,26 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -ENOMEM;
break;
}
- tidlist[i] = tid;
+ tidlist[i] = tid + tidoff;
ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
- "vaddr %lx\n", i, tid, vaddr);
+ "vaddr %lx\n", i, tid + tidoff, vaddr);
/* we "know" system pages and TID pages are same size */
dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+ dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
/*
* don't need atomic or it's overhead
*/
__set_bit(tid, tidmap);
- physaddr = page_to_phys(pagep[i]);
+ physaddr = dd->ipath_physshadow[porttid + tid];
ipath_stats.sps_pagelocks++;
ipath_cdbg(VERBOSE,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
tid, vaddr, (unsigned long long) physaddr,
pagep[i]);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
+ dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
+ physaddr);
/*
* don't check this tid in ipath_portshadow, since we
* just filled it in; start with the next one.
@@ -315,8 +446,12 @@ static int ipath_tid_update(struct ipath_portdata *pd,
if (dd->ipath_pageshadow[porttid + tid]) {
ipath_cdbg(VERBOSE, "Freeing TID %u\n",
tid);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_stats.sps_pageunlocks++;
}
@@ -341,7 +476,10 @@ static int ipath_tid_update(struct ipath_portdata *pd,
}
if (tid == tidcnt)
tid = 0;
- pd->port_tidcursor = tid;
+ if (!pd->port_subport_cnt)
+ pd->port_tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
}
done:
@@ -354,6 +492,7 @@ done:
/**
* ipath_tid_free - free a port TID
* @pd: the port
+ * @subport: the subport
* @ti: the TID info
*
* right now we are unlocking one page at a time, but since
@@ -367,7 +506,7 @@ done:
* they pass in to us.
*/
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
const struct ipath_tid_info *ti)
{
int ret = 0;
@@ -388,11 +527,20 @@ static int ipath_tid_free(struct ipath_portdata *pd,
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt)
+ tidcnt = dd->ipath_rcvtidcnt;
+ else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ porttid += dd->ipath_rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ porttid += tidcnt * (subport - 1);
+ }
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
porttid * sizeof(*tidbase));
- tidcnt = dd->ipath_rcvtidcnt;
limit = sizeof(tidmap) * BITS_PER_BYTE;
if (limit > tidcnt)
/* just in case size changes in future */
@@ -413,13 +561,18 @@ static int ipath_tid_free(struct ipath_portdata *pd,
continue;
cnt++;
if (dd->ipath_pageshadow[porttid + tid]) {
+ struct page *p;
+ p = dd->ipath_pageshadow[porttid + tid];
+ dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
- pd->port_pid, tid);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+ pid_nr(pd->port_pid), tid);
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
- ipath_release_user_pages(
- &dd->ipath_pageshadow[porttid + tid], 1);
- dd->ipath_pageshadow[porttid + tid] = NULL;
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&p, 1);
ipath_stats.sps_pageunlocks++;
} else
ipath_dbg("Unused tid %u, ignoring\n", tid);
@@ -581,20 +734,23 @@ bail:
/**
* ipath_manage_rcvq - manage a port's receive queue
* @pd: the port
+ * @subport: the subport
* @start_stop: action to carry out
*
* start_stop == 0 disables receive on the port, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+ int start_stop)
{
struct ipath_devdata *dd = pd->port_dd;
- u64 tval;
- ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+ ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
start_stop ? "en" : "dis", dd->ipath_unit,
- pd->port_port);
+ pd->port_port, subport);
+ if (subport)
+ goto bail;
/* atomically clear receive enable port. */
if (start_stop) {
/*
@@ -609,16 +765,17 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* updated and correct itself, even in the face of software
* bugs.
*/
- *pd->port_rcvhdrtail_kvaddr = 0;
- set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+ if (pd->port_rcvhdrtail_kvaddr)
+ ipath_clear_rcvhdrtail(pd);
+ set_bit(dd->ipath_r_portenable_shift + pd->port_port,
&dd->ipath_rcvctrl);
} else
- clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+ clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
&dd->ipath_rcvctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
/* now be sure chip saw it before we return */
- tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
if (start_stop) {
/*
* And try to be sure that tail reg update has happened too.
@@ -627,9 +784,10 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* in memory copy, since we could overwrite an update by the
* chip if we did.
*/
- tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+ ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
}
/* always; new head should be equal to new tail; see above */
+bail:
return 0;
}
@@ -687,6 +845,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
}
}
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned egrperchunk, egrcnt, size;
+
+ /*
+ * to avoid wasting a lot of memory, we allocate 32KB chunks of
+ * physically contiguous memory, advance through it until used up
+ * and then allocate more. Of course, we need memory to store those
+ * extra pointers, now. Started out with 256KB, but under heavy
+ * memory pressure (creating large files and then copying them over
+ * NFS while doing lots of MPI jobs), we hit some allocation
+ * failures, even though we can sleep... (2.6.10) Still get
+ * failures at 64K. 32K is the lowest we can go without wasting
+ * additional memory.
+ */
+ size = 0x8000;
+ egrperchunk = size / dd->ipath_rcvegrbufsize;
+ egrcnt = dd->ipath_rcvegrcnt;
+ pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+ pd->port_rcvegrbufs_perchunk = egrperchunk;
+ pd->port_rcvegrbuf_size = size;
+}
+
/**
* ipath_create_user_egr - allocate eager TID buffers
* @pd: the port to allocate TID buffers for
@@ -702,7 +890,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
int ret;
gfp_t gfp_flags;
@@ -717,36 +905,23 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
egrcnt = dd->ipath_rcvegrcnt;
/* TID number offset for this port */
- egroff = pd->port_port * egrcnt;
+ egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
egrsize = dd->ipath_rcvegrbufsize;
ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
"offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
- /*
- * to avoid wasting a lot of memory, we allocate 32KB chunks of
- * physically contiguous memory, advance through it until used up
- * and then allocate more. Of course, we need memory to store those
- * extra pointers, now. Started out with 256KB, but under heavy
- * memory pressure (creating large files and then copying them over
- * NFS while doing lots of MPI jobs), we hit some allocation
- * failures, even though we can sleep... (2.6.10) Still get
- * failures at 64K. 32K is the lowest we can go without wasting
- * additional memory.
- */
- size = 0x8000;
- alloced = ALIGN(egrsize * egrcnt, size);
- egrperchunk = size / egrsize;
- chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
- pd->port_rcvegrbuf_chunks = chunk;
- pd->port_rcvegrbufs_perchunk = egrperchunk;
- pd->port_rcvegrbuf_size = size;
- pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+ chunk = pd->port_rcvegrbuf_chunks;
+ egrperchunk = pd->port_rcvegrbufs_perchunk;
+ size = pd->port_rcvegrbuf_size;
+ pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf) {
ret = -ENOMEM;
goto bail;
}
pd->port_rcvegrbuf_phys =
- vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+ kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf_phys) {
ret = -ENOMEM;
goto bail_rcvegrbuf;
@@ -774,7 +949,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
(u64 __iomem *)
((char __iomem *)
dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, pa);
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER, pa);
pa += egrsize;
}
cond_resched(); /* don't hog the cpu */
@@ -791,105 +967,23 @@ bail_rcvegrbuf_phys:
pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
bail:
return ret;
}
-static int ipath_do_user_init(struct ipath_portdata *pd,
- const struct ipath_user_info *uinfo)
-{
- int ret = 0;
- struct ipath_devdata *dd = pd->port_dd;
- u32 head32;
-
- /* for now, if major version is different, bail */
- if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
- dev_info(&dd->pcidev->dev,
- "User major version %d not same as driver "
- "major %d\n", uinfo->spu_userversion >> 16,
- IPATH_USER_SWMAJOR);
- ret = -ENODEV;
- goto done;
- }
-
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- ipath_dbg("User minor version %d not same as driver "
- "minor %d\n", uinfo->spu_userversion & 0xffff,
- IPATH_USER_SWMINOR);
-
- if (uinfo->spu_rcvhdrsize) {
- ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
- if (ret)
- goto done;
- }
-
- /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
- /* for right now, kernel piobufs are at end, so port 1 is at 0 */
- pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port -
- 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
-
- /*
- * Now allocate the rcvhdr Q and eager TIDs; skip the TID
- * array for time being. If pd->port_port > chip-supported,
- * we need to do extra stuff here to handle by handling overflow
- * through port 0, someday
- */
- ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret)
- ret = ipath_create_user_egr(pd);
- if (ret)
- goto done;
-
- /*
- * set the eager head register for this port to the current values
- * of the tail pointers, since we don't know if they were
- * updated on last use of the port.
- */
- head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
- ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
- dd->ipath_lastegrheads[pd->port_port] = -1;
- dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
- ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
- pd->port_port, head32);
- pd->port_tidcursor = 0; /* start at beginning after open */
- /*
- * now enable the port; the tail registers will be written to memory
- * by the chip as soon as it sees the write to
- * dd->ipath_kregs->kr_rcvctrl. The update only happens on
- * transition from 0 to 1, so clear it first, then set it as part of
- * enabling the port. This will (very briefly) affect any other
- * open ports, but it shouldn't be long enough to be an issue.
- * We explictly set the in-memory copy to 0 beforehand, so we don't
- * have to wait to be sure the DMA update has happened.
- */
- *pd->port_rcvhdrtail_kvaddr = 0ULL;
- set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
- &dd->ipath_rcvctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
-done:
- return ret;
-}
-
/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
- struct ipath_portdata *pd, unsigned len,
- int write_ok, dma_addr_t addr, char *what)
+ struct ipath_portdata *pd, unsigned len, int write_ok,
+ void *kvaddr, char *what)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
+ unsigned long pfn;
int ret;
if ((vma->vm_end - vma->vm_start) > len) {
@@ -912,17 +1006,17 @@ static int ipath_mmap_mem(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
}
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
- dev_info(&dd->pcidev->dev,
- "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o', ret);
+ dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+ "bytes r%c failed: %d\n", what, pd->port_port,
+ pfn, len, write_ok?'w':'o', ret);
else
- ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o');
+ ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+ "r%c\n", what, pd->port_port, pfn, len,
+ write_ok?'w':'o');
bail:
return ret;
}
@@ -957,7 +1051,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
static int mmap_piobufs(struct vm_area_struct *vma,
struct ipath_devdata *dd,
- struct ipath_portdata *pd)
+ struct ipath_portdata *pd,
+ unsigned piobufs, unsigned piocnt)
{
unsigned long phys;
int ret;
@@ -968,21 +1063,15 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* process data, and catches users who might try to read the i/o
* space due to a bug.
*/
- if ((vma->vm_end - vma->vm_start) >
- (dd->ipath_pbufsport * dd->ipath_palign)) {
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
"reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
- phys = dd->ipath_physaddr + pd->port_piobufs;
-
- /*
- * Don't mark this as non-cached, or we don't get the
- * write combining behavior we want on the PIO buffers!
- */
+ phys = dd->ipath_physaddr + piobufs;
#if defined(__powerpc__)
/* There isn't a generic way to specify writethrough mappings */
@@ -1011,7 +1100,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
struct ipath_devdata *dd = pd->port_dd;
unsigned long start, size;
size_t total_size, i;
- dma_addr_t *phys;
+ unsigned long pfn;
int ret;
size = pd->port_rcvegrbuf_size;
@@ -1021,7 +1110,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
"reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
@@ -1035,11 +1124,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
- phys = pd->port_rcvegrbuf_phys;
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
- ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
- size, vma->vm_page_prot);
+ pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
if (ret < 0)
goto bail;
}
@@ -1049,6 +1138,101 @@ bail:
return ret;
}
+/*
+ * ipath_file_vma_fault - handle a VMA page fault.
+ */
+static int ipath_file_vma_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct page *page;
+
+ page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
+ if (!page)
+ return VM_FAULT_SIGBUS;
+ get_page(page);
+ vmf->page = page;
+
+ return 0;
+}
+
+static const struct vm_operations_struct ipath_file_vm_ops = {
+ .fault = ipath_file_vma_fault,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct ipath_portdata *pd, unsigned subport)
+{
+ unsigned long len;
+ struct ipath_devdata *dd;
+ void *addr;
+ size_t size;
+ int ret = 0;
+
+ /* If the port is not shared, all addresses should be physical */
+ if (!pd->port_subport_cnt)
+ goto bail;
+
+ dd = pd->port_dd;
+ size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
+ /*
+ * Each process has all the subport uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped - as an array for all the processes,
+ * and also separately for this process.
+ */
+ if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * subport;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport;
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+ size * subport)) {
+ addr = pd->subport_rcvegrbuf + size * subport;
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else {
+ goto bail;
+ }
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) {
+ ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+ vma->vm_ops = &ipath_file_vm_ops;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ ret = 1;
+
+bail:
+ return ret;
+}
+
/**
* ipath_mmap - mmap various structures into user space
* @fp: the file pointer
@@ -1064,73 +1248,96 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
int ret;
pd = port_fp(fp);
+ if (!pd) {
+ ret = -EINVAL;
+ goto bail;
+ }
dd = pd->port_dd;
/*
* This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the
- * virtual, not physical, address; we only do one mmap for each
- * space mapped.
+ * file offset passed via mmap(). For shared ports, this is the
+ * kernel vmalloc() address of the pages to share with the master.
+ * For non-shared or master ports, this is a physical address.
+ * We only do one mmap for each space mapped.
*/
pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/*
- * Must fit in 40 bits for our hardware; some checked elsewhere,
- * but we'll be paranoid. Check for 0 is mostly in case one of the
- * allocations failed, but user called mmap anyway. We want to catch
- * that before it can match.
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
*/
- if (!pgaddr || pgaddr >= (1ULL<<40)) {
- ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
- (unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
- return -EINVAL;
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
}
- /* just the offset of the port user registers, not physical addr */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
- ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
+ ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
(unsigned long long) pgaddr, vma->vm_start,
- vma->vm_end - vma->vm_start);
+ vma->vm_end - vma->vm_start, dd->ipath_unit,
+ pd->port_port, subport_fp(fp));
- if (vma->vm_start & (PAGE_SIZE-1)) {
- ipath_dev_err(dd,
- "vm_start not aligned: %lx, end=%lx phys %lx\n",
- vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
- ret = -EINVAL;
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first, anything else must
+ * match a HW or memory address.
+ */
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto bail;
}
- else if (pgaddr == ureg)
+
+ ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
+ if (!pd->port_subport_cnt) {
+ /* port is not shared */
+ piocnt = pd->port_piocnt;
+ piobufs = pd->port_piobufs;
+ } else if (!subport_fp(fp)) {
+ /* caller is the master */
+ piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+ (pd->port_piocnt % pd->port_subport_cnt);
+ piobufs = pd->port_piobufs +
+ dd->ipath_palign * (pd->port_piocnt - piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ /* caller is a slave */
+ piocnt = pd->port_piocnt / pd->port_subport_cnt;
+ piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+ }
+
+ if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg);
- else if (pgaddr == pd->port_piobufs)
- ret = mmap_piobufs(vma, dd, pd);
- else if (pgaddr == (u64) pd->port_rcvegr_phys)
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+ else if (pgaddr == dd->ipath_pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ (void *) dd->ipath_pioavailregs_dma,
+ "pioavail registers");
+ else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
- else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
+ else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
/*
* The rcvhdrq itself; readonly except on HT (so have
* to allow writable mapping), multiple pages, contiguous
* from an i/o perspective.
*/
- unsigned total_size =
- ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
- * sizeof(u32), PAGE_SIZE);
- ret = ipath_mmap_mem(vma, pd, total_size, 1,
- pd->port_rcvhdrq_phys,
+ ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+ pd->port_rcvhdrq,
"rcvhdrq");
- }
- else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
+ else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
/* in-memory copy of rcvhdrq tail register */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- pd->port_rcvhdrqtailaddr_phys,
+ pd->port_rcvhdrtail_kvaddr,
"rcvhdrq tail");
- else if (pgaddr == dd->ipath_pioavailregs_phys)
- /* in-memory copy of pioavail registers */
- ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- dd->ipath_pioavailregs_phys,
- "pioavail registers");
else
ret = -EINVAL;
@@ -1138,80 +1345,242 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
if (ret < 0)
dev_info(&dd->pcidev->dev,
- "Failure %d on addr %lx, off %lx\n",
- -ret, vma->vm_start, vma->vm_pgoff);
-
+ "Failure %d on off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
return ret;
}
-static unsigned int ipath_poll(struct file *fp,
- struct poll_table_struct *pt)
+static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
+{
+ unsigned pollflag = 0;
+
+ if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
+ pd->port_hdrqfull != pd->port_hdrqfull_poll) {
+ pollflag |= POLLIN | POLLRDNORM;
+ pd->port_hdrqfull_poll = pd->port_hdrqfull;
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
{
- struct ipath_portdata *pd;
- u32 head, tail;
- int bit;
unsigned pollflag = 0;
struct ipath_devdata *dd;
- pd = port_fp(fp);
dd = pd->port_dd;
- bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
- set_bit(bit, &dd->ipath_rcvctrl);
+ /* variable access in ipath_poll_hdrqfull() needs this */
+ rmb();
+ pollflag = ipath_poll_hdrqfull(pd);
- /*
- * Before blocking, make sure that head is still == tail,
- * reading from the chip, so we can be sure the interrupt
- * enable has made it to the chip. If not equal, disable
- * interrupt again and return immediately. This avoids races,
- * and the overhead of the chip read doesn't matter much at
- * this point, since we are waiting for something anyway.
- */
+ if (pd->port_urgent != pd->port_urgent_poll) {
+ pollflag |= POLLIN | POLLRDNORM;
+ pd->port_urgent_poll = pd->port_urgent;
+ }
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
+ if (!pollflag) {
+ /* this saves a spin_lock/unlock in interrupt handler... */
+ set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
+ /* flush waiting flag so don't miss an event... */
+ wmb();
+ poll_wait(fp, &pd->port_wait, pt);
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll_next(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ u32 head;
+ u32 tail;
+ unsigned pollflag = 0;
+ struct ipath_devdata *dd;
+
+ dd = pd->port_dd;
+
+ /* variable access in ipath_poll_hdrqfull() needs this */
+ rmb();
+ pollflag = ipath_poll_hdrqfull(pd);
head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
- tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+ if (pd->port_rcvhdrtail_kvaddr)
+ tail = ipath_get_rcvhdrtail(pd);
+ else
+ tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
- if (tail == head) {
+ if (head != tail)
+ pollflag |= POLLIN | POLLRDNORM;
+ else {
+ /* this saves a spin_lock/unlock in interrupt handler */
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
- (void)ipath_write_ureg(dd, ur_rcvhdrhead,
- dd->ipath_rhdrhead_intr_off
- | head, pd->port_port);
+ /* flush waiting flag so we don't miss an event */
+ wmb();
+
+ set_bit(pd->port_port + dd->ipath_r_intravail_shift,
+ &dd->ipath_rcvctrl);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off | head,
+ pd->port_port);
+
poll_wait(fp, &pd->port_wait, pt);
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll(struct file *fp,
+ struct poll_table_struct *pt)
+{
+ struct ipath_portdata *pd;
+ unsigned pollflag;
+
+ pd = port_fp(fp);
+ if (!pd)
+ pollflag = 0;
+ else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
+ pollflag = ipath_poll_urgent(pd, fp, pt);
+ else
+ pollflag = ipath_poll_next(pd, fp, pt);
- if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- /* timed out, no packets received */
- clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- pd->port_rcvwait_to++;
+ return pollflag;
+}
+
+static int ipath_supports_subports(int user_swmajor, int user_swminor)
+{
+ /* no subport implementation prior to software version 1.3 */
+ return (user_swmajor > 1) || (user_swminor >= 3);
+}
+
+static int ipath_compatible_subports(int user_swmajor, int user_swminor)
+{
+ /* this code is written long-hand for clarity */
+ if (IPATH_USER_SWMAJOR != user_swmajor) {
+ /* no promise of compatibility if major mismatch */
+ return 0;
+ }
+ if (IPATH_USER_SWMAJOR == 1) {
+ switch (IPATH_USER_SWMINOR) {
+ case 0:
+ case 1:
+ case 2:
+ /* no subport implementation so cannot be compatible */
+ return 0;
+ case 3:
+ /* 3 is only compatible with itself */
+ return user_swminor == 3;
+ default:
+ /* >= 4 are compatible (or are expected to be) */
+ return user_swminor >= 4;
}
- else
- pollflag = POLLIN | POLLRDNORM;
}
- else {
- /* it's already happened; don't do wait_event overhead */
- pollflag = POLLIN | POLLRDNORM;
- pd->port_rcvnowait++;
+ /* make no promises yet for future major versions */
+ return 0;
+}
+
+static int init_subports(struct ipath_devdata *dd,
+ struct ipath_portdata *pd,
+ const struct ipath_user_info *uinfo)
+{
+ int ret = 0;
+ unsigned num_subports;
+ size_t size;
+
+ /*
+ * If the user is requesting zero subports,
+ * skip the subport allocation.
+ */
+ if (uinfo->spu_subport_cnt <= 0)
+ goto bail;
+
+ /* Self-consistency check for ipath_compatible_subports() */
+ if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
+ !ipath_compatible_subports(IPATH_USER_SWMAJOR,
+ IPATH_USER_SWMINOR)) {
+ dev_info(&dd->pcidev->dev,
+ "Inconsistent ipath_compatible_subports()\n");
+ goto bail;
}
- clear_bit(bit, &dd->ipath_rcvctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
+ /* Check for subport compatibility */
+ if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
+ uinfo->spu_userversion & 0xffff)) {
+ dev_info(&dd->pcidev->dev,
+ "Mismatched user version (%d.%d) and driver "
+ "version (%d.%d) while port sharing. Ensure "
+ "that driver and library are from the same "
+ "release.\n",
+ (int) (uinfo->spu_userversion >> 16),
+ (int) (uinfo->spu_userversion & 0xffff),
+ IPATH_USER_SWMAJOR,
+ IPATH_USER_SWMINOR);
+ goto bail;
+ }
+ if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
- return pollflag;
+ num_subports = uinfo->spu_subport_cnt;
+ pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
+ if (!pd->subport_uregbase) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE) * num_subports;
+ pd->subport_rcvhdr_base = vzalloc(size);
+ if (!pd->subport_rcvhdr_base) {
+ ret = -ENOMEM;
+ goto bail_ureg;
+ }
+
+ pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_subports);
+ if (!pd->subport_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail_rhdr;
+ }
+
+ pd->port_subport_cnt = uinfo->spu_subport_cnt;
+ pd->port_subport_id = uinfo->spu_subport_id;
+ pd->active_slaves = 1;
+ set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ goto bail;
+
+bail_rhdr:
+ vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+ vfree(pd->subport_uregbase);
+ pd->subport_uregbase = NULL;
+bail:
+ return ret;
}
static int try_alloc_port(struct ipath_devdata *dd, int port,
- struct file *fp)
+ struct file *fp,
+ const struct ipath_user_info *uinfo)
{
+ struct ipath_portdata *pd;
int ret;
- if (!dd->ipath_pd[port]) {
- void *p, *ptmp;
+ if (!(pd = dd->ipath_pd[port])) {
+ void *ptmp;
- p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+ pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
/*
* Allocate memory for use in ipath_tid_update() just once
@@ -1221,34 +1590,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
dd->ipath_rcvtidcnt * sizeof(struct page **),
GFP_KERNEL);
- if (!p || !ptmp) {
+ if (!pd || !ptmp) {
ipath_dev_err(dd, "Unable to allocate portdata "
"memory, failing open\n");
ret = -ENOMEM;
- kfree(p);
+ kfree(pd);
kfree(ptmp);
goto bail;
}
- dd->ipath_pd[port] = p;
+ dd->ipath_pd[port] = pd;
dd->ipath_pd[port]->port_port = port;
dd->ipath_pd[port]->port_dd = dd;
dd->ipath_pd[port]->port_tid_pg_list = ptmp;
init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
}
- if (!dd->ipath_pd[port]->port_cnt) {
- dd->ipath_pd[port]->port_cnt = 1;
- fp->private_data = (void *) dd->ipath_pd[port];
+ if (!pd->port_cnt) {
+ pd->userversion = uinfo->spu_userversion;
+ init_user_egr_sizes(pd);
+ if ((ret = init_subports(dd, pd, uinfo)) != 0)
+ goto bail;
ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
current->comm, current->pid, dd->ipath_unit,
port);
- dd->ipath_pd[port]->port_pid = current->pid;
- strncpy(dd->ipath_pd[port]->port_comm, current->comm,
- sizeof(dd->ipath_pd[port]->port_comm));
+ pd->port_cnt = 1;
+ port_fp(fp) = pd;
+ pd->port_pid = get_pid(task_pid(current));
+ strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
- goto bail;
- }
- ret = -EBUSY;
+ } else
+ ret = -EBUSY;
bail:
return ret;
@@ -1264,7 +1635,8 @@ static inline int usable(struct ipath_devdata *dd)
| IPATH_LINKUNK));
}
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+ const struct ipath_user_info *uinfo)
{
struct ipath_devdata *dd = ipath_lookup(unit);
int ret, i;
@@ -1279,8 +1651,8 @@ static int find_free_port(int unit, struct file *fp)
goto bail;
}
- for (i = 0; i < dd->ipath_cfgports; i++) {
- ret = try_alloc_port(dd, i, fp);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (ret != -EBUSY)
goto bail;
}
@@ -1290,13 +1662,14 @@ bail:
return ret;
}
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+ const struct ipath_user_info *uinfo)
{
int ret = 0, i, prefunit = -1, devmax;
int maxofallports, npresent, nup;
int ndev;
- (void) ipath_count_units(&npresent, &nup, &maxofallports);
+ devmax = ipath_count_units(&npresent, &nup, &maxofallports);
/*
* This code is present to allow a knowledgeable person to
@@ -1305,7 +1678,7 @@ static int find_best_unit(struct file *fp)
* InfiniPath chip to that processor (we assume reasonable connectivity,
* for now). This code assumes that if affinity has been set
* before this point, that at most one cpu is set; for now this
- * is reasonable. I check for both cpus_empty() and cpus_full(),
+ * is reasonable. I check for both cpumask_empty() and cpumask_full(),
* in case some kernel variant sets none of the bits when no
* affinity is set. 2.6.11 and 12 kernels have all present
* cpus set. Some day we'll have to fix it up further to handle
@@ -1314,20 +1687,23 @@ static int find_best_unit(struct file *fp)
* information. There may be some issues with dual core numbering
* as well. This needs more work prior to release.
*/
- if (!cpus_empty(current->cpus_allowed) &&
- !cpus_full(current->cpus_allowed)) {
- int ncpus = num_online_cpus(), curcpu = -1;
- for (i = 0; i < ncpus; i++)
- if (cpu_isset(i, current->cpus_allowed)) {
+ if (!cpumask_empty(tsk_cpus_allowed(current)) &&
+ !cpumask_full(tsk_cpus_allowed(current))) {
+ int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
+ get_online_cpus();
+ for_each_online_cpu(i)
+ if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
ipath_cdbg(PROC, "%s[%u] affinity set for "
- "cpu %d\n", current->comm,
- current->pid, i);
+ "cpu %d/%d\n", current->comm,
+ current->pid, i, ncpus);
curcpu = i;
+ nset++;
}
- if (curcpu != -1) {
+ put_online_cpus();
+ if (curcpu != -1 && nset != ncpus) {
if (npresent) {
prefunit = curcpu / (ncpus / npresent);
- ipath_dbg("%s[%u] %d chips, %d cpus, "
+ ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
"%d cpus/chip, select unit %d\n",
current->comm, current->pid,
npresent, ncpus, ncpus / npresent,
@@ -1343,8 +1719,6 @@ static int find_best_unit(struct file *fp)
if (prefunit != -1)
devmax = prefunit + 1;
- else
- devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
for (i = 1; i < maxofallports; i++) {
for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1359,7 +1733,7 @@ recheck:
* next.
*/
continue;
- ret = try_alloc_port(dd, i, fp);
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (!ret)
goto done;
}
@@ -1395,22 +1769,235 @@ done:
return ret;
}
+static int find_shared_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int devmax, ndev, i;
+ int ret = 0;
+
+ devmax = ipath_count_units(NULL, NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct ipath_devdata *dd = ipath_lookup(ndev);
+
+ if (!usable(dd))
+ continue;
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ /* Skip ports which are not yet open */
+ if (!pd || !pd->port_cnt)
+ continue;
+ /* Skip port if it doesn't match the requested one */
+ if (pd->port_subport_id != uinfo->spu_subport_id)
+ continue;
+ /* Verify the sharing process matches the master */
+ if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+ pd->userversion != uinfo->spu_userversion ||
+ pd->port_cnt >= pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto done;
+ }
+ port_fp(fp) = pd;
+ subport_fp(fp) = pd->port_cnt++;
+ pd->port_subpid[subport_fp(fp)] =
+ get_pid(task_pid(current));
+ tidcursor_fp(fp) = 0;
+ pd->active_slaves |= 1 << subport_fp(fp);
+ ipath_cdbg(PROC,
+ "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+ current->comm, current->pid,
+ subport_fp(fp),
+ pd->port_comm, pid_nr(pd->port_pid),
+ dd->ipath_unit, pd->port_port);
+ ret = 1;
+ goto done;
+ }
+ }
+
+done:
+ return ret;
+}
+
static int ipath_open(struct inode *in, struct file *fp)
{
- int ret, user_minor;
+ /* The real work is performed later in ipath_assign_port() */
+ fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swmajor, swminor;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (port_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ swmajor = uinfo->spu_userversion >> 16;
+ if (swmajor != IPATH_USER_SWMAJOR) {
+ ipath_dbg("User major version %d not same as driver "
+ "major %d\n", uinfo->spu_userversion >> 16,
+ IPATH_USER_SWMAJOR);
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+ if (swminor != IPATH_USER_SWMINOR)
+ ipath_dbg("User minor version %d not same as driver "
+ "minor %d\n", swminor, IPATH_USER_SWMINOR);
mutex_lock(&ipath_mutex);
- user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
+ if (ipath_compatible_subports(swmajor, swminor) &&
+ uinfo->spu_subport_cnt &&
+ (ret = find_shared_port(fp, uinfo))) {
+ if (ret > 0)
+ ret = 0;
+ goto done_chk_sdma;
+ }
+
+ i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
- (long)in->i_rdev, user_minor);
+ (long)file_inode(fp)->i_rdev, i_minor);
- if (user_minor)
- ret = find_free_port(user_minor - 1, fp);
+ if (i_minor)
+ ret = find_free_port(i_minor - 1, fp, uinfo);
else
- ret = find_best_unit(fp);
+ ret = find_best_unit(fp, uinfo);
+
+done_chk_sdma:
+ if (!ret) {
+ struct ipath_filedata *fd = fp->private_data;
+ const struct ipath_portdata *pd = fd->pd;
+ const struct ipath_devdata *dd = pd->port_dd;
+
+ fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
+ dd->ipath_unit,
+ pd->port_port,
+ fd->subport);
+
+ if (!fd->pq)
+ ret = -ENOMEM;
+ }
mutex_unlock(&ipath_mutex);
+
+done:
+ return ret;
+}
+
+
+static int ipath_do_user_init(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ struct ipath_portdata *pd = port_fp(fp);
+ struct ipath_devdata *dd;
+ u32 head32;
+
+ /* Subports don't need to initialize anything since master did it. */
+ if (subport_fp(fp)) {
+ ret = wait_event_interruptible(pd->port_wait,
+ !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+ goto done;
+ }
+
+ dd = pd->port_dd;
+
+ if (uinfo->spu_rcvhdrsize) {
+ ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+ if (ret)
+ goto done;
+ }
+
+ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+ /* some ports may get extra buffers, calculate that here */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_piocnt = dd->ipath_pbufsport + 1;
+ else
+ pd->port_piocnt = dd->ipath_pbufsport;
+
+ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_pio_base = (dd->ipath_pbufsport + 1)
+ * (pd->port_port - 1);
+ else
+ pd->port_pio_base = dd->ipath_ports_extrabuf +
+ dd->ipath_pbufsport * (pd->port_port - 1);
+ pd->port_piobufs = dd->ipath_piobufbase +
+ pd->port_pio_base * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+ " first pio %u\n", pd->port_port, pd->port_piobufs,
+ pd->port_piocnt, pd->port_pio_base);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for time being. If pd->port_port > chip-supported,
+ * we need to do extra stuff here to handle by handling overflow
+ * through port 0, someday
+ */
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = ipath_create_user_egr(pd);
+ if (ret)
+ goto done;
+
+ /*
+ * set the eager head register for this port to the current values
+ * of the tail pointers, since we don't know if they were
+ * updated on last use of the port.
+ */
+ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+ pd->port_lastrcvhdrqtail = -1;
+ ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+ pd->port_port, head32);
+ pd->port_tidcursor = 0; /* start at beginning after open */
+
+ /* initialize poll variables... */
+ pd->port_urgent = 0;
+ pd->port_urgent_poll = 0;
+ pd->port_hdrqfull_poll = pd->port_hdrqfull;
+
+ /*
+ * Now enable the port for receive.
+ * For chips that are set to DMA the tail register to memory
+ * when they change (and when the update bit transitions from
+ * 0 to 1. So for those chips, we turn it off and then back on.
+ * This will (very briefly) affect any other open ports, but the
+ * duration is very short, and therefore isn't an issue. We
+ * explicitly set the in-memory tail copy to 0 beforehand, so we
+ * don't have to wait to be sure the DMA update has happened
+ * (chip resets head/tail to 0 on transition to enable).
+ */
+ set_bit(dd->ipath_r_portenable_shift + pd->port_port,
+ &dd->ipath_rcvctrl);
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+ if (pd->port_rcvhdrtail_kvaddr)
+ ipath_clear_rcvhdrtail(pd);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl &
+ ~(1ULL << dd->ipath_r_tailupd_shift));
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ /* Notify any waiting slaves */
+ if (pd->port_subport_cnt) {
+ clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ wake_up(&pd->port_wait);
+ }
+done:
return ret;
}
@@ -1430,12 +2017,15 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
pd->port_port);
for (i = port_tidbase; i < maxtid; i++) {
- if (!dd->ipath_pageshadow[i])
+ struct page *ps = dd->ipath_pageshadow[i];
+
+ if (!ps)
continue;
- ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
- 1);
dd->ipath_pageshadow[i] = NULL;
+ pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages_on_close(&ps, 1);
cnt++;
ipath_stats.sps_pageunlocks++;
}
@@ -1453,26 +2043,51 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
static int ipath_close(struct inode *in, struct file *fp)
{
int ret = 0;
+ struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
+ unsigned long flags;
unsigned port;
+ struct pid *pid;
ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
(long)in->i_rdev, fp->private_data);
mutex_lock(&ipath_mutex);
- pd = port_fp(fp);
- port = pd->port_port;
+ fd = fp->private_data;
fp->private_data = NULL;
+ pd = fd->pd;
+ if (!pd) {
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+
dd = pd->port_dd;
- if (pd->port_hdrqfull) {
- ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
- "during run\n", pd->port_comm, pd->port_pid,
- pd->port_hdrqfull);
- pd->port_hdrqfull = 0;
+ /* drain user sdma queue */
+ ipath_user_sdma_queue_drain(dd, fd->pq);
+ ipath_user_sdma_queue_destroy(fd->pq);
+
+ if (--pd->port_cnt) {
+ /*
+ * XXX If the master closes the port before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ pd->active_slaves &= ~(1 << fd->subport);
+ put_pid(pd->port_subpid[fd->subport]);
+ pd->port_subpid[fd->subport] = NULL;
+ mutex_unlock(&ipath_mutex);
+ goto bail;
}
+ /* early; no interrupt users after this */
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+ port = pd->port_port;
+ dd->ipath_pd[port] = NULL;
+ pid = pd->port_pid;
+ pd->port_pid = NULL;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
if (pd->port_rcvwait_to || pd->port_piowait_to
|| pd->port_rcvnowait || pd->port_pionowait) {
@@ -1485,15 +2100,16 @@ static int ipath_close(struct inode *in, struct file *fp)
pd->port_rcvnowait = pd->port_pionowait = 0;
}
if (pd->port_flag) {
- ipath_dbg("port %u port_flag still set to 0x%lx\n",
+ ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
pd->port_port, pd->port_flag);
pd->port_flag = 0;
}
if (dd->ipath_kregbase) {
- int i;
- /* atomically clear receive enable port. */
- clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
+ /* atomically clear receive enable port and intr avail. */
+ clear_bit(dd->ipath_r_portenable_shift + port,
+ &dd->ipath_rcvctrl);
+ clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
&dd->ipath_rcvctrl);
ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
@@ -1503,8 +2119,6 @@ static int ipath_close(struct inode *in, struct file *fp)
/* clean up the pkeys for this port user */
ipath_clean_part_key(pd, dd);
-
-
/*
* be paranoid, and never write 0's to these, just use an
* unused part of the port 0 tail page. Of course,
@@ -1520,42 +2134,53 @@ static int ipath_close(struct inode *in, struct file *fp)
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, dd->ipath_dummy_hdrq_phys);
- i = dd->ipath_pbufsport * (port - 1);
- ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+ ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+ pd->port_piocnt, 1);
+
+ dd->ipath_f_clear_tids(dd, pd->port_port);
if (dd->ipath_pageshadow)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
- pd->port_comm, pd->port_pid,
+ pd->port_comm, pid_nr(pid),
dd->ipath_unit, port);
-
- dd->ipath_f_clear_tids(dd, pd->port_port);
}
- pd->port_cnt = 0;
- pd->port_pid = 0;
-
- dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+ put_pid(pid);
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
+bail:
+ kfree(fd);
return ret;
}
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
struct ipath_port_info __user *uinfo)
{
struct ipath_port_info info;
int nup;
int ret;
+ size_t sz;
(void) ipath_count_units(NULL, &nup, NULL);
info.num_active = nup;
info.unit = pd->port_dd->ipath_unit;
info.port = pd->port_port;
+ info.subport = subport;
+ /* Don't return new fields if old library opened the port. */
+ if (ipath_supports_subports(pd->userversion >> 16,
+ pd->userversion & 0xffff)) {
+ /* Number of user ports available for this device. */
+ info.num_ports = pd->port_dd->ipath_cfgports - 1;
+ info.num_subports = pd->port_subport_cnt;
+ sz = sizeof(info);
+ } else
+ sz = sizeof(info) - 2 * sizeof(u16);
- if (copy_to_user(uinfo, &info, sizeof(info))) {
+ if (copy_to_user(uinfo, &info, sz)) {
ret = -EFAULT;
goto bail;
}
@@ -1565,6 +2190,45 @@ bail:
return ret;
}
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+ void __user *slave_mask_addr)
+{
+ int ret = 0;
+
+ if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+ ret = -EFAULT;
+ return ret;
+}
+
+static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
+ u32 __user *inflightp)
+{
+ const u32 val = ipath_user_sdma_inflight_counter(pq);
+
+ if (put_user(val, inflightp))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ipath_sdma_get_complete(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ u32 __user *completep)
+{
+ u32 val;
+ int err;
+
+ err = ipath_user_sdma_make_progress(dd, pq);
+ if (err < 0)
+ return err;
+
+ val = ipath_user_sdma_complete_counter(pq);
+ if (put_user(val, completep))
+ return -EFAULT;
+
+ return 0;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -1591,6 +2255,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed = sizeof(cmd.type);
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ case __IPATH_CMD_USER_INIT:
case IPATH_CMD_USER_INIT:
copy = sizeof(cmd.cmd.user_info);
dest = &cmd.cmd.user_info;
@@ -1617,51 +2283,128 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
+ case __IPATH_CMD_SLAVE_INFO:
+ copy = sizeof(cmd.cmd.slave_mask_addr);
+ dest = &cmd.cmd.slave_mask_addr;
+ src = &ucmd->cmd.slave_mask_addr;
+ break;
+ case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg
+ copy = 0;
+ src = NULL;
+ dest = NULL;
+ break;
+ case IPATH_CMD_POLL_TYPE:
+ copy = sizeof(cmd.cmd.poll_type);
+ dest = &cmd.cmd.poll_type;
+ src = &ucmd->cmd.poll_type;
+ break;
+ case IPATH_CMD_ARMLAUNCH_CTRL:
+ copy = sizeof(cmd.cmd.armlaunch_ctrl);
+ dest = &cmd.cmd.armlaunch_ctrl;
+ src = &ucmd->cmd.armlaunch_ctrl;
+ break;
+ case IPATH_CMD_SDMA_INFLIGHT:
+ copy = sizeof(cmd.cmd.sdma_inflight);
+ dest = &cmd.cmd.sdma_inflight;
+ src = &ucmd->cmd.sdma_inflight;
+ break;
+ case IPATH_CMD_SDMA_COMPLETE:
+ copy = sizeof(cmd.cmd.sdma_complete);
+ dest = &cmd.cmd.sdma_complete;
+ src = &ucmd->cmd.sdma_complete;
+ break;
default:
ret = -EINVAL;
goto bail;
}
- if ((count - consumed) < copy) {
- ret = -EINVAL;
- goto bail;
- }
+ if (copy) {
+ if ((count - consumed) < copy) {
+ ret = -EINVAL;
+ goto bail;
+ }
- if (copy_from_user(dest, src, copy)) {
- ret = -EFAULT;
- goto bail;
+ if (copy_from_user(dest, src, copy)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed += copy;
}
- consumed += copy;
pd = port_fp(fp);
+ if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+ cmd.type != IPATH_CMD_ASSIGN_PORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+ case __IPATH_CMD_USER_INIT:
+ /* backwards compatibility, get port first */
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ /* and fall through to current version. */
case IPATH_CMD_USER_INIT:
- ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
- if (ret < 0)
+ ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
goto bail;
ret = ipath_get_base_info(
- pd, (void __user *) (unsigned long)
+ fp, (void __user *) (unsigned long)
cmd.cmd.user_info.spu_base_info,
cmd.cmd.user_info.spu_base_info_size);
break;
case IPATH_CMD_RECV_CTRL:
- ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+ ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
break;
case IPATH_CMD_PORT_INFO:
- ret = ipath_port_info(pd,
+ ret = ipath_port_info(pd, subport_fp(fp),
(struct ipath_port_info __user *)
(unsigned long) cmd.cmd.port_info);
break;
case IPATH_CMD_TID_UPDATE:
- ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
break;
case IPATH_CMD_TID_FREE:
- ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
break;
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
+ case __IPATH_CMD_SLAVE_INFO:
+ ret = ipath_get_slave_info(pd,
+ (void __user *) (unsigned long)
+ cmd.cmd.slave_mask_addr);
+ break;
+ case IPATH_CMD_PIOAVAILUPD:
+ ipath_force_pio_avail_update(pd->port_dd);
+ break;
+ case IPATH_CMD_POLL_TYPE:
+ pd->poll_type = cmd.cmd.poll_type;
+ break;
+ case IPATH_CMD_ARMLAUNCH_CTRL:
+ if (cmd.cmd.armlaunch_ctrl)
+ ipath_enable_armlaunch(pd->port_dd);
+ else
+ ipath_disable_armlaunch(pd->port_dd);
+ break;
+ case IPATH_CMD_SDMA_INFLIGHT:
+ ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_inflight);
+ break;
+ case IPATH_CMD_SDMA_COMPLETE:
+ ret = ipath_sdma_get_complete(pd->port_dd,
+ user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_complete);
+ break;
}
if (ret >= 0)
@@ -1671,14 +2414,28 @@ bail:
return ret;
}
+static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long dim, loff_t off)
+{
+ struct file *filp = iocb->ki_filp;
+ struct ipath_filedata *fp = filp->private_data;
+ struct ipath_portdata *pd = port_fp(filp);
+ struct ipath_user_sdma_queue *pq = fp->pq;
+
+ if (!dim)
+ return -EINVAL;
+
+ return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
+}
+
static struct class *ipath_class;
-static int init_cdev(int minor, char *name, struct file_operations *fops,
- struct cdev **cdevp, struct class_device **class_devp)
+static int init_cdev(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp)
{
const dev_t dev = MKDEV(IPATH_MAJOR, minor);
struct cdev *cdev = NULL;
- struct class_device *class_dev = NULL;
+ struct device *device = NULL;
int ret;
cdev = cdev_alloc();
@@ -1702,12 +2459,12 @@ static int init_cdev(int minor, char *name, struct file_operations *fops,
goto err_cdev;
}
- class_dev = class_device_create(ipath_class, NULL, dev, NULL, name);
+ device = device_create(ipath_class, NULL, dev, NULL, name);
- if (IS_ERR(class_dev)) {
- ret = PTR_ERR(class_dev);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
- "class_dev for minor %d, %s (err %d)\n",
+ "device for minor %d, %s (err %d)\n",
minor, name, -ret);
goto err_cdev;
}
@@ -1721,29 +2478,29 @@ err_cdev:
done:
if (ret >= 0) {
*cdevp = cdev;
- *class_devp = class_dev;
+ *devp = device;
} else {
*cdevp = NULL;
- *class_devp = NULL;
+ *devp = NULL;
}
return ret;
}
-int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
- struct cdev **cdevp, struct class_device **class_devp)
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp)
{
- return init_cdev(minor, name, fops, cdevp, class_devp);
+ return init_cdev(minor, name, fops, cdevp, devp);
}
static void cleanup_cdev(struct cdev **cdevp,
- struct class_device **class_devp)
+ struct device **devp)
{
- struct class_device *class_dev = *class_devp;
+ struct device *dev = *devp;
- if (class_dev) {
- class_device_unregister(class_dev);
- *class_devp = NULL;
+ if (dev) {
+ device_unregister(dev);
+ *devp = NULL;
}
if (*cdevp) {
@@ -1753,13 +2510,13 @@ static void cleanup_cdev(struct cdev **cdevp,
}
void ipath_cdev_cleanup(struct cdev **cdevp,
- struct class_device **class_devp)
+ struct device **devp)
{
- cleanup_cdev(cdevp, class_devp);
+ cleanup_cdev(cdevp, devp);
}
static struct cdev *wildcard_cdev;
-static struct class_device *wildcard_class_dev;
+static struct device *wildcard_dev;
static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
@@ -1816,7 +2573,7 @@ int ipath_user_add(struct ipath_devdata *dd)
goto bail;
}
ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
- &wildcard_class_dev);
+ &wildcard_dev);
if (ret < 0) {
ipath_dev_err(dd, "Could not create wildcard "
"minor: error %d\n", -ret);
@@ -1829,7 +2586,7 @@ int ipath_user_add(struct ipath_devdata *dd)
snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
- &dd->user_cdev, &dd->user_class_dev);
+ &dd->user_cdev, &dd->user_dev);
if (ret < 0)
ipath_dev_err(dd, "Could not create user minor %d, %s\n",
dd->ipath_unit + 1, name);
@@ -1844,13 +2601,13 @@ bail:
void ipath_user_remove(struct ipath_devdata *dd)
{
- cleanup_cdev(&dd->user_cdev, &dd->user_class_dev);
+ cleanup_cdev(&dd->user_cdev, &dd->user_dev);
if (atomic_dec_return(&user_count) == 0) {
if (atomic_read(&user_setup) == 0)
goto bail;
- cleanup_cdev(&wildcard_cdev, &wildcard_class_dev);
+ cleanup_cdev(&wildcard_cdev, &wildcard_dev);
user_cleanup();
atomic_set(&user_setup, 0);
@@ -1858,4 +2615,3 @@ void ipath_user_remove(struct ipath_devdata *dd)
bail:
return;
}
-
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index c8a8af0fe47..e0c404bdc4a 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,14 +31,13 @@
* SOFTWARE.
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
-#include <linux/pci.h>
+#include <linux/slab.h>
#include "ipath_kernel.h"
@@ -47,7 +46,7 @@
static struct super_block *ipath_super;
static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, struct file_operations *fops,
+ umode_t mode, const struct file_operations *fops,
void *data)
{
int error;
@@ -58,16 +57,14 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
goto bail;
}
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_uid = 0;
- inode->i_gid = 0;
- inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_private = data;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
- inode->i_nlink++;
- dir->i_nlink++;
+ inc_nlink(inode);
+ inc_nlink(dir);
}
inode->i_fop = fops;
@@ -79,20 +76,20 @@ bail:
return error;
}
-static int create_file(const char *name, mode_t mode,
+static int create_file(const char *name, umode_t mode,
struct dentry *parent, struct dentry **dentry,
- struct file_operations *fops, void *data)
+ const struct file_operations *fops, void *data)
{
int error;
*dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
- if (!IS_ERR(dentry))
+ if (!IS_ERR(*dentry))
error = ipathfs_mknod(parent->d_inode, *dentry,
mode, fops, data);
else
- error = PTR_ERR(dentry);
+ error = PTR_ERR(*dentry);
mutex_unlock(&parent->d_inode->i_mutex);
return error;
@@ -105,194 +102,27 @@ static ssize_t atomic_stats_read(struct file *file, char __user *buf,
sizeof ipath_stats);
}
-static struct file_operations atomic_stats_ops = {
+static const struct file_operations atomic_stats_ops = {
.read = atomic_stats_read,
+ .llseek = default_llseek,
};
-#define NUM_COUNTERS sizeof(struct infinipath_counters) / sizeof(u64)
-
static ssize_t atomic_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- u64 counters[NUM_COUNTERS];
- u16 i;
+ struct infinipath_counters counters;
struct ipath_devdata *dd;
- dd = file->f_dentry->d_inode->i_private;
-
- for (i = 0; i < NUM_COUNTERS; i++)
- counters[i] = ipath_snap_cntr(dd, i);
+ dd = file_inode(file)->i_private;
+ dd->ipath_f_read_counters(dd, &counters);
- return simple_read_from_buffer(buf, count, ppos, counters,
+ return simple_read_from_buffer(buf, count, ppos, &counters,
sizeof counters);
}
-static struct file_operations atomic_counters_ops = {
+static const struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
-};
-
-static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- u32 nodeinfo[10];
- struct ipath_devdata *dd;
- u64 guid;
-
- dd = file->f_dentry->d_inode->i_private;
-
- guid = be64_to_cpu(dd->ipath_guid);
-
- nodeinfo[0] = /* BaseVersion is SMA */
- /* ClassVersion is SMA */
- (1 << 8) /* NodeType */
- | (1 << 0); /* NumPorts */
- nodeinfo[1] = (u32) (guid >> 32);
- nodeinfo[2] = (u32) (guid & 0xffffffff);
- /* PortGUID == SystemImageGUID for us */
- nodeinfo[3] = nodeinfo[1];
- /* PortGUID == SystemImageGUID for us */
- nodeinfo[4] = nodeinfo[2];
- /* PortGUID == NodeGUID for us */
- nodeinfo[5] = nodeinfo[3];
- /* PortGUID == NodeGUID for us */
- nodeinfo[6] = nodeinfo[4];
- nodeinfo[7] = (4 << 16) /* we support 4 pkeys */
- | (dd->ipath_deviceid << 0);
- /* our chip version as 16 bits major, 16 bits minor */
- nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);
- nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);
-
- return simple_read_from_buffer(buf, count, ppos, nodeinfo,
- sizeof nodeinfo);
-}
-
-static struct file_operations atomic_node_info_ops = {
- .read = atomic_node_info_read,
-};
-
-static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- u32 portinfo[13];
- u32 tmp, tmp2;
- struct ipath_devdata *dd;
-
- dd = file->f_dentry->d_inode->i_private;
-
- /* so we only initialize non-zero fields. */
- memset(portinfo, 0, sizeof portinfo);
-
- /*
- * Notimpl yet M_Key (64)
- * Notimpl yet GID (64)
- */
-
- portinfo[4] = (dd->ipath_lid << 16);
-
- /*
- * Notimpl yet SMLID.
- * CapabilityMask is 0, we don't support any of these
- * DiagCode is 0; we don't store any diag info for now Notimpl yet
- * M_KeyLeasePeriod (we don't support M_Key)
- */
-
- /* LocalPortNum is whichever port number they ask for */
- portinfo[7] = (dd->ipath_unit << 24)
- /* LinkWidthEnabled */
- | (2 << 16)
- /* LinkWidthSupported (really 2, but not IB valid) */
- | (3 << 8)
- /* LinkWidthActive */
- | (2 << 0);
- tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
- tmp2 = 5;
- if (tmp == IPATH_IBSTATE_INIT)
- tmp = 2;
- else if (tmp == IPATH_IBSTATE_ARM)
- tmp = 3;
- else if (tmp == IPATH_IBSTATE_ACTIVE)
- tmp = 4;
- else {
- tmp = 0; /* down */
- tmp2 = tmp & 0xf;
- }
-
- portinfo[8] = (1 << 28) /* LinkSpeedSupported */
- | (tmp << 24) /* PortState */
- | (tmp2 << 20) /* PortPhysicalState */
- | (2 << 16)
-
- /* LinkDownDefaultState */
- /* M_KeyProtectBits == 0 */
- /* NotImpl yet LMC == 0 (we can support all values) */
- | (1 << 4) /* LinkSpeedActive */
- | (1 << 0); /* LinkSpeedEnabled */
- switch (dd->ipath_ibmtu) {
- case 4096:
- tmp = 5;
- break;
- case 2048:
- tmp = 4;
- break;
- case 1024:
- tmp = 3;
- break;
- case 512:
- tmp = 2;
- break;
- case 256:
- tmp = 1;
- break;
- default: /* oops, something is wrong */
- ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
- "treat as 2048\n", dd->ipath_ibmtu);
- tmp = 4;
- break;
- }
- portinfo[9] = (tmp << 28)
- /* NeighborMTU */
- /* Notimpl MasterSMSL */
- | (1 << 20)
-
- /* VLCap */
- /* Notimpl InitType (actually, an SMA decision) */
- /* VLHighLimit is 0 (only one VL) */
- ; /* VLArbitrationHighCap is 0 (only one VL) */
- portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */
- /* InitTypeReply is SMA decision */
- (5 << 16) /* MTUCap 4096 */
- | (7 << 13) /* VLStallCount */
- | (0x1f << 8) /* HOQLife */
- | (1 << 4)
-
- /* OperationalVLs 0 */
- /* PartitionEnforcementInbound */
- /* PartitionEnforcementOutbound not enforced */
- /* FilterRawinbound not enforced */
- ; /* FilterRawOutbound not enforced */
- /* M_KeyViolations are not counted by hardware, SMA can count */
- tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
- /* P_KeyViolations are counted by hardware. */
- portinfo[11] = ((tmp & 0xffff) << 0);
- portinfo[12] =
- /* Q_KeyViolations are not counted by hardware */
- (1 << 8)
-
- /* GUIDCap */
- /* SubnetTimeOut handled by SMA */
- /* RespTimeValue handled by SMA */
- ;
- /* LocalPhyErrors are programmed to max */
- portinfo[12] |= (0xf << 20)
- | (0xf << 16) /* OverRunErrors are programmed to max */
- ;
-
- return simple_read_from_buffer(buf, count, ppos, portinfo,
- sizeof portinfo);
-}
-
-static struct file_operations atomic_port_info_ops = {
- .read = atomic_port_info_read,
+ .llseek = default_llseek,
};
static ssize_t flash_read(struct file *file, char __user *buf,
@@ -324,7 +154,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
goto bail;
}
- dd = file->f_dentry->d_inode->i_private;
+ dd = file_inode(file)->i_private;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
@@ -356,19 +186,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
- if ( pos < 0) {
+ if (pos != 0) {
ret = -EINVAL;
goto bail;
}
- if (pos >= sizeof(struct ipath_flash)) {
- ret = 0;
+ if (count != sizeof(struct ipath_flash)) {
+ ret = -EINVAL;
goto bail;
}
- if (count > sizeof(struct ipath_flash) - pos)
- count = sizeof(struct ipath_flash) - pos;
-
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
@@ -380,7 +207,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
goto bail_tmp;
}
- dd = file->f_dentry->d_inode->i_private;
+ dd = file_inode(file)->i_private;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
@@ -397,9 +224,10 @@ bail:
return ret;
}
-static struct file_operations flash_ops = {
+static const struct file_operations flash_ops = {
.read = flash_read,
.write = flash_write,
+ .llseek = default_llseek,
};
static int create_device_files(struct super_block *sb,
@@ -411,8 +239,7 @@ static int create_device_files(struct super_block *sb,
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
- (struct file_operations *) &simple_dir_operations,
- dd);
+ &simple_dir_operations, dd);
if (ret) {
printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
goto bail;
@@ -426,22 +253,6 @@ static int create_device_files(struct super_block *sb,
goto bail;
}
- ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
- &atomic_node_info_ops, dd);
- if (ret) {
- printk(KERN_ERR "create_file(%s/node_info) "
- "failed: %d\n", unit, ret);
- goto bail;
- }
-
- ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
- &atomic_port_info_ops, dd);
- if (ret) {
- printk(KERN_ERR "create_file(%s/port_info) "
- "failed: %d\n", unit, ret);
- goto bail;
- }
-
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret) {
@@ -454,24 +265,34 @@ bail:
return ret;
}
-static void remove_file(struct dentry *parent, char *name)
+static int remove_file(struct dentry *parent, char *name)
{
struct dentry *tmp;
+ int ret;
tmp = lookup_one_len(name, parent, strlen(name));
- spin_lock(&dcache_lock);
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto bail;
+ }
+
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_locked(tmp);
+ dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, tmp);
- } else {
+ } else
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
- }
+
+ ret = 0;
+bail:
+ /*
+ * We don't expect clients to care about the return value, but
+ * it's there if they need it.
+ */
+ return ret;
}
static int remove_device_files(struct super_block *sb,
@@ -493,8 +314,6 @@ static int remove_device_files(struct super_block *sb,
}
remove_file(dir, "flash");
- remove_file(dir, "port_info");
- remove_file(dir, "node_info");
remove_file(dir, "atomic_counters");
d_delete(dir);
ret = simple_rmdir(root->d_inode, dir);
@@ -513,7 +332,7 @@ static int ipathfs_fill_super(struct super_block *sb, void *data,
int ret;
static struct tree_descr files[] = {
- [1] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
+ [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
{""},
};
@@ -528,10 +347,8 @@ static int ipathfs_fill_super(struct super_block *sb, void *data,
list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
spin_unlock_irqrestore(&ipath_devs_lock, flags);
ret = create_device_files(sb, dd);
- if (ret) {
- deactivate_super(sb);
+ if (ret)
goto bail;
- }
spin_lock_irqsave(&ipath_devs_lock, flags);
}
@@ -541,13 +358,13 @@ bail:
return ret;
}
-static int ipathfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- int ret = get_sb_single(fs_type, flags, data,
- ipathfs_fill_super, mnt);
- if (ret >= 0)
- ipath_super = mnt->mnt_sb;
+ struct dentry *ret;
+ ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
+ if (!IS_ERR(ret))
+ ipath_super = ret->d_sb;
return ret;
}
@@ -590,9 +407,10 @@ bail:
static struct file_system_type ipathfs_fs_type = {
.owner = THIS_MODULE,
.name = "ipathfs",
- .get_sb = ipathfs_get_sb,
+ .mount = ipathfs_mount,
.kill_sb = ipathfs_kill_super,
};
+MODULE_ALIAS_FS("ipathfs");
int __init ipath_init_ipathfs(void)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 5c9b509e40e..7cc305488a3 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -36,12 +36,18 @@
* HT chip.
*/
+#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/htirq.h>
+#include <rdma/ib_verbs.h>
#include "ipath_kernel.h"
#include "ipath_registers.h"
+static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
+
+
/*
* This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
@@ -143,10 +149,57 @@ struct _infinipath_do_not_use_kernel_regs {
unsigned long long ReservedSW2[4];
};
-#define IPATH_KREG_OFFSET(field) (offsetof(struct \
- _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+struct _infinipath_do_not_use_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 Reserved1;
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 Reserved6;
+ __u64 Reserved7;
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+};
+
+#define IPATH_KREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
#define IPATH_CREG_OFFSET(field) (offsetof( \
- struct infinipath_counters, field) / sizeof(u64))
+ struct _infinipath_do_not_use_counters, field) / sizeof(u64))
static const struct ipath_kregs ipath_ht_kregs = {
.kr_control = IPATH_KREG_OFFSET(Control),
@@ -207,8 +260,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
/*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port(),
+ * These should not be used directly via ipath_write_kreg64(),
+ * use them with ipath_write_kreg64_port(),
*/
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -252,8 +305,10 @@ static const struct ipath_cregs ipath_ht_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 0
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 12
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -277,12 +332,29 @@ static const struct ipath_cregs ipath_ht_cregs = {
#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL
#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL
+#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
+#define IBA6110_IBCS_LINKSTATE_SHIFT 4
+
/* kr_extstatus bits */
#define INFINIPATH_EXTS_FREQSEL 0x2
#define INFINIPATH_EXTS_SERDESSEL 0x4
#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
+
+/* TID entries (memory), HT-only */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
+#define INFINIPATH_R_INTRAVAIL_SHIFT 16
+#define INFINIPATH_R_TAILUPD_SHIFT 31
+
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET 0x7ULL
+
/*
* masks and bits that are different in different chips, or present only
* in one
@@ -309,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
#define IPATH_GPIO_SCL \
(1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-/* keep the code below somewhat more readonable; not used elsewhere */
+/* keep the code below somewhat more readable; not used elsewhere */
#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
infinipath_hwe_htclnkabyte1crcerr)
#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \
@@ -338,7 +410,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
if (crcbits) {
u16 ctrl0, ctrl1;
snprintf(bitsmsg, sizeof bitsmsg,
- "[HT%s lane %s CRC (%llx); ignore till reload]",
+ "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
!(crcbits & _IPATH_HTLINK1_CRCBITS) ?
"0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
? "1 (B)" : "0+1 (A+B)"),
@@ -389,17 +461,42 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
_IPATH_HTLINK1_CRCBITS)));
}
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+ INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+ INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+ INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+ << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static void ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+ ++ipath_stats.sps_txeparity;
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+}
+
+
/**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* @msg: the output buffer
* @msgl: the size of the output buffer
*
- * Use same msg buffer as regular errors to avoid
- * excessive stack use. Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use. Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
*/
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
size_t msgl)
@@ -408,6 +505,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
+ int log_idx;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
@@ -436,51 +534,47 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= dd->ipath_hwerrmask;
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
/*
* make sure we get this much out, unless told to be quiet,
+ * it's a parity error we may recover from,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
- (ipath_debug & __IPATH_VERBDBG))
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if (ctrl & INFINIPATH_C_FREEZEMODE) {
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring; only
- * make the complaint once, in case it's stuck
- * or recurring, and we get here multiple
- * times.
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- /* mark as having had error */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- /*
- * mark as not usable, at a minimum until driver
- * is reloaded, probably until reboot, since no
- * other reset is possible.
- */
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in * sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & TXE_PIO_PARITY) {
+ ipath_ht_txe_recover(dd);
+ hwerrs &= ~TXE_PIO_PARITY;
+ }
+
+ if (!hwerrs) {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
+ ipath_clear_freeze(dd);
}
}
@@ -499,44 +593,15 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
- strlcat(msg, "[HTC Ireq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
- strlcat(msg, "[HTC Treq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
- strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6110_hwerror_msgs,
+ ARRAY_SIZE(ipath_6110_hwerror_msgs),
+ msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
hwerr_crcbits(dd, hwerrs, msg, msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
- strlcat(msg, "[HT core Misc5]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
- strlcat(msg, "[HT core Misc6]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
- strlcat(msg, "[HT core Misc7]", msgl);
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
@@ -573,12 +638,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs) {
+ /*
+ * if any set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ * force link down, so switch knows, and
+ * LEDs are turned off
+ */
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_ht_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ }
+ else
+ *msg = 0; /* recovered from all of them */
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
* for status file; if no trailing brace is copied,
@@ -603,42 +695,22 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
{
char *n = NULL;
u8 boardrev = dd->ipath_boardrev;
- int ret;
+ int ret = 0;
switch (boardrev) {
- case 4: /* Ponderosa is one of the bringup boards */
- n = "Ponderosa";
- break;
case 5:
/*
* original production board; two production levels, with
* different serial number ranges. See ipath_ht_early_init() for
* case where we enable IPATH_GPIO_INTR for later serial # range.
+ * Original 112* serial number is no longer supported.
*/
n = "InfiniPath_QHT7040";
break;
- case 6:
- n = "OEM_Board_3";
- break;
case 7:
/* small form factor production board */
n = "InfiniPath_QHT7140";
break;
- case 8:
- n = "LS/X-1";
- break;
- case 9: /* Comstock bringup test board */
- n = "Comstock";
- break;
- case 10:
- n = "OEM_Board_2";
- break;
- case 11:
- n = "InfiniPath_HT-470"; /* obsoleted */
- break;
- case 12:
- n = "OEM_Board_4";
- break;
default: /* don't know, just print the number */
ipath_dev_err(dd, "Don't yet know about board "
"with ID %u\n", boardrev);
@@ -649,9 +721,14 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (n)
snprintf(name, namelen, "%s", n);
- if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+ if (ret) {
+ ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
+ goto bail;
+ }
+ if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+ dd->ipath_minrev > 4)) {
/*
- * This version of the driver only supports Rev 3.2 and 3.3
+ * This version of the driver only supports Rev 3.2 - 3.4
*/
ipath_dev_err(dd,
"Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -665,37 +742,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
* copies
*/
dd->ipath_flags |= IPATH_32BITCOUNTERS;
- if (dd->ipath_htspeed != 800)
+ dd->ipath_flags |= IPATH_GPIO_INTR;
+ if (dd->ipath_lbus_speed != 800)
ipath_dev_err(dd,
"Incorrectly configured for HT @ %uMHz\n",
- dd->ipath_htspeed);
- if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
- dd->ipath_boardrev == 6)
- dd->ipath_flags |= IPATH_GPIO_INTR;
- else
- dd->ipath_flags |= IPATH_POLL_RX_INTR;
- if (dd->ipath_boardrev == 8) { /* LS/X-1 */
- u64 val;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
- if (val & INFINIPATH_EXTS_SERDESSEL) {
- /*
- * hardware disabled
- *
- * This means that the chip is hardware disabled,
- * and will not be able to bring up the link,
- * in any case. We special case this and abort
- * early, to avoid later messages. We also set
- * the DISABLED status bit
- */
- ipath_dbg("Unit %u is hardware-disabled\n",
- dd->ipath_unit);
- *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
- /* this value is handled differently */
- ret = 2;
- goto bail;
- }
- }
- ret = 0;
+ dd->ipath_lbus_speed);
+
+ /*
+ * set here, not in ipath_init_*_funcs because we have to do
+ * it after we can read chip registers.
+ */
+ dd->ipath_ureg_align =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
bail:
return ret;
@@ -750,7 +808,7 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd)
* errors. We only bother to do this at load time, because it's OK if
* it happened before we were loaded (first time after boot/reset),
* but any time after that, it's fatal anyway. Also need to not check
- * for for upper byte errors if we are in 8 bit mode, so figure out
+ * for upper byte errors if we are in 8 bit mode, so figure out
* our width. For now, at least, also complain if it's 8 bit.
*/
static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
@@ -788,7 +846,7 @@ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
/*
* now write them back to clear the error.
*/
- pci_write_config_byte(pdev, link_off,
+ pci_write_config_word(pdev, link_off,
linkctrl & (0xf << 8));
}
}
@@ -853,7 +911,7 @@ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
break;
}
- dd->ipath_htwidth = width;
+ dd->ipath_lbus_width = width;
if (linkwidth != 0x11) {
ipath_dev_err(dd, "Not configured for 16 bit HT "
@@ -901,53 +959,49 @@ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
speed = 200;
break;
}
- dd->ipath_htspeed = speed;
+ dd->ipath_lbus_speed = speed;
}
+
+ snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+ "HyperTransport,%uMHz,x%u\n",
+ dd->ipath_lbus_speed,
+ dd->ipath_lbus_width);
}
-static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
- int pos)
+static int ipath_ht_intconfig(struct ipath_devdata *dd)
{
- u32 int_handler_addr_lower;
- u32 int_handler_addr_upper;
- u64 ihandler;
- u32 intvec;
+ int ret;
+
+ if (dd->ipath_intconfig) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
+ dd->ipath_intconfig); /* interrupt address */
+ ret = 0;
+ } else {
+ ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
+ "interrupt address\n");
+ ret = -EINVAL;
+ }
- /* use indirection register to get the intr handler */
- pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10);
- pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower);
- pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11);
- pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper);
+ return ret;
+}
- ihandler = (u64) int_handler_addr_lower |
- ((u64) int_handler_addr_upper << 32);
+static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
+ struct ht_irq_msg *msg)
+{
+ struct ipath_devdata *dd = pci_get_drvdata(dev);
+ u64 prev_intconfig = dd->ipath_intconfig;
+
+ dd->ipath_intconfig = msg->address_lo;
+ dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
/*
- * kernels with CONFIG_PCI_MSI set the vector in the irq field of
- * struct pci_device, so we use that to program the internal
- * interrupt register (not config space) with that value. The BIOS
- * must still have done the basic MSI setup.
- */
- intvec = pdev->irq;
- /*
- * clear any vector bits there; normally not set but we'll overload
- * this for some debug purposes (setting the HTC debug register
- * value from software, rather than GPIOs), so it might be set on a
- * driver reload.
+ * If the previous value of dd->ipath_intconfig is zero, we're
+ * getting configured for the first time, and must not program the
+ * intconfig register here (it will be programmed later, when the
+ * hardware is ready). Otherwise, we should.
*/
- ihandler &= ~0xff0000;
- /* x86 vector goes in intrinfo[23:16] */
- ihandler |= intvec << 16;
- ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, "
- "interruptconfig %llx\n", int_handler_addr_lower,
- int_handler_addr_upper, intvec,
- (unsigned long long) ihandler);
-
- /* can't program yet, so save for interrupt setup */
- dd->ipath_intconfig = ihandler;
- /* keep going, so we find link control stuff also */
-
- return ihandler != 0;
+ if (prev_intconfig)
+ ipath_ht_intconfig(dd);
}
/**
@@ -963,12 +1017,19 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
static int ipath_setup_ht_config(struct ipath_devdata *dd,
struct pci_dev *pdev)
{
- int pos, ret = 0;
- int ihandler = 0;
+ int pos, ret;
+
+ ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Couldn't create interrupt handler: "
+ "err %d\n", ret);
+ goto bail;
+ }
+ dd->ipath_irq = ret;
+ ret = 0;
/*
- * Read the capability info to find the interrupt info, and also
- * handle clearing CRC errors in linkctrl register if necessary. We
+ * Handle clearing CRC errors in linkctrl register if necessary. We
* do this early, before we ever enable errors or hardware errors,
* mostly to avoid causing the chip to enter freeze mode.
*/
@@ -982,7 +1043,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
do {
u8 cap_type;
- /* the HT capability type byte is 3 bytes after the
+ /*
+ * The HT capability type byte is 3 bytes after the
* capability byte.
*/
if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
@@ -992,16 +1054,10 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
}
if (!(cap_type & 0xE0))
slave_or_pri_blk(dd, pdev, pos, cap_type);
- else if (cap_type == HT_INTR_DISC_CONFIG)
- ihandler = set_int_handler(dd, pdev, pos);
} while ((pos = pci_find_next_capability(pdev, pos,
PCI_CAP_ID_HT)));
- if (!ihandler) {
- ipath_dev_err(dd, "Couldn't find interrupt handler in "
- "config space\n");
- ret = -ENODEV;
- }
+ dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
bail:
return ret;
@@ -1044,12 +1100,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
u64 lst, u64 ltst)
{
u64 extctl;
+ unsigned long flags = 0;
/* the diags use the LED to indicate diag info, so we leave
* the external LED alone when the diags are running */
if (ipath_diag_inuse)
return;
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
/*
* start by setting both LED control bits to off, then turn
* on the appropriate bit(s).
@@ -1078,23 +1146,68 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
}
dd->ipath_extctrl = extctl;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
}
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
{
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ /*
+ * setup the register offsets, since they are different for each
+ * chip
+ */
+ dd->ipath_kregs = &ipath_ht_kregs;
+ dd->ipath_cregs = &ipath_ht_cregs;
+
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
+
+ /*
+ * Fill in data for field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKTRAININGSTATE
+ * and only the shift for LINKSTATE, as they are the only ones
+ * that change. Also precalculate the 3 link states of interest
+ * and the combined mask.
+ */
+ dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
+ dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
+ dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
+ dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
+ dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
+ dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
+ dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
- infinipath_i_bitsextant =
+ /*
+ * Fill in data for ibcc field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKINITCMD
+ * and only the shift for LINKCMD and MAXPKTLEN, as they are
+ * the only ones that change.
+ */
+ dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
+ dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
+ dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+
+ /* Fill in shifts for RcvCtrl. */
+ dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
+ dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
+ dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
+ dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
+
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1112,7 +1225,7 @@ static void ipath_init_ht_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1141,8 +1254,34 @@ static void ipath_init_ht_variables(void)
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
+ dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
+
+ dd->delay_mult = 2; /* SDR, 4X, can't change */
+
+ dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ dd->ipath_link_speed_supported = IPATH_IB_SDR;
+ dd->ipath_link_width_enabled = IB_WIDTH_4X;
+ dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
+ /* these can't change for this chip, so set once */
+ dd->ipath_link_width_active = dd->ipath_link_width_enabled;
+ dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
}
/**
@@ -1164,6 +1303,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+ ipath_dbg("MemBIST corrected\n");
ipath_check_htlink(dd);
@@ -1195,14 +1336,16 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
val &= ~INFINIPATH_HWE_HTCMISCERR4;
/*
- * PLL ignored because MDIO interface has a logic problem
- * for reads, on Comstock and Ponderosa. BRINGUP
+ * PLL ignored because unused MDIO interface has a logic problem
*/
if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
dd->ipath_hwerrmask = val;
}
+
+
+
/**
* ipath_ht_bringup_serdes - bring up the serdes
* @dd: the infinipath device
@@ -1274,16 +1417,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
}
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
- INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
- val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
- INFINIPATH_XGXS_MDIOADDR_SHIFT);
- /*
- * we use address 3
- */
- val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
- change = 1;
- }
if (val & INFINIPATH_XGXS_RESET) {
/* normally true after boot */
val &= ~INFINIPATH_XGXS_RESET;
@@ -1319,21 +1452,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
(unsigned long long)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
- if (!ipath_waitfor_mdio_cmdready(dd)) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio,
- ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
- IPATH_MDIO_CTRL_XGXS_REG_8,
- 0));
- if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
- IPATH_MDIO_DATAVALID, &val))
- ipath_dbg("Never got MDIO data for XGXS status "
- "read\n");
- else
- ipath_cdbg(VERBOSE, "MDIO Read reg8, "
- "'bank' 31 %x\n", (u32) val);
- } else
- ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
-
return ret; /* for now, say we always succeeded */
}
@@ -1352,30 +1470,11 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
}
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
- int ret;
-
- if (!dd->ipath_intconfig) {
- ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
- "interrupt address\n");
- ret = 1;
- goto bail;
- }
-
- ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
- dd->ipath_intconfig); /* interrupt address */
- ret = 0;
-
-bail:
- return ret;
-}
-
/**
* ipath_pe_put_tid - write a TID in chip
* @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
* This exists as a separate routine to allow for special locking etc.
@@ -1386,6 +1485,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
u64 __iomem *tidptr, u32 type,
unsigned long pa)
{
+ if (!dd->ipath_kregbase)
+ return;
+
if (pa != dd->ipath_tidinvalid) {
if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
dev_info(&dd->pcidev->dev,
@@ -1393,7 +1495,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
"40 bits, using only 40!!!\n", pa);
pa &= INFINIPATH_RT_ADDR_MASK;
}
- if (type == 0)
+ if (type == RCVHQ_RCV_TYPE_EAGER)
pa |= dd->ipath_tidtemplate;
else {
/* in words (fixed, full page). */
@@ -1402,10 +1504,11 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
pa |= lenvalid | INFINIPATH_RT_VALID;
}
}
- if (dd->ipath_kregbase)
- writeq(pa, tidptr);
+
+ writeq(pa, tidptr);
}
+
/**
* ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
* @dd: the infinipath device
@@ -1433,7 +1536,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
port * dd->ipath_rcvtidcnt *
sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ dd->ipath_tidinvalid);
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvegrbase +
@@ -1441,7 +1545,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvegrcnt; i++)
- ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ dd->ipath_tidinvalid);
}
/**
@@ -1467,7 +1572,7 @@ static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
static int ipath_ht_early_init(struct ipath_devdata *dd)
{
u32 __iomem *piobuf;
- u32 pioincr, val32, egrsize;
+ u32 pioincr, val32;
int i;
/*
@@ -1487,7 +1592,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
* errors interrupts if we ever see one).
*/
dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
- egrsize = dd->ipath_rcvegrbufsize;
/*
* the min() check here is currently a nop, but it may not
@@ -1529,44 +1633,271 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
writel(16, piobuf);
piobuf += pioincr;
}
- /*
- * self-clearing
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
ipath_get_eeprom_info(dd);
- if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
- dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
+ if (dd->ipath_boardrev == 5) {
/*
* Later production QHT7040 has same changes as QHT7140, so
* can use GPIO interrupts. They have serial #'s starting
* with 128, rather than 112.
*/
- dd->ipath_flags |= IPATH_GPIO_INTR;
- dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
+ if (dd->ipath_serial[0] == '1' &&
+ dd->ipath_serial[1] == '2' &&
+ dd->ipath_serial[2] == '8')
+ dd->ipath_flags |= IPATH_GPIO_INTR;
+ else {
+ ipath_dev_err(dd, "Unsupported InfiniPath board "
+ "(serial number %.16s)!\n",
+ dd->ipath_serial);
+ return 1;
+ }
+ }
+
+ if (dd->ipath_minrev >= 4) {
+ /* Rev4+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
}
+
return 0;
}
+
/**
* ipath_init_ht_get_base_info - set chip-specific flags for user code
* @dd: the infinipath device
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithims.
+ * HyperTransport can affect some user packet algorithms.
*/
static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
{
struct ipath_base_info *kinfo = kbase;
kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
- IPATH_RUNTIME_RCVHDR_COPY;
+ IPATH_RUNTIME_PIO_REGSWAPPED;
+
+ if (pd->port_dd->ipath_minrev < 4)
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
+
+ return 0;
+}
+
+static void ipath_ht_free_irq(struct ipath_devdata *dd)
+{
+ free_irq(dd->ipath_irq, dd);
+ ht_destroy_irq(dd->ipath_irq);
+ dd->ipath_irq = 0;
+ dd->ipath_intconfig = 0;
+}
+
+static struct ipath_message_header *
+ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
+{
+ return (struct ipath_message_header *)
+ &rhf_addr[sizeof(u64) / sizeof(u32)];
+}
+
+static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
+{
+ dd->ipath_portcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+ dd->ipath_p0_rcvegrcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
+}
+
+static void ipath_ht_read_counters(struct ipath_devdata *dd,
+ struct infinipath_counters *cntrs)
+{
+ cntrs->LBIntCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
+ cntrs->LBFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
+ cntrs->TxSDmaDescCnt = 0;
+ cntrs->TxUnsupVLErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
+ cntrs->TxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
+ cntrs->TxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
+ cntrs->TxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
+ cntrs->TxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
+ cntrs->TxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
+ cntrs->TxUnderrunCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
+ cntrs->TxFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
+ cntrs->TxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
+ cntrs->RxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
+ cntrs->RxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
+ cntrs->RxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
+ cntrs->RxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
+ cntrs->RxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
+ cntrs->RxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
+ cntrs->RxICRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
+ cntrs->RxVCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
+ cntrs->RxFlowCtrlErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
+ cntrs->RxBadFormatCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
+ cntrs->RxLinkProblemCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
+ cntrs->RxEBPCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
+ cntrs->RxLPCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
+ cntrs->RxBufOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
+ cntrs->RxTIDFullErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
+ cntrs->RxTIDValidErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
+ cntrs->RxPKeyMismatchCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
+ cntrs->RxP0HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
+ cntrs->RxP1HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
+ cntrs->RxP2HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
+ cntrs->RxP3HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
+ cntrs->RxP4HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
+ cntrs->RxP5HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
+ cntrs->RxP6HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
+ cntrs->RxP7HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
+ cntrs->RxP8HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
+ cntrs->RxP9HdrEgrOvflCnt = 0;
+ cntrs->RxP10HdrEgrOvflCnt = 0;
+ cntrs->RxP11HdrEgrOvflCnt = 0;
+ cntrs->RxP12HdrEgrOvflCnt = 0;
+ cntrs->RxP13HdrEgrOvflCnt = 0;
+ cntrs->RxP14HdrEgrOvflCnt = 0;
+ cntrs->RxP15HdrEgrOvflCnt = 0;
+ cntrs->RxP16HdrEgrOvflCnt = 0;
+ cntrs->IBStatusChangeCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
+ cntrs->IBLinkErrRecoveryCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
+ cntrs->IBLinkDownedCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
+ cntrs->IBSymbolErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
+ cntrs->RxVL15DroppedPktCnt = 0;
+ cntrs->RxOtherLocalPhyErrCnt = 0;
+ cntrs->PcieRetryBufDiagQwordCnt = 0;
+ cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
+ cntrs->LocalLinkIntegrityErrCnt =
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors;
+ cntrs->RxVlErrCnt = 0;
+ cntrs->RxDlidFltrCnt = 0;
+}
+
+
+/* no interrupt fallback for these chips */
+static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+
+/*
+ * reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
+{
+ u64 val, prev_val;
+
+ prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val = prev_val | INFINIPATH_XGXS_RESET;
+ prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+}
+
+
+static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
+{
+ int ret;
+
+ switch (which) {
+ case IPATH_IB_CFG_LWID:
+ ret = dd->ipath_link_width_active;
+ break;
+ case IPATH_IB_CFG_SPD:
+ ret = dd->ipath_link_speed_active;
+ break;
+ case IPATH_IB_CFG_LWID_ENB:
+ ret = dd->ipath_link_width_enabled;
+ break;
+ case IPATH_IB_CFG_SPD_ENB:
+ ret = dd->ipath_link_speed_enabled;
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+/* we assume range checking is already done, if needed */
+static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
+{
+ int ret = 0;
+
+ if (which == IPATH_IB_CFG_LWID_ENB)
+ dd->ipath_link_width_enabled = val;
+ else if (which == IPATH_IB_CFG_SPD_ENB)
+ dd->ipath_link_speed_enabled = val;
+ else
+ ret = -ENOTSUPP;
+ return ret;
+}
+
+
+static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
+{
+}
+
+
+static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
+{
+ ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
+ ipath_ib_linktrstate(dd, ibcs));
return 0;
}
+
/**
* ipath_init_iba6110_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
@@ -1590,22 +1921,20 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
dd->ipath_f_setextled = ipath_setup_ht_setextled;
dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-
- /*
- * initialize chip-specific variables
- */
+ dd->ipath_f_free_irq = ipath_ht_free_irq;
dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
+ dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
+ dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
+ dd->ipath_f_config_ports = ipath_ht_config_ports;
+ dd->ipath_f_read_counters = ipath_ht_read_counters;
+ dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
+ dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
+ dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
+ dd->ipath_f_config_jint = ipath_ht_config_jint;
+ dd->ipath_f_ib_updown = ipath_ht_ib_updown;
/*
- * setup the register offsets, since they are different for each
- * chip
- */
- dd->ipath_kregs = &ipath_ht_kregs;
- dd->ipath_cregs = &ipath_ht_cregs;
-
- /*
- * do very early init that is needed before ipath_f_bus is
- * called
+ * initialize chip-specific variables
*/
- ipath_init_ht_variables();
+ ipath_init_ht_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
deleted file mode 100644
index d86516d23df..00000000000
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/*
- * This file contains all of the code that is specific to the
- * InfiniPath PCIe chip.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-/*
- * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PCI-Express chip.
- *
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- */
-struct _infinipath_do_not_use_kernel_regs {
- unsigned long long Revision;
- unsigned long long Control;
- unsigned long long PageAlign;
- unsigned long long PortCnt;
- unsigned long long DebugPortSelect;
- unsigned long long Reserved0;
- unsigned long long SendRegBase;
- unsigned long long UserRegBase;
- unsigned long long CounterRegBase;
- unsigned long long Scratch;
- unsigned long long Reserved1;
- unsigned long long Reserved2;
- unsigned long long IntBlocked;
- unsigned long long IntMask;
- unsigned long long IntStatus;
- unsigned long long IntClear;
- unsigned long long ErrorMask;
- unsigned long long ErrorStatus;
- unsigned long long ErrorClear;
- unsigned long long HwErrMask;
- unsigned long long HwErrStatus;
- unsigned long long HwErrClear;
- unsigned long long HwDiagCtrl;
- unsigned long long MDIO;
- unsigned long long IBCStatus;
- unsigned long long IBCCtrl;
- unsigned long long ExtStatus;
- unsigned long long ExtCtrl;
- unsigned long long GPIOOut;
- unsigned long long GPIOMask;
- unsigned long long GPIOStatus;
- unsigned long long GPIOClear;
- unsigned long long RcvCtrl;
- unsigned long long RcvBTHQP;
- unsigned long long RcvHdrSize;
- unsigned long long RcvHdrCnt;
- unsigned long long RcvHdrEntSize;
- unsigned long long RcvTIDBase;
- unsigned long long RcvTIDCnt;
- unsigned long long RcvEgrBase;
- unsigned long long RcvEgrCnt;
- unsigned long long RcvBufBase;
- unsigned long long RcvBufSize;
- unsigned long long RxIntMemBase;
- unsigned long long RxIntMemSize;
- unsigned long long RcvPartitionKey;
- unsigned long long Reserved3;
- unsigned long long RcvPktLEDCnt;
- unsigned long long Reserved4[8];
- unsigned long long SendCtrl;
- unsigned long long SendPIOBufBase;
- unsigned long long SendPIOSize;
- unsigned long long SendPIOBufCnt;
- unsigned long long SendPIOAvailAddr;
- unsigned long long TxIntMemBase;
- unsigned long long TxIntMemSize;
- unsigned long long Reserved5;
- unsigned long long PCIeRBufTestReg0;
- unsigned long long PCIeRBufTestReg1;
- unsigned long long Reserved51[6];
- unsigned long long SendBufferError;
- unsigned long long SendBufferErrorCONT1;
- unsigned long long Reserved6SBE[6];
- unsigned long long RcvHdrAddr0;
- unsigned long long RcvHdrAddr1;
- unsigned long long RcvHdrAddr2;
- unsigned long long RcvHdrAddr3;
- unsigned long long RcvHdrAddr4;
- unsigned long long Reserved7RHA[11];
- unsigned long long RcvHdrTailAddr0;
- unsigned long long RcvHdrTailAddr1;
- unsigned long long RcvHdrTailAddr2;
- unsigned long long RcvHdrTailAddr3;
- unsigned long long RcvHdrTailAddr4;
- unsigned long long Reserved8RHTA[11];
- unsigned long long Reserved9SW[8];
- unsigned long long SerdesConfig0;
- unsigned long long SerdesConfig1;
- unsigned long long SerdesStatus;
- unsigned long long XGXSConfig;
- unsigned long long IBPLLCfg;
- unsigned long long Reserved10SW2[3];
- unsigned long long PCIEQ0SerdesConfig0;
- unsigned long long PCIEQ0SerdesConfig1;
- unsigned long long PCIEQ0SerdesStatus;
- unsigned long long Reserved11;
- unsigned long long PCIEQ1SerdesConfig0;
- unsigned long long PCIEQ1SerdesConfig1;
- unsigned long long PCIEQ1SerdesStatus;
- unsigned long long Reserved12;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof(struct \
- _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
- struct infinipath_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_pe_kregs = {
- .kr_control = IPATH_KREG_OFFSET(Control),
- .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
- .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
- .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
- .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
- .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
- .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
- .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
- .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
- .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
- .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
- .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
- .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
- .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
- .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
- .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
- .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
- .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
- .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
- .kr_intclear = IPATH_KREG_OFFSET(IntClear),
- .kr_intmask = IPATH_KREG_OFFSET(IntMask),
- .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
- .kr_mdio = IPATH_KREG_OFFSET(MDIO),
- .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
- .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
- .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
- .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
- .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
- .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
- .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
- .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
- .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
- .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
- .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
- .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
- .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
- .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
- .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
- .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
- .kr_revision = IPATH_KREG_OFFSET(Revision),
- .kr_scratch = IPATH_KREG_OFFSET(Scratch),
- .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
- .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
- .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
- .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
- .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
- .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
- .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
- .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
- .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
- .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
- .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
- .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
- .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
- .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
- .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
-
- /*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port()
- */
- .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
- .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
-
- /* The rcvpktled register controls one of the debug port signals, so
- * a packet activity LED can be connected to it. */
- .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
- .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
- .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
- .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
- .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
- .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
- .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
- .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
- .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
-};
-
-static const struct ipath_cregs ipath_pe_cregs = {
- .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
- .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
- .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
- .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
- .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
- .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
- .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
- .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
- .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
- .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
- .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
- .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
- .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
- .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
- .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
- .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
- .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
- .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
- .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
- .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
- .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
- .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
- .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
- .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
- .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
- .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
- .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
- .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
- .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
- .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
- .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
- .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
- .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
-#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
-#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
-#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA (1ULL << \
- (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL (1ULL << \
- (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/**
- * ipath_pe_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use. Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue. We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
- size_t msgl)
-{
- ipath_err_t hwerrs;
- u32 bits, ctrl;
- int isfatal = 0;
- char bitsmsg[64];
-
- hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
- if (!hwerrs) {
- /*
- * better than printing cofusing messages
- * This seems to be related to clearing the crc error, or
- * the pll error during init.
- */
- ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
- return;
- } else if (hwerrs == ~0ULL) {
- ipath_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
- return;
- }
- ipath_stats.sps_hwerrs++;
-
- /* Always clear the error status register, except MEMBISTFAIL,
- * regardless of whether we continue or stop using the chip.
- * We want that set so we know it failed, even across driver reload.
- * We'll still ignore it in the hwerrmask. We do this partly for
- * diagnostics, but also for support */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
- hwerrs &= dd->ipath_hwerrmask;
-
- /*
- * make sure we get this much out, unless told to be quiet,
- * or it's occurred within the last 5 seconds
- */
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
- (ipath_debug & __IPATH_VERBDBG))
- dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
- dd->ipath_lasthwerror |= hwerrs;
-
- if (hwerrs & ~infinipath_hwe_bitsextant)
- ipath_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
-
- ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if (ctrl & INFINIPATH_C_FREEZEMODE) {
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring only make the
- * complaint once, in case it's stuck or recurring,
- * and we get here multiple times
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- /*
- * Mark as having had an error for driver, and also
- * for /sys and status word mapped to user programs.
- * This marks unit as not usable, until reset
- */
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
- }
- }
-
- *msg = '\0';
-
- if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
- msgl);
- /* ignore from now on, so disable until driver reloaded */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
- << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PCIe Mem Parity Errs %x] ", bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
-
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
- INFINIPATH_HWE_COREPLL_RFSLIP )
-
- if (hwerrs & _IPATH_PLL_FAIL) {
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), InfiniPath hardware unusable]",
- (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
- strlcat(msg, bitsmsg, msgl);
- /* ignore from now on, so disable until driver reloaded */
- dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
- /*
- * If it occurs, it is left masked since the eternal
- * interface is unused
- */
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
- strlcat(msg, "[PCIe Poisoned TLP]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
- strlcat(msg, "[PCIe completion timeout]", msgl);
-
- /*
- * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
- strlcat(msg, "[PCIePLL1]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
- strlcat(msg, "[PCIePLL0]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
- strlcat(msg, "[PCIe XTLH core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
- strlcat(msg, "[PCIe ADM TX core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
- strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
- ipath_dev_err(dd, "%s hardware error\n", msg);
- if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
- /*
- * for /sys status file ; if no trailing } is copied, we'll
- * know it was truncated.
- */
- snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
- "{%s}", msg);
- }
-}
-
-/**
- * ipath_pe_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * info is based on the board revision register
- */
-static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen)
-{
- char *n = NULL;
- u8 boardrev = dd->ipath_boardrev;
- int ret;
-
- switch (boardrev) {
- case 0:
- n = "InfiniPath_Emulation";
- break;
- case 1:
- n = "InfiniPath_QLE7140-Bringup";
- break;
- case 2:
- n = "InfiniPath_QLE7140";
- break;
- case 3:
- n = "InfiniPath_QMI7140";
- break;
- case 4:
- n = "InfiniPath_QEM7140";
- break;
- case 5:
- n = "InfiniPath_QMH7140";
- break;
- default:
- ipath_dev_err(dd,
- "Don't yet know about board with ID %u\n",
- boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
- boardrev);
- break;
- }
- if (n)
- snprintf(name, namelen, "%s", n);
-
- if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
- ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
- dd->ipath_majrev, dd->ipath_minrev);
- ret = 1;
- } else
- ret = 0;
-
- return ret;
-}
-
-/**
- * ipath_pe_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
-{
- ipath_err_t val;
- u64 extsval;
-
- extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
- if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
- ipath_dev_err(dd, "MemBIST did not complete!\n");
-
- val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
-
- if (!dd->ipath_boardrev) // no PLL for Emulator
- val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-
- /* workaround bug 9460 in internal interface bus parity checking */
- val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
- dd->ipath_hwerrmask = val;
-}
-
-/**
- * ipath_pe_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
-{
- u64 val, tmp, config1;
- int ret = 0, change = 0;
-
- ipath_dbg("Trying to bringup serdes\n");
-
- if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
- INFINIPATH_HWE_SERDESPLLFAILED) {
- ipath_dbg("At start, serdes PLL failed bit set "
- "in hwerrstatus, clearing and continuing\n");
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- INFINIPATH_HWE_SERDESPLLFAILED);
- }
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
- config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
- ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
- "xgxsconfig %llx\n", (unsigned long long) val,
- (unsigned long long) config1, (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
- /*
- * Force reset on, also set rxdetect enable. Must do before reading
- * serdesstatus at least for simulation, or some of the bits in
- * serdes status will come back as undefined and cause simulation
- * failures
- */
- val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
- | INFINIPATH_SERDC0_L1PWR_DN;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- udelay(5); /* need pll reset set at least for a bit */
- /*
- * after PLL is reset, set the per-lane Resets and TxIdle and
- * clear the PLL reset and rxdetect (to get falling edge).
- * Leave L1PWR bits set (permanently)
- */
- val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
- | INFINIPATH_SERDC0_L1PWR_DN);
- val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
- ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
- "and txidle (%llx)\n", (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- /* need PLL reset clear for at least 11 usec before lane
- * resets cleared; give it a few more to be sure */
- udelay(15);
- val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);
-
- ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
- "(writing %llx)\n", (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
- INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
- val &=
- ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
- INFINIPATH_XGXS_MDIOADDR_SHIFT);
- /* MDIO address 3 */
- val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
- change = 1;
- }
- if (val & INFINIPATH_XGXS_RESET) {
- val &= ~INFINIPATH_XGXS_RESET;
- change = 1;
- }
- if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
- INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
- /* need to compensate for Tx inversion in partner */
- val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
- INFINIPATH_XGXS_RX_POL_SHIFT);
- val |= dd->ipath_rx_pol_inv <<
- INFINIPATH_XGXS_RX_POL_SHIFT;
- change = 1;
- }
- if (change)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
- /* clear current and de-emphasis bits */
- config1 &= ~0x0ffffffff00ULL;
- /* set current to 20ma */
- config1 |= 0x00000000000ULL;
- /* set de-emphasis to -5.68dB */
- config1 |= 0x0cccc000000ULL;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
- ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
- "config1=%llx, sstatus=%llx xgxs=%llx\n",
- (unsigned long long) val, (unsigned long long) config1,
- (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
- (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
- if (!ipath_waitfor_mdio_cmdready(dd)) {
- ipath_write_kreg(
- dd, dd->ipath_kregs->kr_mdio,
- ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
- IPATH_MDIO_CTRL_XGXS_REG_8, 0));
- if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
- IPATH_MDIO_DATAVALID, &val))
- ipath_dbg("Never got MDIO data for XGXS "
- "status read\n");
- else
- ipath_cdbg(VERBOSE, "MDIO Read reg8, "
- "'bank' 31 %x\n", (u32) val);
- } else
- ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
-
- return ret;
-}
-
-/**
- * ipath_pe_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when driver is being unloaded
- */
-static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
-{
- u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
- val |= INFINIPATH_SERDC0_TXIDLE;
- ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
- (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/* this is not yet needed on this chip, so just return 0. */
-static int ipath_pe_intconfig(struct ipath_devdata *dd)
-{
- return 0;
-}
-
-/**
- * ipath_setup_pe_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
-
- * These LEDs indicate the physical and logical state of IB link.
- * For this chip (at least with recommended board pinouts), LED1
- * is Yellow (logical state) and LED2 is Green (physical state),
- *
- * Note: We try to match the Mellanox HCA LED behavior as best
- * we can. Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate. That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
- u64 ltst)
-{
- u64 extctl;
-
- /* the diags use the LED to indicate diag info, so we leave
- * the external LED alone when the diags are running */
- if (ipath_diag_inuse)
- return;
-
- extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
- INFINIPATH_EXTC_LED2PRIPORT_ON);
-
- if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP)
- extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
- if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
- extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
- dd->ipath_extctrl = extctl;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-}
-
-/**
- * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * This is called during driver unload.
- * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT chips. If we do end up needing pci_enable_msi
- * at some point in the future for HT, we'll move the call back
- * into the main init_one code.
- */
-static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
-{
- dd->ipath_msi_lo = 0; /* just in case unload fails */
- pci_disable_msi(dd->pcidev);
-}
-
-/**
- * ipath_setup_pe_config - setup PCIe config related stuff
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * The pci_enable_msi() call will fail on systems with MSI quirks
- * such as those with AMD8131, even if the device of interest is not
- * attached to that device, (in the 2.6.13 - 2.6.15 kernels, at least, fixed
- * late in 2.6.16).
- * All that can be done is to edit the kernel source to remove the quirk
- * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip,
- * even though it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for PCIe. If we do end up needing
- * pci_enable_msi at some point in the future for HT, we'll move the
- * call back into the main init_one code.
- * We save the msi lo and hi values, so we can restore them after
- * chip reset (the kernel PCI infrastructure doesn't yet handle that
- * correctly).
- */
-static int ipath_setup_pe_config(struct ipath_devdata *dd,
- struct pci_dev *pdev)
-{
- int pos, ret;
-
- dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
- ret = pci_enable_msi(dd->pcidev);
- if (ret)
- ipath_dev_err(dd, "pci_enable_msi failed: %d, "
- "interrupts may not work\n", ret);
- /* continue even if it fails, we may still be OK... */
-
- if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
- u16 control;
- pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
- &dd->ipath_msi_lo);
- pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
- &dd->ipath_msi_hi);
- pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
- &control);
- /* now save the data (vector) info */
- pci_read_config_word(dd->pcidev,
- pos + ((control & PCI_MSI_FLAGS_64BIT)
- ? 12 : 8),
- &dd->ipath_msi_data);
- ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
- "0x%x, control=0x%x\n", dd->ipath_msi_data,
- pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
- control);
- /* we save the cachelinesize also, although it doesn't
- * really matter */
- pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
- &dd->ipath_pci_cacheline);
- } else
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't save MSI settings for reset\n");
- if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) {
- u16 linkstat;
- pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
- &linkstat);
- linkstat >>= 4;
- linkstat &= 0x1f;
- if (linkstat != 8)
- ipath_dev_err(dd, "PCIe width %u, "
- "performance reduced\n", linkstat);
- }
- else
- ipath_dev_err(dd, "Can't find PCI Express "
- "capability!\n");
- return 0;
-}
-
-static void ipath_init_pe_variables(void)
-{
- /*
- * bits for selecting i2c direction and values,
- * used for I2C serial flash
- */
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
-
- /* variables for sanity checking interrupt and errors */
- infinipath_hwe_bitsextant =
- (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
- (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
- INFINIPATH_HWE_PCIE1PLLFAILED |
- INFINIPATH_HWE_PCIE0PLLFAILED |
- INFINIPATH_HWE_PCIEPOISONEDTLP |
- INFINIPATH_HWE_PCIECPLTIMEOUT |
- INFINIPATH_HWE_PCIEBUSPARITYXTLH |
- INFINIPATH_HWE_PCIEBUSPARITYXADM |
- INFINIPATH_HWE_PCIEBUSPARITYRADM |
- INFINIPATH_HWE_MEMBISTFAILED |
- INFINIPATH_HWE_COREPLL_FBSLIP |
- INFINIPATH_HWE_COREPLL_RFSLIP |
- INFINIPATH_HWE_SERDESPLLFAILED |
- INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
- INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_bitsextant =
- (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
- (INFINIPATH_I_RCVAVAIL_MASK <<
- INFINIPATH_I_RCVAVAIL_SHIFT) |
- INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
- INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
- INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
- INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
- INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
- INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
- INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
- INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
- INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
- INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
- INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
- INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
- INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
- INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
- INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
- INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
- INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
- INFINIPATH_E_HARDWARE;
-
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-}
-
-/* setup the MSI stuff again after a reset. I'd like to just call
- * pci_enable_msi() and request_irq() again, but when I do that,
- * the MSI enable bit doesn't get set in the command word, and
- * we switch to to a different interrupt vector, which is confusing,
- * so I instead just do it all inline. Perhaps somehow can tie this
- * into the PCIe hotplug support at some point
- * Note, because I'm doing it all here, I don't call pci_disable_msi()
- * or free_irq() at the start of ipath_setup_pe_reset().
- */
-static int ipath_reinit_msi(struct ipath_devdata *dd)
-{
- int pos;
- u16 control;
- int ret;
-
- if (!dd->ipath_msi_lo) {
- dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
- "initial setup failed?\n");
- ret = 0;
- goto bail;
- }
-
- if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't restore MSI settings\n");
- ret = 0;
- goto bail;
- }
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
- dd->ipath_msi_lo);
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
- dd->ipath_msi_hi);
- pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
- if (!(control & PCI_MSI_FLAGS_ENABLE)) {
- ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
- "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
- control, control | PCI_MSI_FLAGS_ENABLE);
- control |= PCI_MSI_FLAGS_ENABLE;
- pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
- control);
- }
- /* now rewrite the data (vector) info */
- pci_write_config_word(dd->pcidev, pos +
- ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
- dd->ipath_msi_data);
- /* we restore the cachelinesize also, although it doesn't really
- * matter */
- pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
- dd->ipath_pci_cacheline);
- /* and now set the pci master bit again */
- pci_set_master(dd->pcidev);
- ret = 1;
-
-bail:
- return ret;
-}
-
-/* This routine sleeps, so it can only be called from user context, not
- * from interrupt context. If we need interrupt context, we can split
- * it into two routines.
-*/
-static int ipath_setup_pe_reset(struct ipath_devdata *dd)
-{
- u64 val;
- int i;
- int ret;
-
- /* Use ERROR so it shows up in logs, etc. */
- ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
- /* keep chip from being accessed in a few places */
- dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
- val = dd->ipath_control | INFINIPATH_C_RESET;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
- mb();
-
- for (i = 1; i <= 5; i++) {
- int r;
- /* allow MBIST, etc. to complete; longer on each retry.
- * We sometimes get machine checks from bus timeout if no
- * response, so for now, make it *really* long.
- */
- msleep(1000 + (1 + i) * 2000);
- if ((r =
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
- dd->ipath_pcibar0)))
- ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
- r);
- if ((r =
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
- dd->ipath_pcibar1)))
- ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
- r);
- /* now re-enable memory access */
- if ((r = pci_enable_device(dd->pcidev)))
- ipath_dev_err(dd, "pci_enable_device failed after "
- "reset: %d\n", r);
- /* whether it worked or not, mark as present, again */
- dd->ipath_flags |= IPATH_PRESENT;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
- if (val == dd->ipath_revision) {
- ipath_cdbg(VERBOSE, "Got matching revision "
- "register %llx on try %d\n",
- (unsigned long long) val, i);
- ret = ipath_reinit_msi(dd);
- goto bail;
- }
- /* Probably getting -1 back */
- ipath_dbg("Didn't get expected revision register, "
- "got %llx, try %d\n", (unsigned long long) val,
- i + 1);
- }
- ret = 0; /* failed */
-
-bail:
- return ret;
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
- u32 type, unsigned long pa)
-{
- u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
- unsigned long flags = 0; /* keep gcc quiet */
-
- if (pa != dd->ipath_tidinvalid) {
- if (pa & ((1U << 11) - 1)) {
- dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
- "not 4KB aligned!\n", pa);
- return;
- }
- pa >>= 11;
- /* paranoia check */
- if (pa & (7<<29))
- ipath_dev_err(dd,
- "BUG: Physical page address 0x%lx "
- "has bits set in 31-29\n", pa);
-
- if (type == 0)
- pa |= dd->ipath_tidtemplate;
- else /* for now, always full 4KB page */
- pa |= 2 << 29;
- }
-
- /* workaround chip bug 9437 by writing each TID twice
- * and holding a spinlock around the writes, so they don't
- * intermix with other TID (eager or expected) writes
- * Unfortunately, this call can be done from interrupt level
- * for the port 0 eager TIDs, so we have to use irqsave
- */
- spin_lock_irqsave(&dd->ipath_tid_lock, flags);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
- if (dd->ipath_kregbase)
- writel(pa, tidp32);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
- mmiowb();
- spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
-}
-
-/**
- * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * clear all TID entries for a port, expected and eager.
- * Used from ipath_close(). On this chip, TIDs are only 32 bits,
- * not 64, but they are still on 64 bit boundaries, so tidbase
- * is declared as u64 * for the pointer math, even though we write 32 bits
- */
-static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
- u64 __iomem *tidbase;
- unsigned long tidinv;
- int i;
-
- if (!dd->ipath_kregbase)
- return;
-
- ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
- tidinv = dd->ipath_tidinvalid;
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvtidbase +
- port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
-
- for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
-
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvegrbase +
- port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
-
- for (i = 0; i < dd->ipath_rcvegrcnt; i++)
- ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
-}
-
-/**
- * ipath_pe_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
-{
- u32 egrsize = dd->ipath_rcvegrbufsize;
-
- /* For now, we always allocate 4KB buffers (at init) so we can
- * receive max size packets. We may want a module parameter to
- * specify 2KB or 4KB and/or make be per port instead of per device
- * for those who want to reduce memory footprint. Note that the
- * ipath_rcvhdrentsize size must be large enough to hold the largest
- * IB header (currently 96 bytes) that we expect to handle (plus of
- * course the 2 dwords of RHF).
- */
- if (egrsize == 2048)
- dd->ipath_tidtemplate = 1U << 29;
- else if (egrsize == 4096)
- dd->ipath_tidtemplate = 2U << 29;
- else {
- egrsize = 4096;
- dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
- "%u, using %u\n", dd->ipath_rcvegrbufsize,
- egrsize);
- dd->ipath_tidtemplate = 2U << 29;
- }
- dd->ipath_tidinvalid = 0;
-}
-
-static int ipath_pe_early_init(struct ipath_devdata *dd)
-{
- dd->ipath_flags |= IPATH_4BYTE_TID;
-
- /*
- * For openfabrics, we need to be able to handle an IB header of
- * 24 dwords. HT chip has arbitrary sized receive buffers, so we
- * made them the same size as the PIO buffers. This chip does not
- * handle arbitrary size buffers, so we need the header large enough
- * to handle largest IB header, but still have room for a 2KB MTU
- * standard IB packet.
- */
- dd->ipath_rcvhdrentsize = 24;
- dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
- /*
- * To truly support a 4KB MTU (for usermode), we need to
- * bump this to a larger value. For now, we use them for
- * the kernel only.
- */
- dd->ipath_rcvegrbufsize = 2048;
- /*
- * the min() check here is currently a nop, but it may not always
- * be, depending on just how we do ipath_rcvegrbufsize
- */
- dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
- dd->ipath_rcvegrbufsize +
- (dd->ipath_rcvhdrentsize << 2));
- dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-
- /*
- * We can request a receive interrupt for 1 or
- * more packets from current offset. For now, we set this
- * up for a single packet.
- */
- dd->ipath_rhdrhead_intr_off = 1ULL<<32;
-
- ipath_get_eeprom_info(dd);
-
- return 0;
-}
-
-int __attribute__((weak)) ipath_unordered_wc(void)
-{
- return 0;
-}
-
-/**
- * ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithims.
- */
-static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
- struct ipath_base_info *kinfo = kbase;
-
- if (ipath_unordered_wc()) {
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
- ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
- }
- else
- ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
-
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
-
- return 0;
-}
-
-/**
- * ipath_init_iba6120_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
-{
- dd->ipath_f_intrsetup = ipath_pe_intconfig;
- dd->ipath_f_bus = ipath_setup_pe_config;
- dd->ipath_f_reset = ipath_setup_pe_reset;
- dd->ipath_f_get_boardname = ipath_pe_boardname;
- dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
- dd->ipath_f_early_init = ipath_pe_early_init;
- dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
- dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
- dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
- dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- dd->ipath_f_put_tid = ipath_pe_put_tid;
- dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
- dd->ipath_f_setextled = ipath_setup_pe_setextled;
- dd->ipath_f_get_base_info = ipath_pe_get_base_info;
-
- /* initialize chip-specific variables */
- dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
-
- /*
- * setup the register offsets, since they are different for each
- * chip
- */
- dd->ipath_kregs = &ipath_pe_kregs;
- dd->ipath_cregs = &ipath_pe_cregs;
-
- ipath_init_pe_variables();
-}
-
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 44669dc2e22..be2a60e142b 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,6 +33,9 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
@@ -41,7 +44,7 @@
/*
* min buffers we want to have per port, after driver
*/
-#define IPATH_MIN_USER_PORT_BUFCNT 8
+#define IPATH_MIN_USER_PORT_BUFCNT 7
/*
* Number of ports we are configured to use (to allow for more pio
@@ -54,13 +57,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
* Number of buffers reserved for driver (verbs and layered drivers.)
- * Reserved at end of buffer list. Initialized based on
- * number of PIO buffers if not set via module interface.
+ * Initialized based on number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
- * numbers for different chip types. So the default value is not
- * very useful. I've redefined it for the 1.3 release so that it's
- * zero unless set by the user to something else, in which case we
- * try to respect it.
+ * numbers for different chip types.
*/
static ushort ipath_kpiobufs;
@@ -88,13 +87,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
- struct sk_buff **skbs;
+ struct ipath_skbinfo *skbinfo;
int ret;
- egrcnt = dd->ipath_rcvegrcnt;
+ egrcnt = dd->ipath_p0_rcvegrcnt;
- skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
- if (skbs == NULL) {
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+ if (skbinfo == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
@@ -109,13 +108,13 @@ static int create_port0_egr(struct ipath_devdata *dd)
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
- skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
- if (!skbs[e]) {
+ skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+ if (!skbinfo[e].skb) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
- dev_kfree_skb(skbs[--e]);
- vfree(skbs);
+ dev_kfree_skb(skbinfo[--e].skb);
+ vfree(skbinfo);
ret = -ENOMEM;
goto bail;
}
@@ -124,14 +123,18 @@ static int create_port0_egr(struct ipath_devdata *dd)
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
- dd->ipath_port0_skbs = skbs;
+ dd->ipath_port0_skbinfo = skbinfo;
for (e = 0; e < egrcnt; e++) {
- unsigned long phys =
- virt_to_phys(dd->ipath_port0_skbs[e]->data);
+ dd->ipath_port0_skbinfo[e].phys =
+ ipath_map_single(dd->pcidev,
+ dd->ipath_port0_skbinfo[e].skb->data,
+ dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, phys);
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER,
+ dd->ipath_port0_skbinfo[e].phys);
}
ret = 0;
@@ -151,24 +154,13 @@ static int bringup_link(struct ipath_devdata *dd)
dd->ipath_control);
/*
- * Note that prior to try 14 or 15 of IB, the credit scaling
- * wasn't working, because it was swapped for writes with the
- * 1 bit default linkstate field
+ * set initial max size pkt IBC will send, including ICRC; it's the
+ * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
*/
+ val = (dd->ipath_ibmaxlen >> 2) + 1;
+ ibc = val << dd->ibcc_mpl_shift;
- /* ignore pbc and align word */
- val = dd->ipath_piosize2k - 2 * sizeof(u32);
- /*
- * for ICRC, which we only send in diag test pkt mode, and we
- * don't need to worry about that for mtu
- */
- val += 1;
- /*
- * Set the IBC maxpktlength to the size of our pio buffers the
- * maxpktlength is in words. This is *not* the IB data MTU.
- */
- ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
- /* in KB */
+ /* flowcontrolwatermark is in units of KBytes */
ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
/*
* How often flowctrl sent. More or less in usecs; balance against
@@ -187,10 +179,13 @@ static int bringup_link(struct ipath_devdata *dd)
/*
* Want to start out with both LINKCMD and LINKINITCMD in NOP
* (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that
- * to stay a NOP
+ * to stay a NOP. Flag that we are disabled, for the (unlikely)
+ * case that some recovery path is trying to bring the link up
+ * before we are ready.
*/
ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT;
+ dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
(unsigned long long) ibc);
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
@@ -213,32 +208,57 @@ static int bringup_link(struct ipath_devdata *dd)
return ret;
}
-static int init_chip_first(struct ipath_devdata *dd,
- struct ipath_portdata **pdp)
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
{
struct ipath_portdata *pd = NULL;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (pd) {
+ pd->port_dd = dd;
+ pd->port_cnt = 1;
+ /* The port 0 pkey table is used by the layer interface. */
+ pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ pd->port_seq_cnt = 1;
+ }
+ return pd;
+}
+
+static int init_chip_first(struct ipath_devdata *dd)
+{
+ struct ipath_portdata *pd;
int ret = 0;
u64 val;
+ spin_lock_init(&dd->ipath_kernel_tid_lock);
+ spin_lock_init(&dd->ipath_user_tid_lock);
+ spin_lock_init(&dd->ipath_sendctrl_lock);
+ spin_lock_init(&dd->ipath_uctxt_lock);
+ spin_lock_init(&dd->ipath_sdma_lock);
+ spin_lock_init(&dd->ipath_gpio_lock);
+ spin_lock_init(&dd->ipath_eep_st_lock);
+ spin_lock_init(&dd->ipath_sdepb_lock);
+ mutex_init(&dd->ipath_eep_lock);
+
/*
* skip cfgports stuff because we are not allocating memory,
* and we don't want problems if the portcnt changed due to
* cfgports. We do still check and report a difference, if
* not same (should be impossible).
*/
- dd->ipath_portcnt =
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+ dd->ipath_f_config_ports(dd, ipath_cfgports);
if (!ipath_cfgports)
dd->ipath_cfgports = dd->ipath_portcnt;
else if (ipath_cfgports <= dd->ipath_portcnt) {
dd->ipath_cfgports = ipath_cfgports;
ipath_dbg("Configured to use %u ports out of %u in chip\n",
- dd->ipath_cfgports, dd->ipath_portcnt);
+ dd->ipath_cfgports, ipath_read_kreg32(dd,
+ dd->ipath_kregs->kr_portcnt));
} else {
dd->ipath_cfgports = dd->ipath_portcnt;
ipath_dbg("Tried to configured to use %u ports; chip "
"only supports %u\n", ipath_cfgports,
- dd->ipath_portcnt);
+ ipath_read_kreg32(dd,
+ dd->ipath_kregs->kr_portcnt));
}
/*
* Allocate full portcnt array, rather than just cfgports, because
@@ -254,34 +274,15 @@ static int init_chip_first(struct ipath_devdata *dd,
goto done;
}
- dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads)
- * dd->ipath_cfgports,
- GFP_KERNEL);
- dd->ipath_lastrcvhdrqtails =
- kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails)
- * dd->ipath_cfgports, GFP_KERNEL);
-
- if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) {
- ipath_dev_err(dd, "Unable to allocate head arrays, "
- "failing\n");
- ret = -ENOMEM;
- goto done;
- }
-
- dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
-
- if (!dd->ipath_pd[0]) {
+ pd = create_portdata0(dd);
+ if (!pd) {
ipath_dev_err(dd, "Unable to allocate portdata for port "
"0, failing\n");
ret = -ENOMEM;
goto done;
}
- pd = dd->ipath_pd[0];
- pd->port_dd = dd;
- pd->port_port = 0;
- pd->port_cnt = 1;
- /* The port 0 pkey table is used by the layer interface. */
- pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ dd->ipath_pd[0] = pd;
+
dd->ipath_rcvtidcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
dd->ipath_rcvtidbase =
@@ -297,7 +298,9 @@ static int init_chip_first(struct ipath_devdata *dd,
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
dd->ipath_piosize2k = val & ~0U;
dd->ipath_piosize4k = val >> 32;
- dd->ipath_ibmtu = 4096; /* default to largest legal MTU */
+ if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
+ ipath_mtu4096 = 0; /* 4KB not supported by this chip */
+ dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
dd->ipath_piobcnt2k = val & ~0U;
dd->ipath_piobcnt4k = val >> 32;
@@ -325,40 +328,46 @@ static int init_chip_first(struct ipath_devdata *dd,
else ipath_dbg("%u 2k piobufs @ %p\n",
dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
- spin_lock_init(&dd->ipath_tid_lock);
-
done:
- *pdp = pd;
return ret;
}
/**
* init_chip_reset - re-initialize after a reset, or enable
* @dd: the infinipath device
- * @pdp: output for port data
*
* sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explictly, in case reset
+ * ensure no receive or transmit (explicitly, in case reset
* failed
*/
-static int init_chip_reset(struct ipath_devdata *dd,
- struct ipath_portdata **pdp)
+static int init_chip_reset(struct ipath_devdata *dd)
{
- struct ipath_portdata *pd;
u32 rtmp;
+ int i;
+ unsigned long flags;
- *pdp = pd = dd->ipath_pd[0];
- /* ensure chip does no sends or receives while we re-initialize */
- dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0);
+ /*
+ * ensure chip does no sends or receives, tail updates, or
+ * pioavail updates while we re-initialize
+ */
+ dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
+ for (i = 0; i < dd->ipath_portcnt; i++) {
+ clear_bit(dd->ipath_r_portenable_shift + i,
+ &dd->ipath_rcvctrl);
+ clear_bit(dd->ipath_r_intravail_shift + i,
+ &dd->ipath_rcvctrl);
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl = 0U; /* no sdma, etc */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
- rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
- if (dd->ipath_portcnt != rtmp)
- dev_info(&dd->pcidev->dev, "portcnt was %u before "
- "reset, now %u, using original\n",
- dd->ipath_portcnt, rtmp);
rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
if (rtmp != dd->ipath_rcvtidcnt)
dev_info(&dd->pcidev->dev, "tidcnt was %u before "
@@ -432,22 +441,37 @@ done:
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
- dd->ipath_pageshadow = (struct page **)
- vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
- if (!dd->ipath_pageshadow)
+ if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
- else
- memset(dd->ipath_pageshadow, 0,
- dd->ipath_cfgports * dd->ipath_rcvtidcnt *
- sizeof(struct page *));
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(dma_addr_t));
+ if (!addrs) {
+ ipath_dev_err(dd, "failed to allocate shadow dma handle "
+ "array, no expected sends!\n");
+ vfree(pages);
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ dd->ipath_pageshadow = pages;
+ dd->ipath_physshadow = addrs;
}
-static void enable_chip(struct ipath_devdata *dd,
- struct ipath_portdata *pd, int reinit)
+static void enable_chip(struct ipath_devdata *dd, int reinit)
{
u32 val;
+ u64 rcvmask;
+ unsigned long flags;
int i;
if (!reinit)
@@ -456,19 +480,32 @@ static void enable_chip(struct ipath_devdata *dd,
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
/* Enable PIO send, and update of PIOavail regs to memory. */
dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
INFINIPATH_S_PIOBUFAVAILUPD;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
/*
- * enable port 0 receive, and receive interrupt. other ports
- * done as user opens and inits them.
+ * Set the PIO avail update threshold to host memory
+ * on chips that support it.
*/
- dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD |
- (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) |
- (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT);
+ if (dd->ipath_pioupd_thresh)
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /*
+ * Enable kernel ports' receive and receive interrupt.
+ * Other ports done as user opens and inits them.
+ */
+ rcvmask = 1ULL;
+ dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
+ (rcvmask << dd->ipath_r_intravail_shift);
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
+ dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
+
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
@@ -479,17 +516,16 @@ static void enable_chip(struct ipath_devdata *dd,
dd->ipath_flags |= IPATH_INITTED;
/*
- * init our shadow copies of head from tail values, and write
- * head values to match.
+ * Init our shadow copies of head from tail values,
+ * and write head values to match.
*/
val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
- (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
- dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
/* Initialize so we interrupt on next packet received */
- (void)ipath_write_ureg(dd, ur_rcvhdrhead,
- dd->ipath_rhdrhead_intr_off |
- dd->ipath_port0head, 0);
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off |
+ dd->ipath_pd[0]->port_head, 0);
/*
* by now pioavail updates to memory should have occurred, so
@@ -498,29 +534,29 @@ static void enable_chip(struct ipath_devdata *dd,
* initial values of the generation bit correct.
*/
for (i = 0; i < dd->ipath_pioavregs; i++) {
- __le64 val;
+ __le64 pioavail;
/*
* Chip Errata bug 6641; even and odd qwords>3 are swapped.
*/
- if (i > 3) {
- if (i & 1)
- val = dd->ipath_pioavailregs_dma[i - 1];
- else
- val = dd->ipath_pioavailregs_dma[i + 1];
- }
+ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
+ pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
else
- val = dd->ipath_pioavailregs_dma[i];
- dd->ipath_pioavailshadow[i] = le64_to_cpu(val);
+ pioavail = dd->ipath_pioavailregs_dma[i];
+ /*
+ * don't need to worry about ipath_pioavailkernel here
+ * because we will call ipath_chg_pioavailkernel() later
+ * in initialization, to busy out buffers as needed
+ */
+ dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
}
/* can get counters, stats, etc. */
dd->ipath_flags |= IPATH_PRESENT;
}
-static int init_housekeeping(struct ipath_devdata *dd,
- struct ipath_portdata **pdp, int reinit)
+static int init_housekeeping(struct ipath_devdata *dd, int reinit)
{
- char boardn[32];
+ char boardn[40];
int ret = 0;
/*
@@ -571,22 +607,17 @@ static int init_housekeeping(struct ipath_devdata *dd,
goto done;
}
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
+
/* clear the initial reset flag, in case first driver load */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
INFINIPATH_E_RESET);
- if (reinit)
- ret = init_chip_reset(dd, pdp);
- else
- ret = init_chip_first(dd, pdp);
-
- if (ret)
- goto done;
-
- ipath_cdbg(VERBOSE, "Revision %llx (PCI %x), %u ports, %u tids, "
- "%u egrtids\n", (unsigned long long) dd->ipath_revision,
- dd->ipath_pcirev, dd->ipath_portcnt, dd->ipath_rcvtidcnt,
- dd->ipath_rcvegrcnt);
+ ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
+ (unsigned long long) dd->ipath_revision,
+ dd->ipath_pcirev);
if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
@@ -613,7 +644,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
- "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
"SW Compat %u\n",
IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
(unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
@@ -625,10 +656,39 @@ static int init_housekeeping(struct ipath_devdata *dd,
ipath_dbg("%s", dd->ipath_boardversion);
+ if (ret)
+ goto done;
+
+ if (reinit)
+ ret = init_chip_reset(dd);
+ else
+ ret = init_chip_first(dd);
+
done:
return ret;
}
+static void verify_interrupt(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+
+ if (!dd)
+ return; /* being torn down */
+
+ /*
+ * If we don't have any interrupts, let the user know and
+ * don't bother checking again.
+ */
+ if (dd->ipath_int_counter == 0) {
+ if (!dd->ipath_f_intr_fallback(dd))
+ dev_err(&dd->pcidev->dev, "No interrupts detected, "
+ "not usable.\n");
+ else /* re-arm the timer to see if fallback works */
+ mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
+ } else
+ ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
+ dd->ipath_int_counter);
+}
/**
* ipath_init_chip - do the actual initialization sequence on the chip
@@ -647,33 +707,24 @@ done:
*/
int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
- int ret = 0, i;
- u32 val32, kpiobufs;
+ int ret = 0;
+ u32 kpiobufs, defkbufs;
+ u32 piobufs, uports;
u64 val;
- struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
+ struct ipath_portdata *pd;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
- ret = init_housekeeping(dd, &pd, reinit);
+ ret = init_housekeeping(dd, reinit);
if (ret)
goto done;
/*
- * we ignore most issues after reporting them, but have to specially
- * handle hardware-disabled chips.
- */
- if (ret == 2) {
- /* unique error, known to ipath_init_one */
- ret = -EPERM;
- goto done;
- }
-
- /*
* We could bump this to allow for full rcvegrcnt + rcvtidcnt,
* but then it no longer nicely fits power of two, and since
* we now use routines that backend onto __get_free_pages, the
* rest would be wasted.
*/
- dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt;
+ dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
dd->ipath_rcvhdrcnt);
@@ -683,65 +734,64 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* the in memory DMA'ed copies of the registers. This has to
* be done early, before we calculate lastport, etc.
*/
- val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/*
* calc number of pioavail registers, and save it; we have 2
* bits per buffer.
*/
- dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
+ dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
- if (ipath_kpiobufs == 0) {
- /* not set by user (this is default) */
- if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
- kpiobufs = 32;
- else
- kpiobufs = 16;
- }
+ uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
+ if (piobufs > 144)
+ defkbufs = 32 + dd->ipath_pioreserved;
else
- kpiobufs = ipath_kpiobufs;
-
- if (kpiobufs >
- (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
- i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
- if (i < 0)
- i = 0;
- dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
- "kernel leaves too few for %d user ports "
- "(%d each); using %u\n", kpiobufs,
- dd->ipath_cfgports - 1,
- IPATH_MIN_USER_PORT_BUFCNT, i);
+ defkbufs = 16 + dd->ipath_pioreserved;
+
+ if (ipath_kpiobufs && (ipath_kpiobufs +
+ (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
+ int i = (int) piobufs -
+ (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
+ if (i < 1)
+ i = 1;
+ dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+ "%d for kernel leaves too few for %d user ports "
+ "(%d each); using %u\n", ipath_kpiobufs,
+ piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
- }
- dd->ipath_lastport_piobuf =
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
- dd->ipath_pbufsport = dd->ipath_cfgports > 1
- ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
- : 0;
- val32 = dd->ipath_lastport_piobuf -
- (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
- if (val32 > 0) {
- ipath_dbg("allocating %u pbufs/port leaves %u unused, "
- "add to kernel\n", dd->ipath_pbufsport, val32);
- dd->ipath_lastport_piobuf -= val32;
- ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
- dd->ipath_pbufsport, val32);
- }
- dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
+ } else if (ipath_kpiobufs)
+ kpiobufs = ipath_kpiobufs;
+ else
+ kpiobufs = defkbufs;
+ dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+ dd->ipath_pbufsport =
+ uports ? dd->ipath_lastport_piobuf / uports : 0;
+ /* if not an even divisor, some user ports get extra buffers */
+ dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
+ (dd->ipath_pbufsport * uports);
+ if (dd->ipath_ports_extrabuf)
+ ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
+ "ports <= %u\n", dd->ipath_pbufsport,
+ dd->ipath_ports_extrabuf);
+ dd->ipath_lastpioindex = 0;
+ dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
+ /* ipath_pioavailshadow initialized earlier */
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
- dd->ipath_pbufsport, dd->ipath_cfgports - 1);
-
- dd->ipath_f_early_init(dd);
+ piobufs, dd->ipath_pbufsport, uports);
+ ret = dd->ipath_f_early_init(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Early initialization failure\n");
+ goto done;
+ }
- /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
- * done after early_init */
+ /*
+ * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
+ * done after early_init.
+ */
dd->ipath_hdrqlast =
dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
@@ -756,8 +806,9 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
goto done;
}
- (void)ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
- dd->ipath_pioavailregs_phys);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
+ dd->ipath_pioavailregs_phys);
+
/*
* this is to detect s/w errors, which the h/w works around by
* ignoring the low 6 bits of address, if it wasn't aligned.
@@ -783,8 +834,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_PIOENABLE);
/*
* before error clears, since we expect serdes pll errors during
@@ -807,49 +856,83 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
- dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* clear all */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
/* enable errors that are masked, at least this first time. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
- /* clear any interrups up to this point (ints still not enabled) */
+ dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
+ dd->ipath_errormask =
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+ /* clear any interrupts up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+ dd->ipath_f_tidtemplate(dd);
+
/*
* Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
+ * Need to re-create rest of port 0 portdata as well.
*/
+ pd = dd->ipath_pd[0];
if (reinit) {
- struct ipath_portdata *pd = dd->ipath_pd[0];
- dd->ipath_pd[0] = NULL;
- ipath_free_pddata(dd, pd);
+ struct ipath_portdata *npd;
+
+ /*
+ * Alloc and init new ipath_portdata for port0,
+ * Then free old pd. Could lead to fragmentation, but also
+ * makes later support for hot-swap easier.
+ */
+ npd = create_portdata0(dd);
+ if (npd) {
+ ipath_free_pddata(dd, pd);
+ dd->ipath_pd[0] = npd;
+ pd = npd;
+ } else {
+ ipath_dev_err(dd, "Unable to allocate portdata"
+ " for port 0, failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
}
- dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret) {
- dd->ipath_hdrqtailptr =
- (volatile __le64 *)pd->port_rcvhdrtail_kvaddr;
+ if (!ret)
ret = create_port0_egr(dd);
- }
- if (ret)
- ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
+ if (ret) {
+ ipath_dev_err(dd, "failed to allocate kernel port's "
"rcvhdrq and/or egr bufs\n");
+ goto done;
+ }
else
- enable_chip(dd, pd, reinit);
+ enable_chip(dd, reinit);
+ /* after enable_chip, so pioavailshadow setup */
+ ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
- if (!ret && !reinit) {
- /* used when we close a port, for DMA already in flight at close */
+ /*
+ * Cancel any possible active sends from early driver load.
+ * Follows early_init because some chips have to initialize
+ * PIO buffers in early_init to avoid false parity errors.
+ * After enable and ipath_chg_pioavailkernel so we can safely
+ * enable pioavail updates and PIOENABLE; packets are now
+ * ready to go out.
+ */
+ ipath_cancel_sends(dd, 1);
+
+ if (!reinit) {
+ /*
+ * Used when we close a port, for DMA already in flight
+ * at close.
+ */
dd->ipath_dummy_hdrq = dma_alloc_coherent(
- &dd->pcidev->dev, pd->port_rcvhdrq_size,
+ &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
&dd->ipath_dummy_hdrq_phys,
gfp_flags);
- if (!dd->ipath_dummy_hdrq ) {
+ if (!dd->ipath_dummy_hdrq) {
dev_info(&dd->pcidev->dev,
"Couldn't allocate 0x%lx bytes for dummy hdrq\n",
- pd->port_rcvhdrq_size);
+ dd->ipath_pd[0]->port_rcvhdrq_size);
/* fallback to just 0'ing */
dd->ipath_dummy_hdrq_phys = 0UL;
}
@@ -861,7 +944,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
- if(!dd->ipath_stats_timer_active) {
+ if (!dd->ipath_stats_timer_active) {
/*
* first init, or after an admin disable/enable
* set up stats retrieval timer, even if we had errors
@@ -877,6 +960,16 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_stats_timer_active = 1;
}
+ /* Set up SendDMA if chip supports it */
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ ret = setup_sdma(dd);
+
+ /* Set up HoL state */
+ init_timer(&dd->ipath_hol_timer);
+ dd->ipath_hol_timer.function = ipath_hol_event;
+ dd->ipath_hol_timer.data = (unsigned long)dd;
+ dd->ipath_hol_state = IPATH_HOL_UP;
+
done:
if (!ret) {
*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
@@ -889,6 +982,20 @@ done:
0ULL);
/* chip is usable; mark it as initialized */
*dd->ipath_statusp |= IPATH_STATUS_INITTED;
+
+ /*
+ * setup to verify we get an interrupt, and fallback
+ * to an alternate if necessary and possible
+ */
+ if (!reinit) {
+ init_timer(&dd->ipath_intrchk_timer);
+ dd->ipath_intrchk_timer.function =
+ verify_interrupt;
+ dd->ipath_intrchk_timer.data =
+ (unsigned long) dd;
+ }
+ dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
+ add_timer(&dd->ipath_intrchk_timer);
} else
ipath_dev_err(dd, "No interrupts enabled, couldn't "
"setup interrupt address\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 49bf7bb15b0..01ba792791a 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,11 +32,62 @@
*/
#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+ u32 piobcnt;
+ unsigned long sbuf[4];
+ /*
+ * it's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling
+ */
+ piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ /* read these before writing errorclear */
+ sbuf[0] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror);
+ sbuf[1] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 1);
+ if (piobcnt > 128)
+ sbuf[2] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+ if (piobcnt > 192)
+ sbuf[3] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+ else
+ sbuf[3] = 0;
+
+ if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+ int i;
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
+ time_after(dd->ipath_lastcancel, jiffies)) {
+ __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+ "SendbufErrs %lx %lx", sbuf[0],
+ sbuf[1]);
+ if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+ printk(" %lx %lx ", sbuf[2], sbuf[3]);
+ printk("\n");
+ }
+
+ for (i = 0; i < piobcnt; i++)
+ if (test_bit(i, sbuf))
+ ipath_disarm_piobufs(dd, i, 1);
+ /* ignore armlaunch errs for a bit */
+ dd->ipath_lastcancel = jiffies+3;
+ }
+}
+
+
/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -55,6 +106,17 @@
INFINIPATH_E_INVALIDADDR)
/*
+ * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
+ * errors not related to freeze and cancelling buffers. Can't ignore
+ * armlaunch because could get more while still cleaning up, and need
+ * to cancel those as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+ (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
+ INFINIPATH_E_SPKTLEN)
+
+/*
* these are errors that can occur when the link changes state while
* a packet is being sent or received. This doesn't cover things
* like EBP or VCRC that can be the result of a sending having the
@@ -68,53 +130,9 @@
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
- unsigned long sbuf[4];
u64 ignore_this_time = 0;
- u32 piobcnt;
-
- /* if possible that sendbuffererror could be valid */
- piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
- /* read these before writing errorclear */
- sbuf[0] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror);
- sbuf[1] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 1);
- if (piobcnt > 128) {
- sbuf[2] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 2);
- sbuf[3] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 3);
- }
-
- if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
- int i;
- ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
- if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
- printk("%lx %lx ", sbuf[2], sbuf[3]);
- for (i = 0; i < piobcnt; i++) {
- if (test_bit(i, sbuf)) {
- u32 __iomem *piobuf;
- if (i < dd->ipath_piobcnt2k)
- piobuf = (u32 __iomem *)
- (dd->ipath_pio2kbase +
- i * dd->ipath_palign);
- else
- piobuf = (u32 __iomem *)
- (dd->ipath_pio4kbase +
- (i - dd->ipath_piobcnt2k) *
- dd->ipath_4kalign);
-
- ipath_cdbg(PKT,
- "PIObuf[%u] @%p pbc is %x; ",
- i, piobuf, readl(piobuf));
-
- ipath_disarm_piobufs(dd, i, 1);
- }
- }
- if (ipath_debug & __IPATH_PKTDBG)
- printk("\n");
- }
+ ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -132,205 +150,327 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
return ignore_this_time;
}
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
+ .msg = "TXE " #a " Memory Parity" \
+ }
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
+ .msg = "RXE " #a " Memory Parity" \
+ }
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+ INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg message buffer
+ * @msgl length of message buffer
+ * @hwmsg message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs hardware errors bit vector
+ * @hwerrmsgs hardware error descriptions
+ * @nhwerrmsgs number of hwerrmsgs
+ * @msg message buffer
+ * @msgl message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t msgl)
+{
+ int i;
+ const int glen =
+ ARRAY_SIZE(ipath_generic_hwerror_msgs);
+
+ for (i=0; i<glen; i++) {
+ if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl,
+ ipath_generic_hwerror_msgs[i].msg);
+ }
+ }
+
+ for (i=0; i<nhwerrmsgs; i++) {
+ if (hwerrs & hwerrmsgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ }
+ }
+}
+
/* return the strings for the most common link states */
-static char *ib_linkstate(u32 linkstate)
+static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
{
char *ret;
+ u32 state;
- switch (linkstate) {
- case IPATH_IBSTATE_INIT:
+ state = ipath_ib_state(dd, ibcs);
+ if (state == dd->ib_init)
ret = "Init";
- break;
- case IPATH_IBSTATE_ARM:
+ else if (state == dd->ib_arm)
ret = "Arm";
- break;
- case IPATH_IBSTATE_ACTIVE:
+ else if (state == dd->ib_active)
ret = "Active";
- break;
- default:
+ else
ret = "Down";
- }
-
return ret;
}
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
+{
+ struct ib_event event;
+
+ event.device = &dd->verbs_dev->ibdev;
+ event.element.port_num = 1;
+ event.event = ev;
+ ib_dispatch_event(&event);
+}
+
static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
- ipath_err_t errs, int noprint)
+ ipath_err_t errs)
{
- u64 val;
- u32 ltstate, lstate;
+ u32 ltstate, lstate, ibstate, lastlstate;
+ u32 init = dd->ib_init;
+ u32 arm = dd->ib_arm;
+ u32 active = dd->ib_active;
+ const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+
+ lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
+ ibstate = ipath_ib_state(dd, ibcs);
+ /* linkstate at last interrupt */
+ lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+ ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
/*
- * even if diags are enabled, we want to notice LINKINIT, etc.
- * We just don't want to change the LED state, or
- * dd->ipath_kregs->kr_ibcctrl
+ * Since going into a recovery state causes the link state to go
+ * down and since recovery is transitory, it is better if we "miss"
+ * ever seeing the link training state go into recovery (i.e.,
+ * ignore this transition for link state special handling purposes)
+ * without even updating ipath_lastibcstat.
*/
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
- lstate = val & IPATH_IBSTATE_MASK;
+ if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
+ (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
+ (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
+ goto done;
/*
- * this is confusing enough when it happens that I want to always put it
- * on the console and in the logs. If it was a requested state change,
- * we'll have already cleared the flags, so we won't print this warning
+ * if linkstate transitions into INIT from any of the various down
+ * states, or if it transitions from any of the up (INIT or better)
+ * states into any of the down states (except link recovery), then
+ * call the chip-specific code to take appropriate actions.
*/
- if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
- && (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
- dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
- (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
- ib_linkstate(lstate));
- /*
- * Flush all queued sends when link went to DOWN or INIT,
- * to be sure that they don't block SMA and other MAD packets
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- (unsigned)(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) -
- dd->ipath_lastport_piobuf);
- }
- else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
- lstate == IPATH_IBSTATE_ACTIVE) {
- /*
- * only print at SMA if there is a change, debug if not
- * (sometimes we want to know that, usually not).
- */
- if (lstate == ((unsigned) dd->ipath_lastibcstat
- & IPATH_IBSTATE_MASK)) {
- ipath_dbg("Status change intr but no change (%s)\n",
- ib_linkstate(lstate));
+ if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
+ lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
+ /* transitioned to UP */
+ if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
+ /* link came up, so we must no longer be disabled */
+ dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+ ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
+ goto skip_ibchange; /* chip-code handled */
+ }
+ } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
+ (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
+ ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
+ ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+ int handled;
+ handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
+ dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
+ if (handled) {
+ ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
+ goto skip_ibchange; /* chip-code handled */
}
- else
- ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
- "was %s\n", dd->ipath_unit,
- ib_linkstate(lstate),
- ib_linkstate((unsigned)
- dd->ipath_lastibcstat
- & IPATH_IBSTATE_MASK));
- }
- else {
- lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
- if (lstate == IPATH_IBSTATE_INIT ||
- lstate == IPATH_IBSTATE_ARM ||
- lstate == IPATH_IBSTATE_ACTIVE)
- ipath_cdbg(VERBOSE, "Unit %u link state down"
- " (state 0x%x), from %s\n",
- dd->ipath_unit,
- (u32)val & IPATH_IBSTATE_MASK,
- ib_linkstate(lstate));
- else
- ipath_cdbg(VERBOSE, "Unit %u link state changed "
- "to 0x%x from down (%x)\n",
- dd->ipath_unit, (u32) val, lstate);
}
- ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
- INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
- lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
- INFINIPATH_IBCS_LINKSTATE_MASK;
+
+ /*
+ * Significant enough to always print and get into logs, if it was
+ * unexpected. If it was a requested state change, we'll have
+ * already cleared the flags, so we won't print this warning
+ */
+ if ((ibstate != arm && ibstate != active) &&
+ (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
+ dev_info(&dd->pcidev->dev, "Link state changed from %s "
+ "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
+ "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
+ }
if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
- u32 last_ltstate;
-
+ u32 lastlts;
+ lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
/*
- * Ignore cycling back and forth from Polling.Active
- * to Polling.Quiet while waiting for the other end of
- * the link to come up. We will cycle back and forth
- * between them if no cable is plugged in,
- * the other device is powered off or disabled, etc.
+ * Ignore cycling back and forth from Polling.Active to
+ * Polling.Quiet while waiting for the other end of the link
+ * to come up, except to try and decide if we are connected
+ * to a live IB device or not. We will cycle back and
+ * forth between them if no cable is plugged in, the other
+ * device is powered off or disabled, etc.
*/
- last_ltstate = (dd->ipath_lastibcstat >>
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
- & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
- if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
- || last_ltstate ==
- INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
- if (dd->ipath_ibpollcnt > 40) {
+ if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
+ lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+ if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
+ (++dd->ipath_ibpollcnt == 40)) {
dd->ipath_flags |= IPATH_NOCABLE;
*dd->ipath_statusp |=
IPATH_STATUS_IB_NOCABLE;
- } else
- dd->ipath_ibpollcnt++;
+ ipath_cdbg(LINKVERB, "Set NOCABLE\n");
+ }
+ ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
+ ipath_ibcstatus_str[ltstate], ibstate);
goto skip_ibchange;
}
}
- dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */
+
+ dd->ipath_ibpollcnt = 0; /* not poll*, now */
ipath_stats.sps_iblink++;
- if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+
+ if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
+ u64 linkrecov;
+ linkrecov = ipath_snap_cntr(dd,
+ dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ if (linkrecov != dd->ipath_lastlinkrecov) {
+ ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
+ (unsigned long long) ibcs,
+ ib_linkstate(dd, ibcs),
+ ipath_ibcstatus_str[ltstate],
+ (unsigned long long) linkrecov);
+ /* and no more until active again */
+ dd->ipath_lastlinkrecov = 0;
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ goto skip_ibchange;
+ }
+ }
+
+ if (ibstate == init || ibstate == arm || ibstate == active) {
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
+ if (ibstate == init || ibstate == arm) {
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
+ }
+ if (ibstate == arm) {
+ dd->ipath_flags |= IPATH_LINKARMED;
+ dd->ipath_flags &= ~(IPATH_LINKUNK |
+ IPATH_LINKINIT | IPATH_LINKDOWN |
+ IPATH_LINKACTIVE | IPATH_NOCABLE);
+ ipath_hol_down(dd);
+ } else if (ibstate == init) {
+ /*
+ * set INIT and DOWN. Down is checked by
+ * most of the other code, but INIT is
+ * useful to know in a few places.
+ */
+ dd->ipath_flags |= IPATH_LINKINIT |
+ IPATH_LINKDOWN;
+ dd->ipath_flags &= ~(IPATH_LINKUNK |
+ IPATH_LINKARMED | IPATH_LINKACTIVE |
+ IPATH_NOCABLE);
+ ipath_hol_down(dd);
+ } else { /* active */
+ dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
+ dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ *dd->ipath_statusp |=
+ IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
+ dd->ipath_flags |= IPATH_LINKACTIVE;
+ dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+ | IPATH_LINKDOWN | IPATH_LINKARMED |
+ IPATH_NOCABLE);
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ ipath_restart_sdma(dd);
+ signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
+ /* LED active not handled in chip _f_updown */
+ dd->ipath_f_setextled(dd, lstate, ltstate);
+ ipath_hol_up(dd);
+ }
+
+ /*
+ * print after we've already done the work, so as not to
+ * delay the state changes and notifications, for debugging
+ */
+ if (lstate == lastlstate)
+ ipath_cdbg(LINKVERB, "Unchanged from last: %s "
+ "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
+ else
+ ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
+ dd->ipath_unit, ib_linkstate(dd, ibcs),
+ ipath_ibcstatus_str[ltstate], ibstate);
+ } else { /* down */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
dd->ipath_flags |= IPATH_LINKDOWN;
dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
| IPATH_LINKACTIVE |
IPATH_LINKARMED);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
dd->ipath_lli_counter = 0;
- if (!noprint) {
- if (((dd->ipath_lastibcstat >>
- INFINIPATH_IBCS_LINKSTATE_SHIFT) &
- INFINIPATH_IBCS_LINKSTATE_MASK)
- == INFINIPATH_IBCS_L_STATE_ACTIVE)
- /* if from up to down be more vocal */
- ipath_cdbg(VERBOSE,
- "Unit %u link now down (%s)\n",
- dd->ipath_unit,
- ipath_ibcstatus_str[ltstate]);
- else
- ipath_cdbg(VERBOSE, "Unit %u link is "
- "down (%s)\n", dd->ipath_unit,
- ipath_ibcstatus_str[ltstate]);
- }
- dd->ipath_f_setextled(dd, lstate, ltstate);
- } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
- dd->ipath_flags |= IPATH_LINKACTIVE;
- dd->ipath_flags &=
- ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
- IPATH_LINKARMED | IPATH_NOCABLE);
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
- *dd->ipath_statusp |=
- IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
- dd->ipath_f_setextled(dd, lstate, ltstate);
- } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
- /*
- * set INIT and DOWN. Down is checked by most of the other
- * code, but INIT is useful to know in a few places.
- */
- dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
- dd->ipath_flags &=
- ~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
- | IPATH_NOCABLE);
- *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
- | IPATH_STATUS_IB_READY);
- dd->ipath_f_setextled(dd, lstate, ltstate);
- } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
- dd->ipath_flags |= IPATH_LINKARMED;
- dd->ipath_flags &=
- ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
- IPATH_LINKACTIVE | IPATH_NOCABLE);
- *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
- | IPATH_STATUS_IB_READY);
- dd->ipath_f_setextled(dd, lstate, ltstate);
- } else {
- if (!noprint)
- ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
- dd->ipath_unit,
- ipath_ibcstatus_str[ltstate], ltstate);
+ if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
+ ipath_cdbg(VERBOSE, "Unit %u link state down "
+ "(state 0x%x), from %s\n",
+ dd->ipath_unit, lstate,
+ ib_linkstate(dd, dd->ipath_lastibcstat));
+ else
+ ipath_cdbg(LINKVERB, "Unit %u link state changed "
+ "to %s (0x%x) from down (%x)\n",
+ dd->ipath_unit,
+ ipath_ibcstatus_str[ltstate],
+ ibstate, lastlstate);
}
+
skip_ibchange:
- dd->ipath_lastibcstat = val;
+ dd->ipath_lastibcstat = ibcs;
+done:
+ return;
}
static void handle_supp_msgs(struct ipath_devdata *dd,
- unsigned supp_msgs, char msg[512])
+ unsigned supp_msgs, char *msg, u32 msgsz)
{
/*
* Print the message unless it's ibc status change only, which
* happens so often we never want to count it.
*/
if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
- ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
- ~INFINIPATH_E_IBSTATUSCHANGED);
- if (dd->ipath_lasterror &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ int iserr;
+ ipath_err_t mask;
+ iserr = ipath_decode_err(dd, msg, msgsz,
+ dd->ipath_lasterror &
+ ~INFINIPATH_E_IBSTATUSCHANGED);
+
+ mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
+
+ /* if we're in debug, then don't mask SDMADISABLED msgs */
+ if (ipath_debug & __IPATH_DBG)
+ mask &= ~INFINIPATH_E_SDMADISABLED;
+
+ if (dd->ipath_lasterror & ~mask)
ipath_dev_err(dd, "Suppressed %u messages for "
"fast-repeating errors (%s) (%llx)\n",
supp_msgs, msg,
@@ -344,15 +484,20 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Suppressed %u messages for %s\n",
- supp_msgs, msg);
+ if (iserr)
+ ipath_dbg("Suppressed %u messages for %s\n",
+ supp_msgs, msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Suppressed %u messages for %s\n",
+ supp_msgs, msg);
}
}
}
static unsigned handle_frequent_errors(struct ipath_devdata *dd,
- ipath_err_t errs, char msg[512],
- int *noprint)
+ ipath_err_t errs, char *msg,
+ u32 msgsz, int *noprint)
{
unsigned long nc;
static unsigned long nextmsg_time;
@@ -371,7 +516,7 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd,
nextmsg_time = nc + HZ * 3;
}
else if (supp_msgs) {
- handle_supp_msgs(dd, supp_msgs, msg);
+ handle_supp_msgs(dd, supp_msgs, msg, msgsz);
supp_msgs = 0;
nmsgs = 0;
}
@@ -382,32 +527,146 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd,
return supp_msgs;
}
+static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
+{
+ unsigned long flags;
+ int expected;
+
+ if (ipath_debug & __IPATH_DBG) {
+ char msg[128];
+ ipath_decode_err(dd, msg, sizeof msg, errs &
+ INFINIPATH_E_SDMAERRS);
+ ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
+ }
+ if (ipath_debug & __IPATH_VERBDBG) {
+ unsigned long tl, hd, status, lengen;
+ tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+ hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+ status = ipath_read_kreg64(dd
+ , dd->ipath_kregs->kr_senddmastatus);
+ lengen = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_senddmalengen);
+ ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
+ "lengen 0x%lx\n", tl, hd, status, lengen);
+ }
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!expected)
+ ipath_cancel_sends(dd, 1);
+}
+
+static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
+{
+ unsigned long flags;
+ int expected;
+
+ if ((istat & INFINIPATH_I_SDMAINT) &&
+ !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ ipath_sdma_intr(dd);
+
+ if (istat & INFINIPATH_I_SDMADISABLED) {
+ expected = test_bit(IPATH_SDMA_ABORTING,
+ &dd->ipath_sdma_status);
+ ipath_dbg("%s SDmaDisabled intr\n",
+ expected ? "expected" : "unexpected");
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!expected)
+ ipath_cancel_sends(dd, 1);
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ }
+}
+
+static int handle_hdrq_full(struct ipath_devdata *dd)
+{
+ int chkerrpkts = 0;
+ u32 hd, tl;
+ u32 i;
+
+ ipath_stats.sps_hdrqfull++;
+ for (i = 0; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ if (i == 0) {
+ /*
+ * For kernel receive queues, we just want to know
+ * if there are packets in the queue that we can
+ * process.
+ */
+ if (pd->port_head != ipath_get_hdrqtail(pd))
+ chkerrpkts |= 1 << i;
+ continue;
+ }
+
+ /* Skip if user context is not open */
+ if (!pd || !pd->port_cnt)
+ continue;
+
+ /* Don't report the same point multiple times. */
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL)
+ tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
+ else
+ tl = ipath_get_rcvhdrtail(pd);
+ if (tl == pd->port_lastrcvhdrqtail)
+ continue;
+
+ hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
+ if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
+ pd->port_lastrcvhdrqtail = tl;
+ pd->port_hdrqfull++;
+ /* flush hdrqfull so that poll() sees it */
+ wmb();
+ wake_up_interruptible(&pd->port_wait);
+ }
+ }
+
+ return chkerrpkts;
+}
+
static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
- char msg[512];
+ char msg[128];
u64 ignore_this_time = 0;
- int i;
+ u64 iserr = 0;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
-
- supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
+ int log_idx;
/*
- * don't report errors that are masked (includes those always
- * ignored)
+ * don't report errors that are masked, either at init
+ * (not set in ipath_errormask), or temporarily (set in
+ * ipath_maskederrs)
*/
- errs &= ~dd->ipath_maskederrs;
+ errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
+
+ supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
+ &noprint);
/* do these first, they are most important */
if (errs & INFINIPATH_E_HARDWARE) {
/* reuse same msg buf */
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+ } else {
+ u64 mask;
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+ mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+ if (errs & mask)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+ }
}
- if (!noprint && (errs & ~infinipath_e_bitsextant))
+ if (errs & INFINIPATH_E_SDMAERRS)
+ handle_sdma_errors(dd, errs);
+
+ if (!noprint && (errs & ~dd->ipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (errs & ~infinipath_e_bitsextant));
+ (errs & ~dd->ipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
@@ -426,6 +685,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
}
if (supp_msgs == 250000) {
+ int s_iserr;
/*
* It's not entirely reasonable assuming that the errors set
* in the last clear period are all responsible for the
@@ -433,19 +693,20 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* ones on this particular interrupt, which also isn't great
*/
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+
+ dd->ipath_errormask &= ~dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
- ~dd->ipath_maskederrs);
- ipath_decode_err(msg, sizeof msg,
- (dd->ipath_maskederrs & ~dd->
- ipath_ignorederrs));
-
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
- ipath_dev_err(dd, "Disabling error(s) %llx because "
- "occurring too frequently (%s)\n",
- (unsigned long long)
- (dd->ipath_maskederrs &
- ~dd->ipath_ignorederrs), msg);
+ dd->ipath_errormask);
+ s_iserr = ipath_decode_err(dd, msg, sizeof msg,
+ dd->ipath_maskederrs);
+
+ if (dd->ipath_maskederrs &
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+ ipath_dev_err(dd, "Temporarily disabling "
+ "error(s) %llx reporting; too frequent (%s)\n",
+ (unsigned long long) dd->ipath_maskederrs,
+ msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
@@ -454,8 +715,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processing them. So only complain about
* these at debug level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", msg);
+ if (s_iserr)
+ ipath_dbg("Temporarily disabling reporting "
+ "too frequent queue full errors (%s)\n",
+ msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Temporarily disabling reporting too"
+ " frequent packet errors (%s)\n",
+ msg);
}
/*
@@ -478,19 +746,44 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
+
+ if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
+ dd->ipath_spectriggerhit++;
+ ipath_dbg("%lu special trigger hits\n",
+ dd->ipath_spectriggerhit);
+ }
+
+ /* likely due to cancel; so suppress message unless verbose */
+ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+ time_after(dd->ipath_lastcancel, jiffies)) {
+ /* armlaunch takes precedence; it often causes both. */
+ ipath_cdbg(VERBOSE,
+ "Suppressed %s error (%llx) after sendbuf cancel\n",
+ (errs & INFINIPATH_E_SPIOARMLAUNCH) ?
+ "armlaunch" : "sendpktlen", (unsigned long long)errs);
+ errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+ }
+
if (!errs)
return 0;
- if (!noprint)
+ if (!noprint) {
+ ipath_err_t mask;
/*
- * the ones we mask off are handled specially below or above
+ * The ones we mask off are handled specially below
+ * or above. Also mask SDMADISABLED by default as it
+ * is too chatty.
*/
- ipath_decode_err(msg, sizeof msg,
- errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
- INFINIPATH_E_RRCVEGRFULL |
- INFINIPATH_E_RRCVHDRFULL |
- INFINIPATH_E_HARDWARE));
- else
+ mask = INFINIPATH_E_IBSTATUSCHANGED |
+ INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
+
+ /* if we're in debug, then don't mask SDMADISABLED msgs */
+ if (ipath_debug & __IPATH_DBG)
+ mask &= ~INFINIPATH_E_SDMADISABLED;
+
+ ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
+ } else
/* so we don't need if (!noprint) at strlcat's below */
*msg = 0;
@@ -505,6 +798,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
ipath_stats.sps_crcerrs++;
chkerrpkts = 1;
}
+ iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
/*
* We don't want to print these two as they happen, or we can make
@@ -513,39 +808,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* fast_stats, no more than every 5 seconds, user ports get printed
* on close
*/
- if (errs & INFINIPATH_E_RRCVHDRFULL) {
- int any;
- u32 hd, tl;
- ipath_stats.sps_hdrqfull++;
- for (any = i = 0; i < dd->ipath_cfgports; i++) {
- struct ipath_portdata *pd = dd->ipath_pd[i];
- if (i == 0) {
- hd = dd->ipath_port0head;
- tl = (u32) le64_to_cpu(
- *dd->ipath_hdrqtailptr);
- } else if (pd && pd->port_cnt &&
- pd->port_rcvhdrtail_kvaddr) {
- /*
- * don't report same point multiple times,
- * except kernel
- */
- tl = (u32) * pd->port_rcvhdrtail_kvaddr;
- if (tl == dd->ipath_lastrcvhdrqtails[i])
- continue;
- hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
- i);
- } else
- continue;
- if (hd == (tl + 1) ||
- (!hd && tl == dd->ipath_hdrqlast)) {
- if (i == 0)
- chkerrpkts = 1;
- dd->ipath_lastrcvhdrqtails[i] = tl;
- pd->port_hdrqfull++;
- }
- }
- }
+ if (errs & INFINIPATH_E_RRCVHDRFULL)
+ chkerrpkts |= handle_hdrq_full(dd);
if (errs & INFINIPATH_E_RRCVEGRFULL) {
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+
/*
* since this is of less importance and not likely to
* happen without also getting hdrfull, only count
@@ -553,9 +820,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* vs user)
*/
ipath_stats.sps_etidfull++;
- if (dd->ipath_port0head !=
- (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
- chkerrpkts = 1;
+ if (pd->port_head != ipath_get_hdrqtail(pd))
+ chkerrpkts |= 1;
}
/*
@@ -573,16 +839,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
| IPATH_LINKARMED | IPATH_LINKACTIVE);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- if (!noprint) {
- u64 st = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_ibcstatus);
- ipath_dbg("Lost link, link now down (%s)\n",
- ipath_ibcstatus_str[st & 0xf]);
- }
+ ipath_dbg("Lost link, link now down (%s)\n",
+ ipath_ibcstatus_str[ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_ibcstatus) & 0xf]);
}
if (errs & INFINIPATH_E_IBSTATUSCHANGED)
- handle_e_ibstatuschanged(dd, errs, noprint);
+ handle_e_ibstatuschanged(dd, errs);
if (errs & INFINIPATH_E_RESET) {
if (!noprint)
@@ -594,8 +857,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
}
- if (!noprint && *msg)
- ipath_dev_err(dd, "%s error\n", msg);
+ if (!noprint && *msg) {
+ if (iserr)
+ ipath_dev_err(dd, "%s error\n", msg);
+ }
if (dd->ipath_state_wanted & dd->ipath_flags) {
ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
"waking\n", dd->ipath_state_wanted,
@@ -606,9 +871,56 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
return chkerrpkts;
}
+/*
+ * try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ */
+void ipath_clear_freeze(struct ipath_devdata *dd)
+{
+ /* disable error interrupts, to avoid confusion */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwriten */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
+ ipath_cancel_sends(dd, 1);
+
+ /* clear the freeze, and be sure chip saw it */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+ /* force in-memory update now we are out of freeze */
+ ipath_force_pio_avail_update(dd);
+
+ /*
+ * force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ E_SPKT_ERRS_IGNORE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+}
+
+
/* this is separate to allow for better optimization of ipath_intr() */
-static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
+static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
{
/*
* sometimes happen during driver init and unload, don't want
@@ -626,17 +938,17 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
* linuxbios development work, and it may happen in
* the future again.
*/
- if (dd->pcidev && dd->pcidev->irq) {
+ if (dd->pcidev && dd->ipath_irq) {
ipath_dev_err(dd, "Now %u unexpected "
"interrupts, unregistering "
"interrupt handler\n",
*unexpectp);
- ipath_dbg("free_irq of irq %x\n",
- dd->pcidev->irq);
- free_irq(dd->pcidev->irq, dd);
+ ipath_dbg("free_irq of irq %d\n",
+ dd->ipath_irq);
+ dd->ipath_f_free_irq(dd);
}
}
- if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) {
+ if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
ipath_dev_err(dd, "%u unexpected interrupts, "
"disabling interrupts completely\n",
*unexpectp);
@@ -651,7 +963,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
"ignoring\n");
}
-static void ipath_bad_regread(struct ipath_devdata *dd)
+static noinline void ipath_bad_regread(struct ipath_devdata *dd)
{
static int allbits;
@@ -669,7 +981,7 @@ static void ipath_bad_regread(struct ipath_devdata *dd)
if (allbits == 2) {
ipath_dev_err(dd, "Still bad interrupt status, "
"unregistering interrupt\n");
- free_irq(dd->pcidev->irq, dd);
+ dd->ipath_f_free_irq(dd);
} else if (allbits > 2) {
if ((allbits % 10000) == 0)
printk(".");
@@ -679,31 +991,9 @@ static void ipath_bad_regread(struct ipath_devdata *dd)
}
}
-static void handle_port_pioavail(struct ipath_devdata *dd)
-{
- u32 i;
- /*
- * start from port 1, since for now port 0 is never using
- * wait_event for PIO
- */
- for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
- struct ipath_portdata *pd = dd->ipath_pd[i];
-
- if (pd && pd->port_cnt &&
- dd->ipath_portpiowait & (1U << i)) {
- clear_bit(i, &dd->ipath_portpiowait);
- if (test_bit(IPATH_PORT_WAITING_PIO,
- &pd->port_flag)) {
- clear_bit(IPATH_PORT_WAITING_PIO,
- &pd->port_flag);
- wake_up_interruptible(&pd->port_wait);
- }
- }
- }
-}
-
static void handle_layer_pioavail(struct ipath_devdata *dd)
{
+ unsigned long flags;
int ret;
ret = ipath_ib_piobufavail(dd->verbs_dev);
@@ -712,9 +1002,12 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
return;
set:
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
/*
@@ -722,32 +1015,46 @@ set:
* process was waiting for a packet to arrive, and didn't want
* to poll
*/
-static void handle_urcv(struct ipath_devdata *dd, u32 istat)
+static void handle_urcv(struct ipath_devdata *dd, u64 istat)
{
u64 portr;
int i;
int rcvdint = 0;
- portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
- infinipath_i_rcvavail_mask)
- | ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
- infinipath_i_rcvurg_mask);
+ /*
+ * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
+ * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
+ * would both like timely updates of the bits so that
+ * we don't pass them by unnecessarily. the rmb()
+ * here ensures that we see them promptly -- the
+ * corresponding wmb()'s are in ipath_poll_urgent()
+ * and ipath_poll_next()...
+ */
+ rmb();
+ portr = ((istat >> dd->ipath_i_rcvavail_shift) &
+ dd->ipath_i_rcvavail_mask) |
+ ((istat >> dd->ipath_i_rcvurg_shift) &
+ dd->ipath_i_rcvurg_mask);
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
- if (portr & (1 << i) && pd && pd->port_cnt &&
- test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- int rcbit;
- clear_bit(IPATH_PORT_WAITING_RCV,
- &pd->port_flag);
- rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
- clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
- wake_up_interruptible(&pd->port_wait);
- rcvdint = 1;
+
+ if (portr & (1 << i) && pd && pd->port_cnt) {
+ if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
+ &pd->port_flag)) {
+ clear_bit(i + dd->ipath_r_intravail_shift,
+ &dd->ipath_rcvctrl);
+ wake_up_interruptible(&pd->port_wait);
+ rcvdint = 1;
+ } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
+ &pd->port_flag)) {
+ pd->port_urgent++;
+ wake_up_interruptible(&pd->port_wait);
+ }
}
}
if (rcvdint) {
/* only want to take one interrupt, so turn off the rcv
- * interrupt for all the ports that we did the wakeup on
+ * interrupt for all the ports that we set the rcv_waiting
* (but never for kernel port)
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
@@ -755,19 +1062,20 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
}
}
-irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
+irqreturn_t ipath_intr(int irq, void *data)
{
struct ipath_devdata *dd = data;
- u32 istat, chk0rcv = 0;
+ u64 istat, chk0rcv = 0;
ipath_err_t estat = 0;
irqreturn_t ret;
- u32 oldhead, curtail;
static unsigned unexpected = 0;
- static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) |
- (1U<<INFINIPATH_I_RCVURG_SHIFT);
+ u64 kportrbits;
ipath_stats.sps_ints++;
+ if (dd->ipath_int_counter != (u32) -1)
+ dd->ipath_int_counter++;
+
if (!(dd->ipath_flags & IPATH_PRESENT)) {
/*
* This return value is not great, but we do not want the
@@ -791,37 +1099,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
goto bail;
}
- /*
- * We try to avoid reading the interrupt status register, since
- * that's a PIO read, and stalls the processor for up to about
- * ~0.25 usec. The idea is that if we processed a port0 packet,
- * we blindly clear the port 0 receive interrupt bits, and nothing
- * else, then return. If other interrupts are pending, the chip
- * will re-interrupt us as soon as we write the intclear register.
- * We then won't process any more kernel packets (if not the 2nd
- * time, then the 3rd or 4th) and we'll then handle the other
- * interrupts. We clear the interrupts first so that we don't
- * lose intr for later packets that arrive while we are processing.
- */
- oldhead = dd->ipath_port0head;
- curtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
- if (oldhead != curtail) {
- if (dd->ipath_flags & IPATH_GPIO_INTR) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
- istat = port0rbits | INFINIPATH_I_GPIO;
- }
- else
- istat = port0rbits;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
- ipath_kreceive(dd);
- if (oldhead != dd->ipath_port0head) {
- ipath_stats.sps_fastrcvint++;
- goto done;
- }
- }
-
- istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
+ istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
if (unlikely(!istat)) {
ipath_stats.sps_nullintr++;
@@ -838,20 +1116,23 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (unexpected)
unexpected = 0;
- if (unlikely(istat & ~infinipath_i_bitsextant))
+ if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
- "interrupt with unknown interrupts %x set\n",
- istat & (u32) ~ infinipath_i_bitsextant);
- else
- ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
-
- if (unlikely(istat & INFINIPATH_I_ERROR)) {
+ "interrupt with unknown interrupts %Lx set\n",
+ (unsigned long long)
+ istat & ~dd->ipath_i_bitsextant);
+ else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
+ ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
+ (unsigned long long) istat);
+
+ if (istat & INFINIPATH_I_ERROR) {
ipath_stats.sps_errints++;
estat = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_errorstatus);
if (!estat)
- dev_info(&dd->pcidev->dev, "error interrupt (%x), "
- "but no error bits set!\n", istat);
+ dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
+ "but no error bits set!\n",
+ (unsigned long long) istat);
else if (estat == -1LL)
/*
* should we try clearing all, or hope next read
@@ -860,35 +1141,86 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
ipath_dev_err(dd, "Read of error status failed "
"(all bits set); ignoring\n");
else
- if (handle_errors(dd, estat))
- /* force calling ipath_kreceive() */
- chk0rcv = 1;
+ chk0rcv |= handle_errors(dd, estat);
}
if (istat & INFINIPATH_I_GPIO) {
/*
- * Packets are available in the port 0 rcv queue.
- * Eventually this needs to be generalized to check
- * IPATH_GPIO_INTR, and the specific GPIO bit, if
- * GPIO interrupts are used for anything else.
+ * GPIO interrupts fall in two broad classes:
+ * GPIO_2 indicates (on some HT4xx boards) that a packet
+ * has arrived for Port 0. Checking for this
+ * is controlled by flag IPATH_GPIO_INTR.
+ * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
+ * errors that we need to count. Checking for this
+ * is controlled by flag IPATH_GPIO_ERRINTRS.
*/
- if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
- u32 gpiostatus;
- gpiostatus = ipath_read_kreg32(
- dd, dd->ipath_kregs->kr_gpio_status);
- ipath_dbg("Unexpected GPIO interrupt bits %x\n",
- gpiostatus);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- gpiostatus);
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ gpiostatus = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_status);
+ /* First the error-counter case. */
+ if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+ /*
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
+ */
+ if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+ ipath_dbg("FlowCtl on UnsupVL\n");
+ dd->ipath_rxfc_unsupvl_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+ ipath_dbg("Overrun Threshold exceeded\n");
+ dd->ipath_overrun_thresh_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+ ipath_dbg("Local Link Integrity error\n");
+ dd->ipath_lli_errs++;
+ }
+ gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
}
- else {
- /* Clear GPIO status bit 2 */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ /* Now the Port0 Receive case */
+ if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+ (dd->ipath_flags & IPATH_GPIO_INTR)) {
+ /*
+ * GPIO status bit 2 is set, and we expected it.
+ * clear it and indicate in p0bits.
+ * This probably only happens if a Port0 pkt
+ * arrives at _just_ the wrong time, and we
+ * handle that by seting chk0rcv;
+ */
+ to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+ gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
chk0rcv = 1;
}
+ if (gpiostatus) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * To avoid repetition of this condition, also clear
+ * the mask. It is almost certainly due to error.
+ */
+ const u32 mask = (u32) dd->ipath_gpio_mask;
+
+ if (mask & gpiostatus) {
+ ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+ gpiostatus & mask);
+ to_clear |= (gpiostatus & mask);
+ dd->ipath_gpio_mask &= ~(gpiostatus & mask);
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ }
+ }
+ if (to_clear) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) to_clear);
+ }
}
- chk0rcv |= istat & port0rbits;
/*
* Clear the interrupt bits we found set, unless they are receive
@@ -901,34 +1233,39 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
/*
- * handle port0 receive before checking for pio buffers available,
- * since receives can overflow; piobuf waiters can afford a few
- * extra cycles, since they were waiting anyway, and user's waiting
- * for receive are at the bottom.
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway, and user's
+ * waiting for receive are at the bottom.
*/
- if (chk0rcv) {
- ipath_kreceive(dd);
- istat &= ~port0rbits;
+ kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
+ (1ULL << dd->ipath_i_rcvurg_shift);
+ if (chk0rcv || (istat & kportrbits)) {
+ istat &= ~kportrbits;
+ ipath_kreceive(dd->ipath_pd[0]);
}
- if (istat & ((infinipath_i_rcvavail_mask <<
- INFINIPATH_I_RCVAVAIL_SHIFT)
- | (infinipath_i_rcvurg_mask <<
- INFINIPATH_I_RCVURG_SHIFT)))
+ if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
+ (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
handle_urcv(dd, istat);
+ if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
+ handle_sdma_intr(dd, istat);
+
if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
- clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
- if (dd->ipath_portpiowait)
- handle_port_pioavail(dd);
-
+ /* always process; sdma verbs uses PIO for acks and VL15 */
handle_layer_pioavail(dd);
}
-done:
ret = IRQ_HANDLED;
bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index a8a56276ff1..6559af60bff 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -1,7 +1,7 @@
#ifndef _IPATH_KERNEL_H
#define _IPATH_KERNEL_H
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -39,7 +39,13 @@
*/
#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
#include <asm/io.h>
+#include <rdma/ib_verbs.h>
#include "ipath_common.h"
#include "ipath_debug.h"
@@ -55,6 +61,24 @@
extern struct infinipath_stats ipath_stats;
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut critierion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+ u64 errs_to_log;
+ u64 hwerrs_to_log;
+};
struct ipath_portdata {
void **port_rcvegrbuf;
@@ -62,7 +86,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
- volatile __le64 *port_rcvhdrtail_kvaddr;
+ void *port_rcvhdrtail_kvaddr;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
@@ -79,8 +103,8 @@ struct ipath_portdata {
dma_addr_t port_rcvhdrq_phys;
dma_addr_t port_rcvhdrqtailaddr_phys;
/*
- * number of opens on this instance (0 or 1; ignoring forks, dup,
- * etc. for now)
+ * number of opens (including slave subports) on this instance
+ * (ignoring forks, dup, etc. for now)
*/
int port_cnt;
/*
@@ -89,6 +113,14 @@ struct ipath_portdata {
*/
/* instead of calculating it */
unsigned port_port;
+ /* non-zero if port is being shared. */
+ u16 port_subport_cnt;
+ /* non-zero if port is being shared. */
+ u16 port_subport_id;
+ /* number of pio bufs for this port (all procs, if shared) */
+ u32 port_piocnt;
+ /* first pio buffer for this port */
+ u32 port_pio_base;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -103,6 +135,8 @@ struct ipath_portdata {
u32 port_tidcursor;
/* next expected TID to check */
unsigned long port_flag;
+ /* what happened */
+ unsigned long int_flag;
/* WAIT_RCV that timed out, no interrupt */
u32 port_rcvwait_to;
/* WAIT_PIO that timed out, no interrupt */
@@ -113,17 +147,47 @@ struct ipath_portdata {
u32 port_pionowait;
/* total number of rcvhdrqfull errors */
u32 port_hdrqfull;
+ /*
+ * Used to suppress multiple instances of same
+ * port staying stuck at same point.
+ */
+ u32 port_lastrcvhdrqtail;
+ /* saved total number of rcvhdrqfull errors for poll edge trigger */
+ u32 port_hdrqfull_poll;
+ /* total number of polled urgent packets */
+ u32 port_urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 port_urgent_poll;
/* pid of process using this port */
- pid_t port_pid;
+ struct pid *port_pid;
+ struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
/* same size as task_struct .comm[] */
char port_comm[16];
/* pkeys set by this use of this port */
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subport_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subport_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subport_rcvhdr_base;
+ /* The version of the library which opened this port */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
+ /* Type of packets or conditions we want to poll for */
+ u16 poll_type;
+ /* port rcvhdrq head offset */
+ u32 port_head;
+ /* receive packet sequence counter */
+ u32 port_seq_cnt;
};
struct sk_buff;
+struct ipath_sge_state;
+struct ipath_verbs_txreq;
/*
* control information for layered drivers
@@ -132,6 +196,66 @@ struct _ipath_layer {
void *l_arg;
};
+struct ipath_skbinfo {
+ struct sk_buff *skb;
+ dma_addr_t phys;
+};
+
+struct ipath_sdma_txreq {
+ int flags;
+ int sg_count;
+ union {
+ struct scatterlist *sg;
+ void *map_addr;
+ };
+ void (*callback)(void *, int);
+ void *callback_cookie;
+ int callback_status;
+ u16 start_idx; /* sdma private */
+ u16 next_descq_idx; /* sdma private */
+ struct list_head list; /* sdma private */
+};
+
+struct ipath_sdma_desc {
+ __le64 qw[2];
+};
+
+#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1
+#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2
+#define IPATH_SDMA_TXREQ_F_INTREQ 0x4
+#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8
+#define IPATH_SDMA_TXREQ_F_FREEDESC 0x10
+#define IPATH_SDMA_TXREQ_F_VL15 0x20
+
+#define IPATH_SDMA_TXREQ_S_OK 0
+#define IPATH_SDMA_TXREQ_S_SENDERROR 1
+#define IPATH_SDMA_TXREQ_S_ABORTED 2
+#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3
+
+#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63)
+#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62)
+#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61)
+#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30)
+
+/* max dwords in small buffer packet */
+#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
+
+/*
+ * Possible IB config parameters for ipath_f_get/set_ib_cfg()
+ */
+#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
+#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
+#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
+#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
+#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
+#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
+#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
+#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
+#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
+#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
+#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */
+
+
struct ipath_devdata {
struct list_head ipath_list;
@@ -146,15 +270,10 @@ struct ipath_devdata {
unsigned long ipath_physaddr;
/* base of memory alloced for ipath_kregbase, for free */
u64 *ipath_kregalloc;
- /*
- * virtual address where port0 rcvhdrqtail updated for this unit.
- * only written to by the chip, not the driver.
- */
- volatile __le64 *ipath_hdrqtailptr;
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
- struct sk_buff **ipath_port0_skbs;
+ struct ipath_skbinfo *ipath_port0_skbinfo;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
@@ -172,6 +291,8 @@ struct ipath_devdata {
struct _ipath_layer ipath_layer;
/* setup intr */
int (*ipath_f_intrsetup)(struct ipath_devdata *);
+ /* fallback to alternate interrupt type if possible */
+ int (*ipath_f_intr_fallback)(struct ipath_devdata *);
/* setup on-chip bus config */
int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
/* hard reset chip */
@@ -192,6 +313,21 @@ struct ipath_devdata {
void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
/* fill out chip-specific fields */
int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
+ /* free irq */
+ void (*ipath_f_free_irq)(struct ipath_devdata *);
+ struct ipath_message_header *(*ipath_f_get_msgheader)
+ (struct ipath_devdata *, __le32 *);
+ void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
+ int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
+ int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
+ void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
+ void (*ipath_f_read_counters)(struct ipath_devdata *,
+ struct infinipath_counters *);
+ void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
+ /* per chip actions needed for IB Link up/down changes */
+ int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
+
+ unsigned ipath_lastegr_idx;
struct ipath_ibdev *verbs_dev;
struct timer_list verbs_timer;
/* total dwords sent (summed from counter) */
@@ -216,18 +352,24 @@ struct ipath_devdata {
* limiting of hwerror reporting
*/
ipath_err_t ipath_lasthwerror;
- /*
- * errors masked because they occur too fast, also includes errors
- * that are always ignored (ipath_ignorederrs)
- */
+ /* errors masked because they occur too fast */
ipath_err_t ipath_maskederrs;
+ u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+ /* these 5 fields are used to establish deltas for IB Symbol
+ * errors and linkrecovery errors. They can be reported on
+ * some chips during link negotiation prior to INIT, and with
+ * DDR when faking DDR negotiations with non-IBTA switches.
+ * The chip counters are adjusted at driver unload if there is
+ * a non-zero delta.
+ */
+ u64 ibdeltainprog;
+ u64 ibsymdelta;
+ u64 ibsymsnap;
+ u64 iblnkerrdelta;
+ u64 iblnkerrsnap;
+
/* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime;
- /*
- * errors always ignored (masked), at least for a given
- * chip/device, because they are wrong or not useful
- */
- ipath_err_t ipath_ignorederrs;
/* count of egrfull errors, combined for all ports */
u64 ipath_last_tidfull;
/* for ipath_qcheck() */
@@ -252,6 +394,8 @@ struct ipath_devdata {
u32 ipath_lastport_piobuf;
/* is a stats timer active */
u32 ipath_stats_timer_active;
+ /* number of interrupts for this device -- saturates... */
+ u32 ipath_int_counter;
/* dwords sent read from counter */
u32 ipath_lastsword;
/* dwords received read from counter */
@@ -262,27 +406,20 @@ struct ipath_devdata {
u32 ipath_lastrpkts;
/* pio bufs allocated per port */
u32 ipath_pbufsport;
+ /* if remainder on bufs/port, ports < extrabuf get 1 extra */
+ u32 ipath_ports_extrabuf;
+ u32 ipath_pioupd_thresh; /* update threshold, some chips */
/*
* number of ports configured as max; zero is set to number chip
* supports, less gives more pio bufs/port, etc.
*/
u32 ipath_cfgports;
- /* port0 rcvhdrq head offset */
- u32 ipath_port0head;
/* count of port 0 hdrqfull errors */
u32 ipath_p0_hdrqfull;
+ /* port 0 number of receive eager buffers */
+ u32 ipath_p0_rcvegrcnt;
/*
- * (*cfgports) used to suppress multiple instances of same
- * port staying stuck at same point
- */
- u32 *ipath_lastrcvhdrqtails;
- /*
- * (*cfgports) used to suppress multiple instances of same
- * port staying stuck at same point
- */
- u32 *ipath_lastegrheads;
- /*
* index of last piobuffer we used. Speeds up searching, by
* starting at this point. Doesn't matter if multiple cpu's use and
* update, last updater is only write that matters. Whenever it
@@ -290,6 +427,7 @@ struct ipath_devdata {
* get to multiple devices
*/
u32 ipath_lastpioindex;
+ u32 ipath_lastpioindexl;
/* max length of freezemsg */
u32 ipath_freezelen;
/*
@@ -306,7 +444,18 @@ struct ipath_devdata {
u32 ipath_pcibar0;
/* so we can rewrite it after a chip reset */
u32 ipath_pcibar1;
-
+ u32 ipath_x1_fix_tries;
+ u32 ipath_autoneg_tries;
+ u32 serdes_first_init_done;
+
+ struct ipath_relock {
+ atomic_t ipath_relock_timer_active;
+ struct timer_list ipath_relock_timer;
+ unsigned int ipath_relock_interval; /* in jiffies */
+ } ipath_relock_singleton;
+
+ /* interrupt number */
+ int ipath_irq;
/* HT/PCI Vendor ID (here for NodeInfo) */
u16 ipath_vendorid;
/* HT/PCI Device ID (here for NodeInfo) */
@@ -315,14 +464,21 @@ struct ipath_devdata {
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
+ unsigned long ipath_wc_base;
+ unsigned long ipath_wc_len;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
- /* shadow copy of all exptids physaddr; used only by funcsim */
- u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
- /* lock to workaround chip bug 9437 */
- spinlock_t ipath_tid_lock;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *ipath_physshadow;
+ u64 __iomem *ipath_egrtidbase;
+ /* lock to workaround chip bug 9437 and others */
+ spinlock_t ipath_kernel_tid_lock;
+ spinlock_t ipath_user_tid_lock;
+ spinlock_t ipath_sendctrl_lock;
+ /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
+ spinlock_t ipath_uctxt_lock;
/*
* IPATH_STATUS_*,
@@ -336,16 +492,52 @@ struct ipath_devdata {
struct pci_dev *pcidev;
struct cdev *user_cdev;
struct cdev *diag_cdev;
- struct class_device *user_class_dev;
- struct class_device *diag_class_dev;
+ struct device *user_dev;
+ struct device *diag_dev;
/* timer used to prevent stats overflow, error throttling, etc. */
struct timer_list ipath_stats_timer;
- /* check for stale messages in rcv queue */
- /* only allow one intr at a time. */
- unsigned long ipath_rcv_pending;
+ /* timer to verify interrupts work, and fallback if possible */
+ struct timer_list ipath_intrchk_timer;
void *ipath_dummy_hdrq; /* used after port close */
dma_addr_t ipath_dummy_hdrq_phys;
+ /* SendDMA related entries */
+ spinlock_t ipath_sdma_lock;
+ unsigned long ipath_sdma_status;
+ unsigned long ipath_sdma_abort_jiffies;
+ unsigned long ipath_sdma_abort_intr_timeout;
+ unsigned long ipath_sdma_buf_jiffies;
+ struct ipath_sdma_desc *ipath_sdma_descq;
+ u64 ipath_sdma_descq_added;
+ u64 ipath_sdma_descq_removed;
+ int ipath_sdma_desc_nreserved;
+ u16 ipath_sdma_descq_cnt;
+ u16 ipath_sdma_descq_tail;
+ u16 ipath_sdma_descq_head;
+ u16 ipath_sdma_next_intr;
+ u16 ipath_sdma_reset_wait;
+ u8 ipath_sdma_generation;
+ struct tasklet_struct ipath_sdma_abort_task;
+ struct tasklet_struct ipath_sdma_notify_task;
+ struct list_head ipath_sdma_activelist;
+ struct list_head ipath_sdma_notifylist;
+ atomic_t ipath_sdma_vl15_count;
+ struct timer_list ipath_sdma_vl15_timer;
+
+ dma_addr_t ipath_sdma_descq_phys;
+ volatile __le64 *ipath_sdma_head_dma;
+ dma_addr_t ipath_sdma_head_phys;
+
+ unsigned long ipath_ureg_align; /* user register alignment */
+
+ struct delayed_work ipath_autoneg_work;
+ wait_queue_head_t ipath_autoneg_wait;
+
+ /* HoL blocking / user app forward-progress state */
+ unsigned ipath_hol_state;
+ unsigned ipath_hol_next;
+ struct timer_list ipath_hol_timer;
+
/*
* Shadow copies of registers; size indicates read access size.
* Most of them are readonly, but some are write-only register,
@@ -366,8 +558,14 @@ struct ipath_devdata {
* init time.
*/
unsigned long ipath_pioavailshadow[8];
+ /* bitmap of send buffers available for the kernel to use with PIO. */
+ unsigned long ipath_pioavailkernel[8];
/* shadow of kr_gpio_out, for rmw ops */
u64 ipath_gpio_out;
+ /* shadow the gpio mask register */
+ u64 ipath_gpio_mask;
+ /* shadow the gpio output enable, etc... */
+ u64 ipath_extctrl;
/* kr_revision shadow */
u64 ipath_revision;
/*
@@ -382,10 +580,13 @@ struct ipath_devdata {
u64 ipath_lastibcstat;
/* hwerrmask shadow */
ipath_err_t ipath_hwerrmask;
+ ipath_err_t ipath_errormask; /* errormask shadow */
/* interrupt config reg shadow */
u64 ipath_intconfig;
/* kr_sendpiobufbase value */
u64 ipath_piobufbase;
+ /* kr_ibcddrctrl shadow */
+ u64 ipath_ibcddrctrl;
/* these are the "32 bit" regs */
@@ -402,6 +603,10 @@ struct ipath_devdata {
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
+ /* to not count armlaunch after cancel */
+ unsigned long ipath_lastcancel;
+ /* count cases where special trigger was needed (double write) */
+ unsigned long ipath_spectriggerhit;
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
@@ -423,6 +628,7 @@ struct ipath_devdata {
u32 ipath_piobcnt4k;
/* size in bytes of "4KB" PIO buffers */
u32 ipath_piosize4k;
+ u32 ipath_pioreserved; /* reserved special-inkernel; */
/* kr_rcvegrbase value */
u32 ipath_rcvegrbase;
/* kr_rcvegrcnt value */
@@ -439,8 +645,6 @@ struct ipath_devdata {
u32 ipath_cregbase;
/* shadow the control register contents */
u32 ipath_control;
- /* shadow the gpio output contents */
- u32 ipath_extctrl;
/* PCI revision register (HTC rev on FPGA) */
u32 ipath_pcirev;
@@ -461,12 +665,10 @@ struct ipath_devdata {
u32 ipath_init_ibmaxlen;
/* size of each rcvegrbuffer */
u32 ipath_rcvegrbufsize;
- /* width (2,4,8,16,32) from HT config reg */
- u32 ipath_htwidth;
- /* HT speed (200,400,800,1000) from HT config */
- u32 ipath_htspeed;
- /* ports waiting for PIOavail intr */
- unsigned long ipath_portpiowait;
+ /* localbus width (1, 2,4,8,16,32) from config space */
+ u32 ipath_lbus_width;
+ /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
+ u32 ipath_lbus_speed;
/*
* number of sequential ibcstatus change for polling active/quiet
* (i.e., link not coming up).
@@ -490,28 +692,181 @@ struct ipath_devdata {
*/
u8 ipath_serial[16];
/* human readable board version */
- u8 ipath_boardversion[80];
+ u8 ipath_boardversion[96];
+ u8 ipath_lbus_info[32]; /* human readable localbus info */
/* chip major rev, from ipath_revision */
u8 ipath_majrev;
/* chip minor rev, from ipath_revision */
u8 ipath_minrev;
/* board rev, from ipath_revision */
u8 ipath_boardrev;
- /* unit # of this chip, if present */
- int ipath_unit;
/* saved for restore after reset */
u8 ipath_pci_cacheline;
/* LID mask control */
u8 ipath_lmc;
+ /* link width supported */
+ u8 ipath_link_width_supported;
+ /* link speed supported */
+ u8 ipath_link_speed_supported;
+ u8 ipath_link_width_enabled;
+ u8 ipath_link_speed_enabled;
+ u8 ipath_link_width_active;
+ u8 ipath_link_speed_active;
/* Rx Polarity inversion (compensate for ~tx on partner) */
u8 ipath_rx_pol_inv;
+ u8 ipath_r_portenable_shift;
+ u8 ipath_r_intravail_shift;
+ u8 ipath_r_tailupd_shift;
+ u8 ipath_r_portcfg_shift;
+
+ /* unit # of this chip, if present */
+ int ipath_unit;
+
/* local link integrity counter */
u32 ipath_lli_counter;
/* local link integrity errors */
u32 ipath_lli_errors;
+ /*
+ * Above counts only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of kern-packets.
+ * Below are the three (monotonically increasing) counters
+ * maintained via GPIO interrupts on iba6120-rev2.
+ */
+ u32 ipath_rxfc_unsupvl_errs;
+ u32 ipath_overrun_thresh_errs;
+ u32 ipath_lli_errs;
+
+ /*
+ * Not all devices managed by a driver instance are the same
+ * type, so these fields must be per-device.
+ */
+ u64 ipath_i_bitsextant;
+ ipath_err_t ipath_e_bitsextant;
+ ipath_err_t ipath_hwe_bitsextant;
+
+ /*
+ * Below should be computable from number of ports,
+ * since they are never modified.
+ */
+ u64 ipath_i_rcvavail_mask;
+ u64 ipath_i_rcvurg_mask;
+ u16 ipath_i_rcvurg_shift;
+ u16 ipath_i_rcvavail_shift;
+
+ /*
+ * Register bits for selecting i2c direction and values, used for
+ * I2C serial flash.
+ */
+ u8 ipath_gpio_sda_num;
+ u8 ipath_gpio_scl_num;
+ u8 ipath_i2c_chain_type;
+ u64 ipath_gpio_sda;
+ u64 ipath_gpio_scl;
+
+ /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
+ spinlock_t ipath_gpio_lock;
+
+ /*
+ * IB link and linktraining states and masks that vary per chip in
+ * some way. Set at init, to avoid each IB status change interrupt
+ */
+ u8 ibcs_ls_shift;
+ u8 ibcs_lts_mask;
+ u32 ibcs_mask;
+ u32 ib_init;
+ u32 ib_arm;
+ u32 ib_active;
+
+ u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
+
+ /*
+ * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol
+ * reg. Changes for IBA7220
+ */
+ u8 ibcc_lic_mask; /* LinkInitCmd */
+ u8 ibcc_lc_shift; /* LinkCmd */
+ u8 ibcc_mpl_shift; /* Maxpktlen */
+
+ u8 delay_mult;
+
+ /* used to override LED behavior */
+ u8 ipath_led_override; /* Substituted for normal value, if non-zero */
+ u16 ipath_led_override_timeoff; /* delta to next timer event */
+ u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
+ u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
+ atomic_t ipath_led_override_timer_active;
+ /* Used to flash LEDs in override mode */
+ struct timer_list ipath_led_override_timer;
+
+ /* Support (including locks) for EEPROM logging of errors and time */
+ /* control access to actual counters, timer */
+ spinlock_t ipath_eep_st_lock;
+ /* control high-level access to EEPROM */
+ struct mutex ipath_eep_lock;
+ /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+ uint64_t ipath_traffic_wds;
+ /* active time is kept in seconds, but logged in hours */
+ atomic_t ipath_active_time;
+ /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+ uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+ uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+ uint16_t ipath_eep_hrs;
+ /*
+ * masks for which bits of errs, hwerrs that cause
+ * each of the counters to increment.
+ */
+ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
+
+ /* interrupt mitigation reload register info */
+ u16 ipath_jint_idle_ticks; /* idle clock ticks */
+ u16 ipath_jint_max_packets; /* max packets across all ports */
+
+ /*
+ * lock for access to SerDes, and flags to sequence preset
+ * versus steady-state. 7220-only at the moment.
+ */
+ spinlock_t ipath_sdepb_lock;
+ u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
};
+/* ipath_hol_state values (stopping/starting user proc, send flushing) */
+#define IPATH_HOL_UP 0
+#define IPATH_HOL_DOWN 1
+/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
+#define IPATH_HOL_DOWNSTOP 0
+#define IPATH_HOL_DOWNCONT 1
+
+/* bit positions for sdma_status */
+#define IPATH_SDMA_ABORTING 0
+#define IPATH_SDMA_DISARMED 1
+#define IPATH_SDMA_DISABLED 2
+#define IPATH_SDMA_LAYERBUF 3
+#define IPATH_SDMA_RUNNING 30
+#define IPATH_SDMA_SHUTDOWN 31
+
+/* bit combinations that correspond to abort states */
+#define IPATH_SDMA_ABORT_NONE 0
+#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
+#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED))
+#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+
+#define IPATH_SDMA_BUF_NONE 0
+#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
+
+/* Private data for file operations */
+struct ipath_filedata {
+ struct ipath_portdata *pd;
+ unsigned subport;
+ unsigned tidcursor;
+ struct ipath_user_sdma_queue *pq;
+};
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
@@ -519,14 +874,15 @@ extern struct ipath_devdata *ipath_lookup(int unit);
int ipath_init_chip(struct ipath_devdata *, int);
int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_clear_freeze(struct ipath_devdata *);
struct file_operations;
-int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
- struct cdev **cdevp, struct class_device **class_devp);
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp);
void ipath_cdev_cleanup(struct cdev **cdevp,
- struct class_device **class_devp);
+ struct device **devp);
int ipath_diag_add(struct ipath_devdata *);
void ipath_diag_remove(struct ipath_devdata *);
@@ -540,8 +896,9 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
extern int ipath_diag_inuse;
-irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+irqreturn_t ipath_intr(int irq, void *devid);
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+ ipath_err_t err);
#if __IPATH_INFO || __IPATH_DBG
extern const char *ipath_ibcstatus_str[];
#endif
@@ -556,28 +913,47 @@ int ipath_unordered_wc(void);
void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
unsigned cnt);
+void ipath_cancel_sends(struct ipath_devdata *, int);
int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
int ipath_parse_ushort(const char *str, unsigned short *valp);
-void ipath_kreceive(struct ipath_devdata *);
+void ipath_kreceive(struct ipath_portdata *);
int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
int ipath_reset_device(int);
void ipath_get_faststats(unsigned long);
+int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
int ipath_set_linkstate(struct ipath_devdata *, u8);
int ipath_set_mtu(struct ipath_devdata *, u16);
int ipath_set_lid(struct ipath_devdata *, u32, u8);
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
+void ipath_enable_armlaunch(struct ipath_devdata *);
+void ipath_disable_armlaunch(struct ipath_devdata *);
+void ipath_hol_down(struct ipath_devdata *);
+void ipath_hol_up(struct ipath_devdata *);
+void ipath_hol_event(unsigned long);
+void ipath_toggle_rclkrls(struct ipath_devdata *);
+void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
+void ipath_set_relock_poll(struct ipath_devdata *, int);
+void ipath_shutdown_relock_poll(struct ipath_devdata *);
/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->tidcursor
+#define user_sdma_queue_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->pq
/*
* values for ipath_flags
*/
-/* The chip is up and initted */
+ /* chip can report link latency (IB 1.2) */
+#define IPATH_HAS_LINK_LATENCY 0x1
+ /* The chip is up and initted */
#define IPATH_INITTED 0x2
/* set if any user code has set kr_rcvhdrsize */
#define IPATH_RCVHDRSZ_SET 0x4
@@ -599,6 +975,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_LINKACTIVE 0x200
/* link current state is unknown */
#define IPATH_LINKUNK 0x400
+ /* Write combining flush needed for PIO */
+#define IPATH_PIO_FLUSH_WC 0x1000
+ /* DMA Receive tail pointer */
+#define IPATH_NODMA_RTAIL 0x2000
/* no IB cable, or no device on IB cable */
#define IPATH_NOCABLE 0x4000
/* Supports port zero per packet receive interrupts via
@@ -609,70 +989,109 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* packet/word counters are 32 bit, else those 4 counters
* are 64bit */
#define IPATH_32BITCOUNTERS 0x20000
+ /* Interrupt register is 64 bits */
+#define IPATH_INTREG_64 0x40000
/* can miss port0 rx interrupts */
-#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
+ /* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+#define IPATH_SWAP_PIOBUFS 0x200000
+ /* Supports Send DMA */
+#define IPATH_HAS_SEND_DMA 0x400000
+ /* Supports Send Count (not just word count) in PBC */
+#define IPATH_HAS_PBC_CNT 0x800000
+ /* Suppress heartbeat, even if turning off loopback */
+#define IPATH_NO_HRTBT 0x1000000
+#define IPATH_HAS_THRESH_UPDATE 0x4000000
+#define IPATH_HAS_MULT_IB_SPEED 0x8000000
+#define IPATH_IB_AUTONEG_INPROG 0x10000000
+#define IPATH_IB_AUTONEG_FAILED 0x20000000
+ /* Linkdown-disable intentionally, Do not attempt to bring up */
+#define IPATH_IB_LINK_DISABLED 0x40000000
+#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
#define IPATH_PORT_WAITING_RCV 2
- /* waiting for a PIO buffer to be available */
-#define IPATH_PORT_WAITING_PIO 3
+ /* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
+ /* waiting for an urgent packet to arrive */
+#define IPATH_PORT_WAITING_URG 5
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
-int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
-int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-void ipath_init_iba6120_funcs(struct ipath_devdata *);
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ unsigned len, int avail);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
+void ipath_force_pio_avail_update(struct ipath_devdata *);
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
/*
- * number of words used for protocol header if not set by ipath_userinit();
+ * Set LED override, only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
*/
-#define IPATH_DFLT_RCVHDRSIZE 9
+#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
+
+/* send dma routines */
+int setup_sdma(struct ipath_devdata *);
+void teardown_sdma(struct ipath_devdata *);
+void ipath_restart_sdma(struct ipath_devdata *);
+void ipath_sdma_intr(struct ipath_devdata *);
+int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
+ u32, struct ipath_verbs_txreq *);
+/* ipath_sdma_lock should be locked before calling this. */
+int ipath_sdma_make_progress(struct ipath_devdata *dd);
+
+/* must be called under ipath_sdma_lock */
+static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
+{
+ return dd->ipath_sdma_descq_cnt -
+ (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
+ 1 - dd->ipath_sdma_desc_nreserved;
+}
-#define IPATH_MDIO_CMD_WRITE 1
-#define IPATH_MDIO_CMD_READ 2
-#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 Mhz mdio clock */
-#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */
-#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */
-#define IPATH_MDIO_CTRL_STD 0x0
+static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
+{
+ dd->ipath_sdma_desc_nreserved += cnt;
+}
-static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
+static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
{
- return (((u64) IPATH_MDIO_CLD_DIV) << 32) |
- (cmd << 26) |
- (dev << 21) |
- (reg << 16) |
- (data & 0xFFFF);
+ dd->ipath_sdma_desc_nreserved -= cnt;
}
- /* signal and fifo status, in bank 31 */
-#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8
- /* controls loopback, redundancy */
-#define IPATH_MDIO_CTRL_8355_REG_1 0x10
- /* premph, encdec, etc. */
-#define IPATH_MDIO_CTRL_8355_REG_2 0x11
- /* Kchars, etc. */
-#define IPATH_MDIO_CTRL_8355_REG_6 0x15
-#define IPATH_MDIO_CTRL_8355_REG_9 0x18
-#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
+/*
+ * number of words used for protocol header if not set by ipath_userinit();
+ */
+#define IPATH_DFLT_RCVHDRSIZE 9
int ipath_get_user_pages(unsigned long, size_t, struct page **);
-int ipath_get_user_pages_nocopy(unsigned long, struct page **);
void ipath_release_user_pages(struct page **, size_t);
void ipath_release_user_pages_on_close(struct page **, size_t);
int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
+int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
+int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
/* these are used for the registers that vary with port */
void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
- unsigned);
/*
* We could have a single register get/put routine, that takes a group type,
@@ -685,8 +1104,7 @@ u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
/*
* At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg(), and none of the c-registers are writable, so no
- * ipath_write_creg().
+ * ipath_write_sreg().
*/
/**
@@ -708,7 +1126,7 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
return readl(regno + (u64 __iomem *)
(dd->ipath_uregbase +
(char __iomem *)dd->ipath_kregbase +
- dd->ipath_palign * port));
+ dd->ipath_ureg_align * port));
}
/**
@@ -725,7 +1143,7 @@ static inline void ipath_write_ureg(const struct ipath_devdata *dd,
{
u64 __iomem *ubase = (u64 __iomem *)
(dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
- dd->ipath_palign * port);
+ dd->ipath_ureg_align * port);
if (dd->ipath_kregbase)
writeq(value, &ubase[regno]);
}
@@ -775,6 +1193,89 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
(char __iomem *)dd->ipath_kregbase));
}
+static inline void ipath_write_creg(const struct ipath_devdata *dd,
+ ipath_creg regno, u64 value)
+{
+ if (dd->ipath_kregbase)
+ writeq(value, regno + (u64 __iomem *)
+ (dd->ipath_cregbase +
+ (char __iomem *)dd->ipath_kregbase));
+}
+
+static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
+{
+ *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
+}
+
+static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
+{
+ return (u32) le64_to_cpu(*((volatile __le64 *)
+ pd->port_rcvhdrtail_kvaddr));
+}
+
+static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
+{
+ const struct ipath_devdata *dd = pd->port_dd;
+ u32 hdrqtail;
+
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+ __le32 *rhf_addr;
+ u32 seq;
+
+ rhf_addr = (__le32 *) pd->port_rcvhdrq +
+ pd->port_head + dd->ipath_rhf_offset;
+ seq = ipath_hdrget_seq(rhf_addr);
+ hdrqtail = pd->port_head;
+ if (seq == pd->port_seq_cnt)
+ hdrqtail++;
+ } else
+ hdrqtail = ipath_get_rcvhdrtail(pd);
+
+ return hdrqtail;
+}
+
+static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
+{
+ return (dd->ipath_flags & IPATH_INTREG_64) ?
+ ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
+}
+
+/*
+ * from contents of IBCStatus (or a saved copy), return linkstate
+ * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
+ * everywhere, anyway (and should be, for almost all purposes).
+ */
+static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
+{
+ u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
+ INFINIPATH_IBCS_LINKSTATE_MASK;
+ if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
+ state = INFINIPATH_IBCS_L_STATE_ACTIVE;
+ return state;
+}
+
+/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
+static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
+{
+ return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ dd->ibcs_lts_mask;
+}
+
+/*
+ * from contents of IBCStatus (or a saved copy), return logical link state
+ * combination of link state and linktraining state (down, active, init,
+ * arm, etc.
+ */
+static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
+{
+ u32 ibs;
+ ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ dd->ibcs_lts_mask;
+ ibs |= (u32)(ibcs &
+ (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
+ return ibs;
+}
+
/*
* sysfs interface.
*/
@@ -783,22 +1284,26 @@ struct device_driver;
extern const char ib_ipath_version[];
-int ipath_driver_create_group(struct device_driver *);
-void ipath_driver_remove_group(struct device_driver *);
+extern const struct attribute_group *ipath_driver_attr_groups[];
int ipath_device_create_group(struct device *, struct ipath_devdata *);
void ipath_device_remove_group(struct device *, struct ipath_devdata *);
int ipath_expose_reset(struct device *);
-int ipath_diagpkt_add(void);
-void ipath_diagpkt_remove(void);
-
int ipath_init_ipathfs(void);
void ipath_exit_ipathfs(void);
int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+const char *ipath_get_unit_name(int unit);
+
+/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
*/
@@ -809,9 +1314,8 @@ int ipathfs_remove_device(struct ipath_devdata *);
#endif
extern unsigned ipath_debug; /* debugging bit mask */
-
-const char *ipath_get_unit_name(int unit);
-
+extern unsigned ipath_linkrecovery;
+extern unsigned ipath_mtu4096;
extern struct mutex ipath_mutex;
#define IPATH_DRV_NAME "ib_ipath"
@@ -838,7 +1342,7 @@ extern struct mutex ipath_mutex;
# define __IPATH_DBG_WHICH(which,fmt,...) \
do { \
- if(unlikely(ipath_debug&(which))) \
+ if (unlikely(ipath_debug & (which))) \
printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
__func__,##__VA_ARGS__); \
} while(0)
@@ -855,4 +1359,20 @@ extern struct mutex ipath_mutex;
#endif /* _IPATH_DEBUGGING */
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t lmsg);
+
#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index ba1b93226ca..c0e933fec21 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
- ipath_dbg(KERN_INFO "LKEY table full\n");
+ ipath_dbg("LKEY table full\n");
ret = 0;
goto bail;
}
@@ -118,29 +118,37 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
+ struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
int ret;
/*
- * We use LKEY == zero to mean a physical kmalloc() address.
- * This is a bit of a hack since we rely on dma_map_single()
- * being reversible by calling bus_to_virt().
+ * We use LKEY == zero for kernel virtual addresses
+ * (see ipath_get_dma_mr and ipath_dma.c).
*/
if (sge->lkey == 0) {
+ /* always a kernel port, no locking needed */
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
isge->mr = NULL;
- isge->vaddr = bus_to_virt(sge->addr);
+ isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
isge->sge_length = sge->length;
ret = 1;
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+ if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
@@ -188,9 +196,10 @@ bail:
*
* Return 1 if successful, otherwise 0.
*/
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
@@ -199,12 +208,19 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
int ret;
/*
- * We use RKEY == zero for physical addresses
- * (see ipath_get_dma_mr).
+ * We use RKEY == zero for kernel virtual addresses
+ * (see ipath_get_dma_mr and ipath_dma.c).
*/
if (rkey == 0) {
+ /* always a kernel port, no locking needed */
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
sge->mr = NULL;
- sge->vaddr = phys_to_virt(vaddr);
+ sge->vaddr = (void *) vaddr;
sge->length = len;
sge->sge_length = len;
ss->sg_list = NULL;
@@ -214,7 +230,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey)) {
+ if (unlikely(mr == NULL || mr->lkey != rkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
deleted file mode 100644
index e46aa4ed2a7..00000000000
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * These are the routines used by layered drivers, currently just the
- * layered ethernet driver and verbs layer.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <asm/byteorder.h>
-
-#include "ipath_kernel.h"
-#include "ipath_layer.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/* Acquire before ipath_devs_lock. */
-static DEFINE_MUTEX(ipath_layer_mutex);
-
-u16 ipath_layer_rcv_opcode;
-
-static int (*layer_intr)(void *, u32);
-static int (*layer_rcv)(void *, void *, struct sk_buff *);
-static int (*layer_rcv_lid)(void *, void *);
-
-static void *(*layer_add_one)(int, struct ipath_devdata *);
-static void (*layer_remove_one)(void *);
-
-int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_intr)
- ret = layer_intr(dd->ipath_layer.l_arg, arg);
-
- return ret;
-}
-
-int ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
-{
- int ret;
-
- mutex_lock(&ipath_layer_mutex);
-
- ret = __ipath_layer_intr(dd, arg);
-
- mutex_unlock(&ipath_layer_mutex);
-
- return ret;
-}
-
-int __ipath_layer_rcv(struct ipath_devdata *dd, void *hdr,
- struct sk_buff *skb)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_rcv)
- ret = layer_rcv(dd->ipath_layer.l_arg, hdr, skb);
-
- return ret;
-}
-
-int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_rcv_lid)
- ret = layer_rcv_lid(dd->ipath_layer.l_arg, hdr);
-
- return ret;
-}
-
-void ipath_layer_lid_changed(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (dd->ipath_layer.l_arg && layer_intr)
- layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-void ipath_layer_add(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (layer_add_one)
- dd->ipath_layer.l_arg =
- layer_add_one(dd->ipath_unit, dd);
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-void ipath_layer_remove(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (dd->ipath_layer.l_arg && layer_remove_one) {
- layer_remove_one(dd->ipath_layer.l_arg);
- dd->ipath_layer.l_arg = NULL;
- }
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *),
- int (*l_intr)(void *, u32),
- int (*l_rcv)(void *, void *, struct sk_buff *),
- u16 l_rcv_opcode,
- int (*l_rcv_lid)(void *, void *))
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
-
- layer_add_one = l_add;
- layer_remove_one = l_remove;
- layer_intr = l_intr;
- layer_rcv = l_rcv;
- layer_rcv_lid = l_rcv_lid;
- ipath_layer_rcv_opcode = l_rcv_opcode;
-
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (!(dd->ipath_flags & IPATH_INITTED))
- continue;
-
- if (dd->ipath_layer.l_arg)
- continue;
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- mutex_unlock(&ipath_layer_mutex);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_register);
-
-void ipath_layer_unregister(void)
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (dd->ipath_layer.l_arg && layer_remove_one) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- layer_remove_one(dd->ipath_layer.l_arg);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- dd->ipath_layer.l_arg = NULL;
- }
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- layer_add_one = NULL;
- layer_remove_one = NULL;
- layer_intr = NULL;
- layer_rcv = NULL;
- layer_rcv_lid = NULL;
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_unregister);
-
-int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
-{
- int ret;
- u32 intval = 0;
-
- mutex_lock(&ipath_layer_mutex);
-
- if (!dd->ipath_layer.l_arg) {
- ret = -EINVAL;
- goto bail;
- }
-
- ret = ipath_setrcvhdrsize(dd, IPATH_HEADER_QUEUE_WORDS);
-
- if (ret < 0)
- goto bail;
-
- *pktmax = dd->ipath_ibmaxlen;
-
- if (*dd->ipath_statusp & IPATH_STATUS_IB_READY)
- intval |= IPATH_LAYER_INT_IF_UP;
- if (dd->ipath_lid)
- intval |= IPATH_LAYER_INT_LID;
- if (dd->ipath_mlid)
- intval |= IPATH_LAYER_INT_BCAST;
- /*
- * do this on open, in case low level is already up and
- * just layered driver was reloaded, etc.
- */
- if (intval)
- layer_intr(dd->ipath_layer.l_arg, intval);
-
- ret = 0;
-bail:
- mutex_unlock(&ipath_layer_mutex);
-
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_open);
-
-u16 ipath_layer_get_lid(struct ipath_devdata *dd)
-{
- return dd->ipath_lid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lid);
-
-/**
- * ipath_layer_get_mac - get the MAC address
- * @dd: the infinipath device
- * @mac: the MAC is put here
- *
- * This is the EUID-64 OUI octets (top 3), then
- * skip the next 2 (which should both be zero or 0xff).
- * The returned MAC is in network order
- * mac points to at least 6 bytes of buffer
- * We assume that by the time the LID is set, that the GUID is as valid
- * as it's ever going to be, rather than adding yet another status bit.
- */
-
-int ipath_layer_get_mac(struct ipath_devdata *dd, u8 * mac)
-{
- u8 *guid;
-
- guid = (u8 *) &dd->ipath_guid;
-
- mac[0] = guid[0];
- mac[1] = guid[1];
- mac[2] = guid[2];
- mac[3] = guid[5];
- mac[4] = guid[6];
- mac[5] = guid[7];
- if ((guid[3] || guid[4]) && !(guid[3] == 0xff && guid[4] == 0xff))
- ipath_dbg("Warning, guid bytes 3 and 4 not 0 or 0xffff: "
- "%x %x\n", guid[3], guid[4]);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_mac);
-
-u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
-{
- return dd->ipath_mlid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
-
-int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
-{
- int ret = 0;
- u32 __iomem *piobuf;
- u32 plen, *uhdr;
- size_t count;
- __be16 vlsllnh;
-
- if (!(dd->ipath_flags & IPATH_RCVHDRSZ_SET)) {
- ipath_dbg("send while not open\n");
- ret = -EINVAL;
- } else
- if ((dd->ipath_flags & (IPATH_LINKUNK | IPATH_LINKDOWN)) ||
- dd->ipath_lid == 0) {
- /*
- * lid check is for when sma hasn't yet configured
- */
- ret = -ENETDOWN;
- ipath_cdbg(VERBOSE, "send while not ready, "
- "mylid=%u, flags=0x%x\n",
- dd->ipath_lid, dd->ipath_flags);
- }
-
- vlsllnh = *((__be16 *) hdr);
- if (vlsllnh != htons(IPATH_LRH_BTH)) {
- ipath_dbg("Warning: lrh[0] wrong (%x, not %x); "
- "not sending\n", be16_to_cpu(vlsllnh),
- IPATH_LRH_BTH);
- ret = -EINVAL;
- }
- if (ret)
- goto done;
-
- /* Get a PIO buffer to use. */
- piobuf = ipath_getpiobuf(dd, NULL);
- if (piobuf == NULL) {
- ret = -EBUSY;
- goto done;
- }
-
- plen = (sizeof(*hdr) >> 2); /* actual length */
- ipath_cdbg(EPKT, "0x%x+1w pio %p\n", plen, piobuf);
-
- writeq(plen+1, piobuf); /* len (+1 for pad) to pbc, no flags */
- ipath_flush_wc();
- piobuf += 2;
- uhdr = (u32 *)hdr;
- count = plen-1; /* amount we can copy before trigger word */
- __iowrite32_copy(piobuf, uhdr, count);
- ipath_flush_wc();
- __raw_writel(uhdr[count], piobuf + count);
- ipath_flush_wc(); /* ensure it's sent, now */
-
- ipath_stats.sps_ether_spkts++; /* ether packet sent */
-
-done:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_send_hdr);
-
-int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
-{
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 72d1db89db8..43f2d0424d4 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,15 +32,16 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_pma.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
-#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
-#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C)
-#define IB_SMP_INVALID_FIELD __constant_htons(0x001C)
+#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
+#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
+#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
+#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C)
static int reply(struct ib_smp *smp)
{
@@ -60,7 +61,7 @@ static int recv_subn_get_nodedescription(struct ib_smp *smp,
if (smp->attr_mod)
smp->status |= IB_SMP_INVALID_FIELD;
- strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+ memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
return reply(smp);
}
@@ -87,7 +88,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
struct ipath_devdata *dd = to_idev(ibdev)->dd;
u32 vendor, majrev, minrev;
- if (smp->attr_mod)
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || (dd->ipath_guid == 0))
smp->status |= IB_SMP_INVALID_FIELD;
nip->base_version = 1;
@@ -102,7 +104,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
/* This is already in network order */
nip->sys_guid = to_idev(ibdev)->sys_image_guid;
nip->node_guid = dd->ipath_guid;
- nip->port_guid = nip->sys_guid;
+ nip->port_guid = dd->ipath_guid;
nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->ipath_deviceid);
majrev = dd->ipath_majrev;
@@ -110,9 +112,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
nip->revision = cpu_to_be32((majrev << 16) | minrev);
nip->local_port_num = port;
vendor = dd->ipath_vendorid;
- nip->vendor_id[0] = 0;
- nip->vendor_id[1] = vendor >> 8;
- nip->vendor_id[2] = vendor;
+ nip->vendor_id[0] = IPATH_SRC_OUI_1;
+ nip->vendor_id[1] = IPATH_SRC_OUI_2;
+ nip->vendor_id[2] = IPATH_SRC_OUI_3;
return reply(smp);
}
@@ -131,15 +133,29 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
* We only support one GUID for now. If this changes, the
* portinfo.guid_cap field needs to be updated too.
*/
- if (startgx == 0)
- /* The first is a copy of the read-only HW GUID. */
- *p = to_idev(ibdev)->dd->ipath_guid;
- else
+ if (startgx == 0) {
+ __be64 g = to_idev(ibdev)->dd->ipath_guid;
+ if (g == 0)
+ /* GUID 0 is illegal */
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ /* The first is a copy of the read-only HW GUID. */
+ *p = g;
+ } else
smp->status |= IB_SMP_INVALID_FIELD;
return reply(smp);
}
+static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
+{
+ (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
+}
+
+static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
+{
+ (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
+}
static int get_overrunthreshold(struct ipath_devdata *dd)
{
@@ -220,6 +236,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
struct ib_device *ibdev, u8 port)
{
struct ipath_ibdev *dev;
+ struct ipath_devdata *dd;
struct ib_port_info *pip = (struct ib_port_info *)smp->data;
u16 lid;
u8 ibcstat;
@@ -233,34 +250,38 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
}
dev = to_idev(ibdev);
+ dd = dev->dd;
/* Clear all fields. Only set the non-zero fields. */
memset(smp->data, 0, sizeof(smp->data));
/* Only return the mkey if the protection field allows it. */
if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
- (dev->mkeyprot_resv_lmc >> 6) == 0)
+ dev->mkeyprot == 0)
pip->mkey = dev->mkey;
pip->gid_prefix = dev->gid_prefix;
- lid = dev->dd->ipath_lid;
+ lid = dd->ipath_lid;
pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
pip->sm_lid = cpu_to_be16(dev->sm_lid);
pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
/* pip->diag_code; */
pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
pip->local_port_num = port;
- pip->link_width_enabled = dev->link_width_enabled;
- pip->link_width_supported = 3; /* 1x or 4x */
- pip->link_width_active = 2; /* 4x */
- pip->linkspeed_portstate = 0x10; /* 2.5Gbps */
- ibcstat = dev->dd->ipath_lastibcstat;
- pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
+ pip->link_width_enabled = dd->ipath_link_width_enabled;
+ pip->link_width_supported = dd->ipath_link_width_supported;
+ pip->link_width_active = dd->ipath_link_width_active;
+ pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
+ ibcstat = dd->ipath_lastibcstat;
+ /* map LinkState to IB portinfo values. */
+ pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
+
pip->portphysstate_linkdown =
- (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
- (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
- pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
- pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
- switch (dev->dd->ipath_ibmtu) {
+ (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
+ (get_linkdowndefaultstate(dd) ? 1 : 2);
+ pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
+ pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
+ dd->ipath_link_speed_enabled;
+ switch (dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -286,14 +307,15 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* pip->vl_arb_high_cap; // only one VL */
/* pip->vl_arb_low_cap; // only one VL */
/* InitTypeReply = 0 */
- pip->inittypereply_mtucap = IB_MTU_4096;
- // HCAs ignore VLStallCount and HOQLife
+ /* our mtu cap depends on whether 4K MTU enabled or not */
+ pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+ /* HCAs ignore VLStallCount and HOQLife */
/* pip->vlstallcnt_hoqlife; */
pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
/* P_KeyViolations are counted by hardware. */
pip->pkey_violations =
- cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
+ cpu_to_be16((ipath_get_cr_errpkey(dd) -
dev->z_pkey_violations) & 0xFFFF);
pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
/* Only the hardware GUID is supported for now */
@@ -302,10 +324,17 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* 32.768 usec. response time (guessing) */
pip->resv_resptimevalue = 3;
pip->localphyerrors_overrunerrors =
- (get_phyerrthreshold(dev->dd) << 4) |
- get_overrunthreshold(dev->dd);
+ (get_phyerrthreshold(dd) << 4) |
+ get_overrunthreshold(dd);
/* pip->max_credit_hint; */
- /* pip->link_roundtrip_latency[3]; */
+ if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+ u32 v;
+
+ v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
+ pip->link_roundtrip_latency[0] = v >> 16;
+ pip->link_roundtrip_latency[1] = v >> 8;
+ pip->link_roundtrip_latency[2] = v;
+ }
ret = reply(smp);
@@ -320,6 +349,7 @@ bail:
*/
static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
{
+ /* always a kernel port, no locking needed */
struct ipath_portdata *pd = dd->ipath_pd[0];
memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
@@ -390,7 +420,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
struct ib_port_info *pip = (struct ib_port_info *)smp->data;
struct ib_event event;
struct ipath_ibdev *dev;
- u32 flags;
+ struct ipath_devdata *dd;
char clientrereg = 0;
u16 lid, smlid;
u8 lwe;
@@ -404,6 +434,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
goto err;
dev = to_idev(ibdev);
+ dd = dev->dd;
event.device = ibdev;
event.element.port_num = port;
@@ -412,11 +443,12 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
lid = be16_to_cpu(pip->lid);
- if (lid != dev->dd->ipath_lid) {
+ if (dd->ipath_lid != lid ||
+ dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
/* Must be a valid unicast LID address. */
if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
goto err;
- ipath_set_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
+ ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
}
@@ -431,37 +463,43 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
ib_dispatch_event(&event);
}
- /* Only 4x supported but allow 1x or 4x to be set (see 14.2.6.6). */
+ /* Allow 1x or 4x to be set (see 14.2.6.6). */
lwe = pip->link_width_enabled;
- if ((lwe >= 4 && lwe <= 8) || (lwe >= 0xC && lwe <= 0xFE))
- goto err;
- if (lwe == 0xFF)
- dev->link_width_enabled = 3; /* 1x or 4x */
- else if (lwe)
- dev->link_width_enabled = lwe;
+ if (lwe) {
+ if (lwe == 0xFF)
+ lwe = dd->ipath_link_width_supported;
+ else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
+ goto err;
+ set_link_width_enabled(dd, lwe);
+ }
- /* Only 2.5 Gbs supported. */
+ /* Allow 2.5 or 5.0 Gbs. */
lse = pip->linkspeedactive_enabled & 0xF;
- if (lse >= 2 && lse <= 0xE)
- goto err;
+ if (lse) {
+ if (lse == 15)
+ lse = dd->ipath_link_speed_supported;
+ else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
+ goto err;
+ set_link_speed_enabled(dd, lse);
+ }
/* Set link down default state. */
switch (pip->portphysstate_linkdown & 0xF) {
case 0: /* NOP */
break;
case 1: /* SLEEP */
- if (set_linkdowndefaultstate(dev->dd, 1))
+ if (set_linkdowndefaultstate(dd, 1))
goto err;
break;
case 2: /* POLL */
- if (set_linkdowndefaultstate(dev->dd, 0))
+ if (set_linkdowndefaultstate(dd, 0))
goto err;
break;
default:
goto err;
}
- dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc;
+ dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
dev->vl_high_limit = pip->vl_high_limit;
switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
@@ -478,13 +516,15 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
mtu = 2048;
break;
case IB_MTU_4096:
+ if (!ipath_mtu4096)
+ goto err;
mtu = 4096;
break;
default:
/* XXX We have already partially updated our state! */
goto err;
}
- ipath_set_mtu(dev->dd, mtu);
+ ipath_set_mtu(dd, mtu);
dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
@@ -500,16 +540,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* later.
*/
if (pip->pkey_violations == 0)
- dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
+ dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
if (pip->qkey_violations == 0)
dev->qkey_violations = 0;
ore = pip->localphyerrors_overrunerrors;
- if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
+ if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
goto err;
- if (set_overrunthreshold(dev->dd, (ore & 0xF)))
+ if (set_overrunthreshold(dd, (ore & 0xF)))
goto err;
dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -527,7 +567,6 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* is down or is being set to down.
*/
state = pip->linkspeed_portstate & 0xF;
- flags = dev->dd->ipath_flags;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
goto err;
@@ -543,10 +582,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* FALLTHROUGH */
case IB_PORT_DOWN:
if (lstate == 0)
- if (get_linkdowndefaultstate(dev->dd))
- lstate = IPATH_IB_LINKDOWN_SLEEP;
- else
- lstate = IPATH_IB_LINKDOWN;
+ lstate = IPATH_IB_LINKDOWN_ONLY;
else if (lstate == 1)
lstate = IPATH_IB_LINKDOWN_SLEEP;
else if (lstate == 2)
@@ -555,27 +591,19 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
lstate = IPATH_IB_LINKDOWN_DISABLE;
else
goto err;
- ipath_set_linkstate(dev->dd, lstate);
- if (flags & IPATH_LINKACTIVE) {
- event.event = IB_EVENT_PORT_ERR;
- ib_dispatch_event(&event);
+ ipath_set_linkstate(dd, lstate);
+ if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
+ ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ goto done;
}
+ ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
+ IPATH_LINKACTIVE, 1000);
break;
case IB_PORT_ARMED:
- if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
- break;
- ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
- if (flags & IPATH_LINKACTIVE) {
- event.event = IB_EVENT_PORT_ERR;
- ib_dispatch_event(&event);
- }
+ ipath_set_linkstate(dd, IPATH_IB_LINKARM);
break;
case IB_PORT_ACTIVE:
- if (!(flags & IPATH_LINKARMED))
- break;
- ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
- event.event = IB_EVENT_PORT_ACTIVE;
- ib_dispatch_event(&event);
+ ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
break;
default:
/* XXX We have already partially updated our state! */
@@ -704,6 +732,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
int i;
int changed = 0;
+ /* always a kernel port, no locking needed */
pd = dd->ipath_pd[0];
for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
@@ -761,151 +790,18 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
return recv_subn_get_pkeytable(smp, ibdev);
}
-#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
-#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010)
-#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011)
-#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012)
-#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D)
-#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E)
-
-struct ib_perf {
- u8 base_version;
- u8 mgmt_class;
- u8 class_version;
- u8 method;
- __be16 status;
- __be16 unused;
- __be64 tid;
- __be16 attr_id;
- __be16 resv;
- __be32 attr_mod;
- u8 reserved[40];
- u8 data[192];
-} __attribute__ ((packed));
-
-struct ib_pma_classportinfo {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- u8 reserved[3];
- u8 resp_time_value; /* only lower 5 bits */
- union ib_gid redirect_gid;
- __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 redirect_lid;
- __be16 redirect_pkey;
- __be32 redirect_qp; /* only lower 24 bits */
- __be32 redirect_qkey;
- union ib_gid trap_gid;
- __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 trap_lid;
- __be16 trap_pkey;
- __be32 trap_hl_qp; /* 8, 24 bits respectively */
- __be32 trap_qkey;
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplescontrol {
- u8 opcode;
- u8 port_select;
- u8 tick;
- u8 counter_width; /* only lower 3 bits */
- __be32 counter_mask0_9; /* 2, 10 * 3, bits */
- __be16 counter_mask10_14; /* 1, 5 * 3, bits */
- u8 sample_mechanisms;
- u8 sample_status; /* only lower 2 bits */
- __be64 option_mask;
- __be64 vendor_mask;
- __be32 sample_start;
- __be32 sample_interval;
- __be16 tag;
- __be16 counter_select[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult_ext {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 extended_width; /* only upper 2 bits */
- __be64 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portcounters {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be16 symbol_error_counter;
- u8 link_error_recovery_counter;
- u8 link_downed_counter;
- __be16 port_rcv_errors;
- __be16 port_rcv_remphys_errors;
- __be16 port_rcv_switch_relay_errors;
- __be16 port_xmit_discards;
- u8 port_xmit_constraint_errors;
- u8 port_rcv_constraint_errors;
- u8 reserved1;
- u8 lli_ebor_errors; /* 4, 4, bits */
- __be16 reserved2;
- __be16 vl15_dropped;
- __be32 port_xmit_data;
- __be32 port_rcv_data;
- __be32 port_xmit_packets;
- __be32 port_rcv_packets;
-} __attribute__ ((packed));
-
-#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001)
-#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002)
-#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004)
-#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
-#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
-#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
-#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
-#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400)
-#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800)
-#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
-#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
-#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
-#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000)
-
-struct ib_pma_portcounters_ext {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be32 reserved1;
- __be64 port_xmit_data;
- __be64 port_rcv_data;
- __be64 port_xmit_packets;
- __be64 port_rcv_packets;
- __be64 port_unicast_xmit_packets;
- __be64 port_unicast_rcv_packets;
- __be64 port_multicast_xmit_packets;
- __be64 port_multicast_rcv_packets;
-} __attribute__ ((packed));
-
-#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001)
-#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002)
-#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004)
-#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008)
-#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010)
-#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020)
-#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040)
-#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080)
-
-static int recv_pma_get_classportinfo(struct ib_perf *pmp)
+static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
{
- struct ib_pma_classportinfo *p =
- (struct ib_pma_classportinfo *)pmp->data;
+ struct ib_class_port_info *p =
+ (struct ib_class_port_info *)pmp->data;
memset(pmp->data, 0, sizeof(pmp->data));
- if (pmp->attr_mod != 0)
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
/* Indicate AllPortSelect is valid (only one port anyway) */
- p->cap_mask = __constant_cpu_to_be16(1 << 8);
+ p->capability_mask = cpu_to_be16(1 << 8);
p->base_version = 1;
p->class_version = 1;
/*
@@ -923,41 +819,48 @@ static int recv_pma_get_classportinfo(struct ib_perf *pmp)
* We support 5 counters which only count the mandatory quantities.
*/
#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 \
- __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \
- COUNTER_MASK(1, 1) | \
- COUNTER_MASK(1, 2) | \
- COUNTER_MASK(1, 3) | \
- COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
+#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
+ COUNTER_MASK(1, 1) | \
+ COUNTER_MASK(1, 2) | \
+ COUNTER_MASK(1, 3) | \
+ COUNTER_MASK(1, 4))
+
+static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
(struct ib_pma_portsamplescontrol *)pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
unsigned long flags;
u8 port_select = p->port_select;
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
/*
* Ticks are 10x the link transfer period which for 2.5Gbs is 4
* nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample
* intervals are counted in ticks. Since we use Linux timers, that
* count in jiffies, we can't sample for less than 1000 ticks if HZ
- * == 1000 (4000 ticks if HZ is 250).
+ * == 1000 (4000 ticks if HZ is 250). link_speed_active returns 2 for
+ * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
+ * have hardware support for delaying packets.
*/
- /* XXX This is WRONG. */
- p->tick = 250; /* 1 usec. */
+ if (crp->cr_psstat)
+ p->tick = dev->dd->ipath_link_speed_active - 1;
+ else
+ p->tick = 250; /* 1 usec. */
p->counter_width = 4; /* 32 bit counters */
p->counter_mask0_9 = COUNTER_MASK0_9;
spin_lock_irqsave(&dev->pending_lock, flags);
- p->sample_status = dev->pma_sample_status;
+ if (crp->cr_psstat)
+ p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ p->sample_status = dev->pma_sample_status;
p->sample_start = cpu_to_be32(dev->pma_sample_start);
p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
p->tag = cpu_to_be16(dev->pma_tag);
@@ -971,76 +874,85 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
+static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
(struct ib_pma_portsamplescontrol *)pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
unsigned long flags;
- u32 start;
+ u8 status;
int ret;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(p->port_select != port && p->port_select != 0xFF)) {
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
- start = be32_to_cpu(p->sample_start);
- if (start != 0) {
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) {
- dev->pma_sample_status =
- IB_PMA_SAMPLE_STATUS_STARTED;
- dev->pma_sample_start = start;
- dev->pma_sample_interval =
- be32_to_cpu(p->sample_interval);
- dev->pma_tag = be16_to_cpu(p->tag);
- if (p->counter_select[0])
- dev->pma_counter_select[0] =
- p->counter_select[0];
- if (p->counter_select[1])
- dev->pma_counter_select[1] =
- p->counter_select[1];
- if (p->counter_select[2])
- dev->pma_counter_select[2] =
- p->counter_select[2];
- if (p->counter_select[3])
- dev->pma_counter_select[3] =
- p->counter_select[3];
- if (p->counter_select[4])
- dev->pma_counter_select[4] =
- p->counter_select[4];
- }
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
+ if (status == IB_PMA_SAMPLE_STATUS_DONE) {
+ dev->pma_sample_start = be32_to_cpu(p->sample_start);
+ dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
+ dev->pma_tag = be16_to_cpu(p->tag);
+ dev->pma_counter_select[0] = p->counter_select[0];
+ dev->pma_counter_select[1] = p->counter_select[1];
+ dev->pma_counter_select[2] = p->counter_select[2];
+ dev->pma_counter_select[3] = p->counter_select[3];
+ dev->pma_counter_select[4] = p->counter_select[4];
+ if (crp->cr_psstat) {
+ ipath_write_creg(dev->dd, crp->cr_psinterval,
+ dev->pma_sample_interval);
+ ipath_write_creg(dev->dd, crp->cr_psstart,
+ dev->pma_sample_start);
+ } else
+ dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
}
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
bail:
return ret;
}
-static u64 get_counter(struct ipath_ibdev *dev, __be16 sel)
+static u64 get_counter(struct ipath_ibdev *dev,
+ struct ipath_cregs const *crp,
+ __be16 sel)
{
u64 ret;
switch (sel) {
case IB_PMA_PORT_XMIT_DATA:
- ret = dev->ipath_sword;
+ ret = (crp->cr_psxmitdatacount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
+ dev->ipath_sword;
break;
case IB_PMA_PORT_RCV_DATA:
- ret = dev->ipath_rword;
+ ret = (crp->cr_psrcvdatacount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
+ dev->ipath_rword;
break;
case IB_PMA_PORT_XMIT_PKTS:
- ret = dev->ipath_spkts;
+ ret = (crp->cr_psxmitpktscount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
+ dev->ipath_spkts;
break;
case IB_PMA_PORT_RCV_PKTS:
- ret = dev->ipath_rpkts;
+ ret = (crp->cr_psrcvpktscount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
+ dev->ipath_rpkts;
break;
case IB_PMA_PORT_XMIT_WAIT:
- ret = dev->ipath_xmit_wait;
+ ret = (crp->cr_psxmitwaitcount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
+ dev->ipath_xmit_wait;
break;
default:
ret = 0;
@@ -1049,45 +961,59 @@ static u64 get_counter(struct ipath_ibdev *dev, __be16 sel)
return ret;
}
-static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
+static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
struct ib_device *ibdev)
{
struct ib_pma_portsamplesresult *p =
(struct ib_pma_portsamplesresult *)pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ u8 status;
int i;
memset(pmp->data, 0, sizeof(pmp->data));
p->tag = cpu_to_be16(dev->pma_tag);
- p->sample_status = cpu_to_be16(dev->pma_sample_status);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
+ p->sample_status = cpu_to_be16(status);
for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
- p->counter[i] = cpu_to_be32(
- get_counter(dev, dev->pma_counter_select[i]));
+ p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
+ cpu_to_be32(
+ get_counter(dev, crp, dev->pma_counter_select[i]));
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
+static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev)
{
struct ib_pma_portsamplesresult_ext *p =
(struct ib_pma_portsamplesresult_ext *)pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ u8 status;
int i;
memset(pmp->data, 0, sizeof(pmp->data));
p->tag = cpu_to_be16(dev->pma_tag);
- p->sample_status = cpu_to_be16(dev->pma_sample_status);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
+ p->sample_status = cpu_to_be16(status);
/* 64 bits */
- p->extended_width = __constant_cpu_to_be32(0x80000000);
+ p->extended_width = cpu_to_be32(0x80000000);
for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
- p->counter[i] = cpu_to_be64(
- get_counter(dev, dev->pma_counter_select[i]));
+ p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
+ cpu_to_be64(
+ get_counter(dev, crp, dev->pma_counter_select[i]));
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portcounters(struct ib_perf *pmp,
+static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1115,16 +1041,18 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
dev->z_local_link_integrity_errors;
cntrs.excessive_buffer_overrun_errors -=
dev->z_excessive_buffer_overrun_errors;
+ cntrs.vl15_dropped -= dev->z_vl15_dropped;
+ cntrs.vl15_dropped += dev->n_vl15_dropped;
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
if (cntrs.symbol_error_counter > 0xFFFFUL)
- p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF);
+ p->symbol_error_counter = cpu_to_be16(0xFFFF);
else
p->symbol_error_counter =
cpu_to_be16((u16)cntrs.symbol_error_counter);
@@ -1138,17 +1066,17 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
else
p->link_downed_counter = (u8)cntrs.link_downed_counter;
if (cntrs.port_rcv_errors > 0xFFFFUL)
- p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF);
+ p->port_rcv_errors = cpu_to_be16(0xFFFF);
else
p->port_rcv_errors =
cpu_to_be16((u16) cntrs.port_rcv_errors);
if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
- p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF);
+ p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
else
p->port_rcv_remphys_errors =
cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
if (cntrs.port_xmit_discards > 0xFFFFUL)
- p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF);
+ p->port_xmit_discards = cpu_to_be16(0xFFFF);
else
p->port_xmit_discards =
cpu_to_be16((u16)cntrs.port_xmit_discards);
@@ -1156,27 +1084,27 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
cntrs.local_link_integrity_errors = 0xFUL;
if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
cntrs.excessive_buffer_overrun_errors = 0xFUL;
- p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
cntrs.excessive_buffer_overrun_errors;
- if (dev->n_vl15_dropped > 0xFFFFUL)
- p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
+ if (cntrs.vl15_dropped > 0xFFFFUL)
+ p->vl15_dropped = cpu_to_be16(0xFFFF);
else
- p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped);
+ p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
- p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
else
p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
- p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
else
p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
- p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
else
p->port_xmit_packets =
cpu_to_be32((u32)cntrs.port_xmit_packets);
if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
- p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
else
p->port_rcv_packets =
cpu_to_be32((u32) cntrs.port_rcv_packets);
@@ -1184,7 +1112,7 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
+static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters_ext *p =
@@ -1205,9 +1133,9 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
p->port_xmit_data = cpu_to_be64(swords);
p->port_rcv_data = cpu_to_be64(rwords);
@@ -1221,7 +1149,7 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_set_portcounters(struct ib_perf *pmp,
+static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1264,8 +1192,10 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
dev->z_excessive_buffer_overrun_errors =
cntrs.excessive_buffer_overrun_errors;
- if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED)
+ if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
dev->n_vl15_dropped = 0;
+ dev->z_vl15_dropped = cntrs.vl15_dropped;
+ }
if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
dev->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1282,7 +1212,7 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
return recv_pma_get_portcounters(pmp, ibdev, port);
}
-static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
+static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1336,10 +1266,11 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
}
/* Is the mkey in the process of expiring? */
- if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
+ if (dev->mkey_lease_timeout &&
+ time_after_eq(jiffies, dev->mkey_lease_timeout)) {
/* Clear timeout and mkey protection field. */
dev->mkey_lease_timeout = 0;
- dev->mkeyprot_resv_lmc &= 0x3F;
+ dev->mkeyprot = 0;
}
/*
@@ -1350,7 +1281,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
dev->mkey != smp->mkey &&
(smp->method == IB_MGMT_METHOD_SET ||
(smp->method == IB_MGMT_METHOD_GET &&
- (dev->mkeyprot_resv_lmc >> 7) != 0))) {
+ dev->mkeyprot >= 2))) {
if (dev->mkey_violations != 0xFFFF)
++dev->mkey_violations;
if (dev->mkey_lease_timeout ||
@@ -1430,13 +1361,17 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
goto bail;
}
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ case IB_MGMT_METHOD_TRAP_REPRESS:
case IB_MGMT_METHOD_GET_RESP:
/*
* The ib_mad module will call us to process responses
* before checking for other consumers.
* Just tell the caller to process it normally.
*/
- ret = IB_MAD_RESULT_FAILURE;
+ ret = IB_MAD_RESULT_SUCCESS;
goto bail;
default:
smp->status |= IB_SMP_UNSUP_METHOD;
@@ -1451,19 +1386,19 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
- struct ib_perf *pmp = (struct ib_perf *)out_mad;
+ struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
int ret;
*out_mad = *in_mad;
- if (pmp->class_version != 1) {
- pmp->status |= IB_SMP_UNSUP_VERSION;
+ if (pmp->mad_hdr.class_version != 1) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
- switch (pmp->method) {
+ switch (pmp->mad_hdr.method) {
case IB_MGMT_METHOD_GET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_CLASS_PORT_INFO:
ret = recv_pma_get_classportinfo(pmp);
goto bail;
@@ -1487,13 +1422,13 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
port_num);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
case IB_MGMT_METHOD_SET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_PORT_SAMPLES_CONTROL:
ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
port_num);
@@ -1507,7 +1442,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
port_num);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
@@ -1518,10 +1453,10 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
* before checking for other consumers.
* Just tell the caller to process it normally.
*/
- ret = IB_MAD_RESULT_FAILURE;
+ ret = IB_MAD_RESULT_SUCCESS;
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METHOD;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_smp *) pmp);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
index 11b7378ff21..e7327422940 100644
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,9 +30,9 @@
* SOFTWARE.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
@@ -47,6 +47,11 @@ void ipath_release_mmap_info(struct kref *ref)
{
struct ipath_mmap_info *ip =
container_of(ref, struct ipath_mmap_info, ref);
+ struct ipath_ibdev *dev = to_idev(ip->context->device);
+
+ spin_lock_irq(&dev->pending_lock);
+ list_del(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
vfree(ip->obj);
kfree(ip);
@@ -61,18 +66,16 @@ static void ipath_vma_open(struct vm_area_struct *vma)
struct ipath_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
- ip->mmap_cnt++;
}
static void ipath_vma_close(struct vm_area_struct *vma)
{
struct ipath_mmap_info *ip = vma->vm_private_data;
- ip->mmap_cnt--;
kref_put(&ip->ref, ipath_release_mmap_info);
}
-static struct vm_operations_struct ipath_vm_ops = {
+static const struct vm_operations_struct ipath_vm_ops = {
.open = ipath_vma_open,
.close = ipath_vma_close,
};
@@ -88,7 +91,7 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct ipath_ibdev *dev = to_idev(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
- struct ipath_mmap_info *ip, **pp;
+ struct ipath_mmap_info *ip, *pp;
int ret = -EINVAL;
/*
@@ -97,15 +100,16 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
spin_lock_irq(&dev->pending_lock);
- for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+ list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+ pending_mmaps) {
/* Only the creator is allowed to mmap the object */
- if (context != ip->context || (void *) offset != ip->obj)
+ if (context != ip->context || (__u64) offset != ip->offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->size)
break;
- *pp = ip->next;
+ list_del_init(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
@@ -120,3 +124,51 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
done:
return ret;
}
+
+/*
+ * Allocate information for ipath_mmap
+ */
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj) {
+ struct ipath_mmap_info *ip;
+
+ ip = kmalloc(sizeof *ip, GFP_KERNEL);
+ if (!ip)
+ goto bail;
+
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ INIT_LIST_HEAD(&ip->pending_mmaps);
+ ip->size = size;
+ ip->context = context;
+ ip->obj = obj;
+ kref_init(&ip->ref);
+
+bail:
+ return ip;
+}
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj) {
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ ip->size = size;
+ ip->obj = obj;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index b36f6fb3e37..5e61e9bff69 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +31,9 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
+#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
@@ -54,6 +57,8 @@ static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
* @acc: access flags
*
* Returns the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see ipath_dma.c).
*/
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
{
@@ -138,18 +143,19 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
+ mr->umem = NULL;
m = 0;
n = 0;
for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr =
- phys_to_virt(buffer_list[i].addr);
+ mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
mr->mr.map[m]->segs[n].length = buffer_list[i].size;
mr->mr.length += buffer_list[i].size;
n++;
@@ -168,54 +174,67 @@ bail:
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
{
struct ipath_mr *mr;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ struct ib_umem *umem;
+ int n, m, entry;
+ struct scatterlist *sg;
struct ib_mr *ret;
- if (region->length == 0) {
+ if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
- n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
- n += chunk->nents;
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem))
+ return (void *) umem;
+ n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
+ ib_umem_release(umem);
goto bail;
}
- mr->mr.user_base = region->user_base;
- mr->mr.iova = region->virt_base;
- mr->mr.length = region->length;
- mr->mr.offset = region->offset;
+ mr->mr.pd = pd;
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = umem->offset;
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
+ mr->umem = umem;
m = 0;
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list) {
- for (i = 0; i < chunk->nmap; i++) {
- mr->mr.map[m]->segs[n].vaddr =
- page_address(chunk->page_list[i].page);
- mr->mr.map[m]->segs[n].length = region->page_size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
ret = &mr->ibmr;
@@ -244,6 +263,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
i--;
kfree(mr->mr.map[i]);
}
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
kfree(mr);
return 0;
}
@@ -289,6 +312,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
+ fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
@@ -344,7 +368,7 @@ int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
n = 0;
ps = 1 << fmr->page_shift;
for (i = 0; i < list_len; i++) {
- fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]);
+ fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
if (++n == IPATH_SEGSZ) {
m++;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 224b0f40767..face87602dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,6 +32,8 @@
*/
#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
@@ -81,11 +83,51 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static u32 alloc_qpn(struct ipath_qp_table *qpt)
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long flags;
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock_irqsave(&qpt->lock, flags);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
- u32 ret;
+ u32 ret = -1;
+
+ if (type == IB_QPT_SMI)
+ ret = 0;
+ else if (type == IB_QPT_GSI)
+ ret = 1;
+
+ if (ret != -1) {
+ map = &qpt->map[0];
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ if (!test_and_set_bit(ret, map->page))
+ atomic_dec(&map->n_free);
+ else
+ ret = -EBUSY;
+ goto bail;
+ }
qpn = qpt->last + 1;
if (qpn >= QPN_MAX)
@@ -95,19 +137,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- unsigned long flags;
-
- /*
- * Free the page if someone raced with us
- * installing it:
- */
- spin_lock_irqsave(&qpt->lock, flags);
- if (map->page)
- free_page(page);
- else
- map->page = (void *)page;
- spin_unlock_irqrestore(&qpt->lock, flags);
+ get_map_page(qpt, map);
if (unlikely(!map->page))
break;
}
@@ -151,7 +181,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
qpn = mk_qpn(qpt, map, offset);
}
- ret = 0;
+ ret = -ENOMEM;
bail:
return ret;
@@ -180,29 +210,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
enum ib_qp_type type)
{
unsigned long flags;
- u32 qpn;
int ret;
- if (type == IB_QPT_SMI)
- qpn = 0;
- else if (type == IB_QPT_GSI)
- qpn = 1;
- else {
- /* Allocate the next available QPN */
- qpn = alloc_qpn(qpt);
- if (qpn == 0) {
- ret = -ENOMEM;
- goto bail;
- }
- }
- qp->ibqp.qp_num = qpn;
+ ret = alloc_qpn(qpt, type);
+ if (ret < 0)
+ goto bail;
+ qp->ibqp.qp_num = ret;
/* Add the QP to the hash table. */
spin_lock_irqsave(&qpt->lock, flags);
- qpn %= qpt->max;
- qp->next = qpt->table[qpn];
- qpt->table[qpn] = qp;
+ ret %= qpt->max;
+ qp->next = qpt->table[ret];
+ qpt->table[ret] = qp;
atomic_inc(&qp->refcount);
spin_unlock_irqrestore(&qpt->lock, flags);
@@ -224,7 +244,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
- int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
@@ -235,54 +254,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
- fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
-
- if (!fnd)
- return;
-
- /* If QPN is not reserved, mark QPN free in the bitmap. */
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
-
- wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
*/
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
- struct ipath_qp *qp, *nqp;
- u32 n;
+ struct ipath_qp *qp;
+ u32 n, qp_inuse = 0;
+ spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) {
- spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
- spin_unlock_irqrestore(&qpt->lock, flags);
-
- while (qp) {
- nqp = qp->next;
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
- if (!atomic_dec_and_test(&qp->refcount) ||
- !ipath_destroy_qp(&qp->ibqp))
- ipath_dbg(KERN_INFO "QP memory leak!\n");
- qp = nqp;
- }
+
+ for (; qp; qp = qp->next)
+ qp_inuse++;
}
+ spin_unlock_irqrestore(&qpt->lock, flags);
- for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+ for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page)
- free_page((unsigned long)qpt->map[n].page);
- }
+ free_page((unsigned long) qpt->map[n].page);
+ return qp_inuse;
}
/**
@@ -314,18 +319,23 @@ struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
/**
* ipath_reset_qp - initialize the QP state to the reset state
* @qp: the QP to reset
+ * @type: the QP type
*/
-static void ipath_reset_qp(struct ipath_qp *qp)
+static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
{
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ atomic_set(&qp->s_dma_busy, 0);
+ qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
+ qp->s_wqe = NULL;
+ qp->s_pkt_delay = 0;
+ qp->s_draining = 0;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
- if (qp->ibqp.qp_type == IB_QPT_RC) {
+ if (type == IB_QPT_RC) {
qp->s_state = IB_OPCODE_RC_SEND_LAST;
qp->r_state = IB_OPCODE_RC_SEND_LAST;
} else {
@@ -333,8 +343,9 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -342,62 +353,59 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
}
- qp->r_reuse_sge = 0;
}
/**
- * ipath_error_qp - put a QP into an error state
- * @qp: the QP to put into an error state
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * Returns true if last WQE event should be generated.
+ * The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
*/
-void ipath_error_qp(struct ipath_qp *qp)
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
+ int ret = 0;
- ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
- qp->ibqp.qp_num, qp->remote_qpn);
+ if (qp->state == IB_QPS_ERR)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
spin_lock(&dev->pending_lock);
- /* XXX What if its already removed by the timeout code? */
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.status = IB_WC_WR_FLUSH_ERR;
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
-
- while (qp->s_last != qp->s_head) {
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ ipath_schedule_send(qp);
+
+ memset(&wc, 0, sizeof(wc));
+ wc.qp = &qp->ibqp;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
+ wc.wr_id = qp->r_wr_id;
+ wc.status = err;
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->s_hdrwords = 0;
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+ wc.status = IB_WC_WR_FLUSH_ERR;
if (qp->r_rq.wq) {
struct ipath_rwq *wq;
@@ -414,7 +422,6 @@ void ipath_error_qp(struct ipath_qp *qp)
tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
- wc.opcode = IB_WC_RECV;
while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size)
@@ -424,7 +431,11 @@ void ipath_error_qp(struct ipath_qp *qp)
wq->tail = tail;
spin_unlock(&qp->r_rq.lock);
- }
+ } else if (qp->ibqp.event_handler)
+ ret = 1;
+
+bail:
+ return ret;
}
/**
@@ -442,17 +453,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
- unsigned long flags;
+ int lastwqe = 0;
int ret;
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -478,35 +489,62 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->port_num > ibqp->device->phys_port_cnt)
goto inval;
- if (attr_mask & IB_QP_PATH_MTU)
- if (attr->path_mtu > IB_MTU_4096)
- goto inval;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- if (attr->max_dest_rd_atomic > 1)
- goto inval;
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
- if (attr->max_rd_atomic > 1)
- goto inval;
+ /*
+ * don't allow invalid Path MTU values or greater than 2048
+ * unless we are configured for a 4KB MTU
+ */
+ if ((attr_mask & IB_QP_PATH_MTU) &&
+ (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
+ (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
+ goto inval;
if (attr_mask & IB_QP_PATH_MIG_STATE)
if (attr->path_mig_state != IB_MIG_MIGRATED &&
attr->path_mig_state != IB_MIG_REARM)
goto inval;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+ goto inval;
+
switch (new_state) {
case IB_QPS_RESET:
- ipath_reset_qp(qp);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ spin_lock_irq(&qp->s_lock);
+ }
+ ipath_reset_qp(qp, ibqp->qp_type);
+ break;
+
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
break;
case IB_QPS_ERR:
- ipath_error_qp(qp);
+ lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
+ qp->state = new_state;
break;
-
}
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -516,7 +554,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn;
+ qp->s_psn = qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
@@ -526,8 +564,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_ACCESS_FLAGS)
qp->qp_access_flags = attr->qp_access_flags;
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
qp->remote_ah_attr = attr->ah_attr;
+ qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
+ }
if (attr_mask & IB_QP_PATH_MTU)
qp->path_mtu = attr->path_mtu;
@@ -551,14 +591,27 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
- qp->state = new_state;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+ spin_unlock_irq(&qp->s_lock);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
ret = 0;
goto bail;
inval:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
ret = -EINVAL;
bail:
@@ -589,14 +642,14 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
- attr->sq_draining = 0;
- attr->max_rd_atomic = 1;
- attr->max_dest_rd_atomic = 1;
+ attr->sq_draining = qp->s_draining;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
attr->timeout = qp->timeout;
attr->retry_cnt = qp->s_retry_cnt;
- attr->rnr_retry = qp->s_rnr_retry;
+ attr->rnr_retry = qp->s_rnr_retry_cnt;
attr->alt_port_num = 0;
attr->alt_timeout = 0;
@@ -606,7 +659,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
- if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+ if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
else
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -694,27 +747,42 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ipath_swqe *swq = NULL;
struct ipath_ibdev *dev;
size_t sz;
+ size_t sg_list_sz;
struct ib_qp *ret;
- if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
- init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
- init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs ||
- init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
- ret = ERR_PTR(-ENOMEM);
+ if (init_attr->create_flags) {
+ ret = ERR_PTR(-EINVAL);
goto bail;
}
- if (init_attr->cap.max_send_sge +
- init_attr->cap.max_recv_sge +
- init_attr->cap.max_send_wr +
- init_attr->cap.max_recv_wr == 0) {
+ if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
+ /* Check receive queue parameters if no SRQ is specified. */
+ if (!init_attr->srq) {
+ if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ if (init_attr->cap.max_send_sge +
+ init_attr->cap.max_send_wr +
+ init_attr->cap.max_recv_sge +
+ init_attr->cap.max_recv_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ }
+
switch (init_attr->qp_type) {
case IB_QPT_UC:
case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
sz = sizeof(struct ipath_sge) *
init_attr->cap.max_send_sge +
sizeof(struct ipath_swqe);
@@ -723,24 +791,32 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
ret = ERR_PTR(-ENOMEM);
goto bail;
}
- /* FALLTHROUGH */
- case IB_QPT_UD:
- case IB_QPT_SMI:
- case IB_QPT_GSI:
sz = sizeof(*qp);
+ sg_list_sz = 0;
if (init_attr->srq) {
struct ipath_srq *srq = to_isrq(init_attr->srq);
- sz += sizeof(*qp->r_sg_list) *
- srq->rq.max_sge;
- } else
- sz += sizeof(*qp->r_sg_list) *
- init_attr->cap.max_recv_sge;
- qp = kmalloc(sz, GFP_KERNEL);
+ if (srq->rq.max_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (srq->rq.max_sge - 1);
+ } else if (init_attr->cap.max_recv_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (init_attr->cap.max_recv_sge - 1);
+ qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
+ if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+ init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI)) {
+ qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+ if (!qp->r_ud_sg_list) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ } else
+ qp->r_ud_sg_list = NULL;
if (init_attr->srq) {
sz = 0;
qp->r_rq.size = 0;
@@ -757,7 +833,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->r_rq.size * sz);
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
+ goto bail_sg_list;
}
}
@@ -769,8 +845,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
- tasklet_init(&qp->s_task, ipath_do_ruc_send,
- (unsigned long)qp);
+ init_waitqueue_head(&qp->wait_dma);
+ tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
qp->state = IB_QPS_RESET;
@@ -778,7 +854,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
- qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+ qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
else
qp->s_flags = 0;
dev = to_idev(ibpd->device);
@@ -786,10 +862,12 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
init_attr->qp_type);
if (err) {
ret = ERR_PTR(err);
- goto bail_rwq;
+ vfree(qp->r_rq.wq);
+ goto bail_sg_list;
}
qp->ip = NULL;
- ipath_reset_qp(qp);
+ qp->s_tx = NULL;
+ ipath_reset_qp(qp, init_attr->qp_type);
break;
default:
@@ -805,34 +883,34 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) qp->r_rq.wq;
- int err;
-
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_rwq;
- }
-
- if (qp->r_rq.wq) {
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
+ if (!qp->r_rq.wq) {
+ __u64 offset = 0;
+
+ err = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else {
+ u32 s = sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz;
+
+ qp->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ qp->r_rq.wq);
+ if (!qp->ip) {
ret = ERR_PTR(-ENOMEM);
- goto bail_rwq;
+ goto bail_ip;
+ }
+
+ err = ib_copy_to_udata(udata, &(qp->ip->offset),
+ sizeof(qp->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
}
- qp->ip = ip;
- ip->context = ibpd->uobject->context;
- ip->obj = qp->r_rq.wq;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- qp->r_rq.size * sz);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
}
}
@@ -846,13 +924,24 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
dev->n_qps_allocated++;
spin_unlock(&dev->n_qps_lock);
+ if (qp->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
ret = &qp->ibqp;
goto bail;
bail_ip:
- kfree(qp->ip);
-bail_rwq:
- vfree(qp->r_rq.wq);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ ipath_free_qp(&dev->qp_table, qp);
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+ kfree(qp->r_ud_sg_list);
bail_qp:
kfree(qp);
bail_swq:
@@ -874,38 +963,50 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
- unsigned long flags;
- spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ } else
+ spin_unlock_irq(&qp->s_lock);
+
+ ipath_free_qp(&dev->qp_table, qp);
+
+ if (qp->s_tx) {
+ atomic_dec(&qp->refcount);
+ if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+ kfree(qp->s_tx->txreq.map_addr);
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irq(&dev->pending_lock);
+ qp->s_tx = NULL;
+ }
+
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ /* all user's cleaned up, mark it available */
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
spin_lock(&dev->n_qps_lock);
dev->n_qps_allocated--;
spin_unlock(&dev->n_qps_lock);
- /* Stop the sending tasklet. */
- tasklet_kill(&qp->s_task);
-
- /* Make sure the QP isn't on the timeout list. */
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
-
- /*
- * Make sure that the QP is not in the QPN table so receive
- * interrupts will discard packets for this QP. XXX Also remove QP
- * from multicast table.
- */
- if (atomic_read(&qp->refcount) != 0)
- ipath_free_qp(&dev->qp_table, qp);
-
if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
else
vfree(qp->r_rq.wq);
+ kfree(qp->r_ud_sg_list);
vfree(qp->s_wq);
kfree(qp);
return 0;
@@ -945,49 +1046,6 @@ bail:
}
/**
- * ipath_sqerror_qp - put a QP's send queue into an error state
- * @qp: QP who's send queue will be put into an error state
- * @wc: the WC responsible for putting the QP in this state
- *
- * Flushes the send work queue.
- * The QP s_lock should be held.
- */
-
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
-{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
- qp->ibqp.qp_num, qp->remote_qpn, wc->status);
-
- spin_lock(&dev->pending_lock);
- /* XXX What if its already removed by the timeout code? */
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- spin_unlock(&dev->pending_lock);
-
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
-
- wc->status = IB_WC_WR_FLUSH_ERR;
-
- while (qp->s_last != qp->s_head) {
- wc->wr_id = wqe->wr.wr_id;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->state = IB_QPS_SQE;
-}
-
-/**
* ipath_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush
* @aeth: the Acknowledge Extended Transport Header
@@ -1013,9 +1071,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
}
/* Restart sending if it was blocked due to lack of credits. */
- if (qp->s_cur != qp->s_head &&
+ if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+ qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index a08654042c0..79b3dbc9717 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,12 +31,27 @@
* SOFTWARE.
*/
+#include <linux/io.h>
+
#include "ipath_verbs.h"
#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ipath_skip_sge(ss, len);
+ return wqe->length - len;
+}
+
/**
* ipath_init_restart- initialize the qp->s_sge after a restart
* @qp: the QP who's SGE we're restarting
@@ -47,15 +62,9 @@
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
struct ipath_ibdev *dev;
- u32 len;
- len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
- ib_mtu_enum_to_int(qp->path_mtu);
- qp->s_sge.sge = wqe->sg_list[0];
- qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
- ipath_skip_sge(&qp->s_sge, len);
- qp->s_len = wqe->length - len;
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+ ib_mtu_enum_to_int(qp->path_mtu));
dev = to_idev(qp->ibqp.device);
spin_lock(&dev->pending_lock);
if (list_empty(&qp->timerwait))
@@ -70,145 +79,202 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
* @ohdr: a pointer to the IB header being constructed
* @pmtu: the path MTU
*
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr, u32 pmtu)
{
+ struct ipath_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0;
+ u32 bth2;
+
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto bail;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- /*
- * Send a response. Note that we are in the responder's
- * side of the QP context.
- */
switch (qp->s_ack_state) {
- case OP(RDMA_READ_REQUEST):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
- if (len > pmtu) {
- len = pmtu;
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- } else
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
- qp->s_rdma_len -= len;
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ /*
+ * We can increment the tail pointer now that the last
+ * response has been sent instead of only being
+ * constructed.
+ */
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_ONLY):
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & IPATH_S_ACK_PENDING)
+ goto normal;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /* Copy SGE state in case we need to resend */
+ qp->s_ack_rdma_sge = e->rdma_sge;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ len = e->rdma_sge.sge.sge_length;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ e->sent = 1;
+ }
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = ipath_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn;
+ e->sent = 1;
+ }
bth0 = qp->s_ack_state << 24;
- ohdr->u.aeth = ipath_compute_aeth(qp);
- hwords++;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
+ len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
else {
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
}
- qp->s_rdma_len -= len;
bth0 = qp->s_ack_state << 24;
- break;
-
- case OP(RDMA_READ_RESPONSE_LAST):
- case OP(RDMA_READ_RESPONSE_ONLY):
- /*
- * We have to prevent new requests from changing
- * the r_sge state while a ipath_verbs_send()
- * is in progress.
- */
- qp->s_ack_state = OP(ACKNOWLEDGE);
- bth0 = 0;
- goto bail;
-
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- qp->s_cur_sge = NULL;
- len = 0;
- /*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
- */
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.aeth = ipath_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at) / 4;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
break;
default:
- /* Send a regular ACK. */
- qp->s_cur_sge = NULL;
- len = 0;
+ normal:
/*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
*/
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ACKNOWLEDGE) << 24;
+ qp->s_ack_state = OP(SEND_ONLY);
+ qp->s_flags &= ~IPATH_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
if (qp->s_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
- (qp->s_nak_state <<
- IPATH_AETH_CREDIT_SHIFT));
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->s_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
}
qp->s_hdrwords = hwords;
qp->s_cur_size = len;
+ ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
+ return 1;
bail:
- return bth0;
+ return 0;
}
/**
* ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- * @bth0p: pointer to the BTH opcode word
- * @bth2p: pointer to the BTH PSN word
*
* Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held and interrupts disabled.
*/
-int ipath_make_rc_req(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu, u32 *bth0p, u32 *bth2p)
+int ipath_make_rc_req(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_other_headers *ohdr;
struct ipath_sge_state *ss;
struct ipath_swqe *wqe;
u32 hwords;
u32 len;
u32 bth0;
u32 bth2;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
char newreq;
+ unsigned long flags;
+ int ret = 0;
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+ /*
+ * The lock is needed to synchronize between the sending tasklet,
+ * the receive interrupt handler, and timeout resends.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Sending responses has higher priority over sending requests. */
+ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != OP(ACKNOWLEDGE)) &&
+ ipath_make_rc_ack(dev, qp, ohdr, pmtu))
+ goto done;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout)
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
+ }
+
+ /* Leave BUSY set until RNR timeout. */
+ if (qp->s_rnr_timeout) {
+ qp->s_flags |= IPATH_S_WAITING;
+ goto bail;
+ }
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- bth0 = 0;
+ bth0 = 1 << 22; /* Set M bit */
/* Send a request. */
wqe = get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/*
* Resend an old request or start a new one.
*
@@ -220,8 +286,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
- goto done;
- qp->s_psn = wqe->psn = qp->s_next_psn;
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= IPATH_S_FENCE_PENDING;
+ goto bail;
+ }
+ wqe->psn = qp->s_next_psn;
newreq = 1;
}
/*
@@ -229,10 +304,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
* original work request since we may need to resend
* it.
*/
- qp->s_sge.sge = wqe->sg_list[0];
- qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
- qp->s_len = len = wqe->length;
+ len = wqe->length;
ss = &qp->s_sge;
bth2 = 0;
switch (wqe->wr.opcode) {
@@ -240,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -254,7 +328,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
else {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -271,14 +345,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
- hwords += sizeof(struct ib_reth) / 4;
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -292,7 +368,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
- ohdr->u.rc.imm_data = wqe->wr.imm_data;
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
@@ -303,14 +379,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
break;
case IB_WR_RDMA_READ:
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
- ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
- ohdr->u.rc.reth.length = cpu_to_be32(len);
- qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
/*
@@ -321,6 +400,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
}
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
ss = NULL;
len = 0;
if (++qp->s_cur == qp->s_size)
@@ -329,41 +415,66 @@ int ipath_make_rc_req(struct ipath_qp *qp,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
- qp->s_state = OP(COMPARE_SWAP);
- else
- qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.vaddr = cpu_to_be64(
- wqe->wr.wr.atomic.remote_addr);
- ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
- hwords += sizeof(struct ib_atomic_eth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
- if (++qp->s_cur == qp->s_size)
- qp->s_cur = 0;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
break;
default:
- goto done;
+ goto bail;
}
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_len = wqe->length;
if (newreq) {
qp->s_tail++;
if (qp->s_tail >= qp->s_size)
qp->s_tail = 0;
}
- bth2 |= qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
+ bth2 |= qp->s_psn & IPATH_PSN_MASK;
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_psn = wqe->lpsn + 1;
+ else {
+ qp->s_psn++;
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ }
/*
* Put the QP on the pending list so lost ACKs will cause
* a retry. More than one request can be pending so the
@@ -388,17 +499,11 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -407,7 +512,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -430,17 +535,11 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -449,7 +548,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
else {
qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
@@ -473,41 +572,32 @@ int ipath_make_rc_req(struct ipath_qp *qp,
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
- bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ bth2 = qp->s_psn & IPATH_PSN_MASK;
+ qp->s_psn = wqe->lpsn + 1;
ss = NULL;
len = 0;
qp->s_cur++;
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
-
- case OP(RDMA_READ_REQUEST):
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- /*
- * We shouldn't start anything new until this request is
- * finished. The ACK will handle rescheduling us. XXX The
- * number of outstanding ones is negotiated at connection
- * setup time (see pg. 258,289)? XXX Also, if we support
- * multiple outstanding requests, we need to check the WQE
- * IB_SEND_FENCE flag and not send a new request if a RDMA
- * read or atomic is pending.
- */
- goto done;
}
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+ bth2 |= 1 << 31; /* Request ACK. */
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
qp->s_cur_size = len;
- *bth0p = bth0 | (qp->s_state << 24);
- *bth2p = bth2;
- return 1;
-
+ ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
- return 0;
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
}
/**
@@ -516,16 +606,48 @@ done:
*
* This is called from ipath_rc_rcv() and only uses the receive
* side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
*/
static void send_rc_ack(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_devdata *dd;
u16 lrh0;
u32 bth0;
u32 hwords;
+ u32 __iomem *piobuf;
struct ipath_ib_header hdr;
struct ipath_other_headers *ohdr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != OP(ACKNOWLEDGE))
+ goto queue_ack;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Don't try to send ACKs if the link isn't ACTIVE */
+ dd = dev->dd;
+ if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+ goto done;
+
+ piobuf = ipath_getpiobuf(dd, 0, NULL);
+ if (!piobuf) {
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ goto queue_ack;
+ }
/* Construct the header. */
ohdr = &hdr.u.oth;
@@ -540,57 +662,54 @@ static void send_rc_ack(struct ipath_qp *qp)
lrh0 = IPATH_LRH_GRH;
}
/* read pkey_index w/o lock (its atomic) */
- bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
+ (OP(ACKNOWLEDGE) << 24) | (1 << 22);
if (qp->r_nak_state)
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
(qp->r_nak_state <<
IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
- if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
- bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
- } else
- bth0 |= OP(ACKNOWLEDGE) << 24;
lrh0 |= qp->remote_ah_attr.sl << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+ hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
- /*
- * If we can send the ACK, clear the ACK state.
- */
- if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
- qp->r_ack_state = OP(ACKNOWLEDGE);
- dev->n_unicast_xmit++;
- } else {
- /*
- * We are out of PIO buffers at the moment.
- * Pass responsibility for sending the ACK to the
- * send tasklet so that when a PIO buffer becomes
- * available, the ACK is sent ahead of other outgoing
- * packets.
- */
+ writeq(hwords + 1, piobuf);
+
+ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+ u32 *hdrp = (u32 *) &hdr;
+
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+ } else
+ __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+ ipath_flush_wc();
+
+ dev->n_unicast_xmit++;
+ goto done;
+
+queue_ack:
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
dev->n_rc_qacks++;
- spin_lock_irq(&qp->s_lock);
- /* Don't coalesce if a RDMA read or atomic is pending. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
- qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
- qp->s_ack_state = qp->r_ack_state;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
- qp->r_ack_state = OP(ACKNOWLEDGE);
- }
- spin_unlock_irq(&qp->s_lock);
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
}
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return;
}
/**
@@ -647,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
/*
* Set the state to restart in the middle of a request.
* Don't change the s_sge, s_cur_sge, or s_cur_size.
- * See ipath_do_rc_send().
+ * See ipath_make_rc_req().
*/
switch (opcode) {
case IB_WR_SEND:
@@ -683,32 +802,14 @@ done:
*
* The QP s_lock should be held and interrupts disabled.
*/
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
{
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
struct ipath_ibdev *dev;
- /*
- * If there are no requests pending, we are done.
- */
- if (ipath_cmp24(psn, qp->s_next_psn) >= 0 ||
- qp->s_last == qp->s_tail)
- goto done;
-
if (qp->s_retry == 0) {
- wc->wr_id = wqe->wr.wr_id;
- wc->status = IB_WC_RETRY_EXC_ERR;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = 0;
- wc->qp_num = qp->ibqp.qp_num;
- wc->src_qp = qp->remote_qpn;
- wc->pkey_index = 0;
- wc->slid = qp->remote_ah_attr.dlid;
- wc->sl = qp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_sqerror_qp(qp, wc);
+ ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
goto bail;
}
qp->s_retry--;
@@ -721,22 +822,27 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
dev->n_rc_resends++;
else
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
-
-done:
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
bail:
return;
}
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{
+ qp->s_last_psn = psn;
+}
+
/**
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
@@ -748,12 +854,16 @@ bail:
* Called at interrupt level with the QP s_lock held and interrupts disabled.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
+ enum ib_wc_status status;
struct ipath_swqe *wqe;
int ret = 0;
+ u32 ack_psn;
+ int diff;
/*
* Remove the QP from the timeout queue (or RNR timeout queue).
@@ -772,20 +882,28 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* before the NAK'ed request. The MSN won't include the NAK'ed
* request but will include an ACK'ed request(s).
*/
+ ack_psn = psn;
+ if (aeth >> 29)
+ ack_psn--;
wqe = get_swqe_ptr(qp, qp->s_last);
- /* Nothing is pending to ACK/NAK. */
- if (qp->s_last == qp->s_tail)
- goto bail;
-
/*
* The MSN might be for a later WQE than the PSN indicates so
* only complete WQEs that the PSN finishes.
*/
- while (ipath_cmp24(psn, wqe->lpsn) >= 0) {
- /* If we are ACKing a WQE, the MSN should be >= the SSN. */
- if (ipath_cmp24(aeth, wqe->ssn) < 0)
- break;
+ while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+ /*
+ * RDMA_READ_RESPONSE_ONLY is a special case since
+ * we want to generate completion events for everything
+ * before the RDMA read, copy the data, then generate
+ * the completion for the read.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+ opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+ diff == 0) {
+ ret = 1;
+ goto bail;
+ }
/*
* If this request is a RDMA read or atomic, and the ACK is
* for a later operation, this ACK NAKs the RDMA read or
@@ -796,39 +914,49 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* is sent but before the response is received.
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
- opcode != OP(RDMA_READ_RESPONSE_LAST)) ||
+ (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
- (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
- ipath_cmp24(wqe->psn, psn) != 0))) {
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
/*
* The last valid PSN seen is the previous
* request's.
*/
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
- ipath_restart_rc(qp, wqe->psn, &wc);
+ ipath_restart_rc(qp, wqe->psn);
/*
* No need to process the ACK/NAK since we are
* restarting an earlier request.
*/
goto bail;
}
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ *(u64 *) wqe->sg_list[0].vaddr = val;
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) ||
+ qp->s_flags & IPATH_S_RDMAR_PENDING)
+ ipath_schedule_send(qp);
+ }
/* Post a send completion queue entry if requested. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
}
qp->s_retry = qp->s_retry_cnt;
@@ -840,15 +968,21 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
if (qp->s_last == qp->s_cur) {
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
+ qp->s_last = qp->s_cur;
+ if (qp->s_last == qp->s_tail)
+ break;
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_state = OP(SEND_LAST);
qp->s_psn = wqe->psn;
+ } else {
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+ qp->s_draining = 0;
+ if (qp->s_last == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, qp->s_last);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
- if (qp->s_last == qp->s_tail)
- break;
}
switch (aeth >> 29) {
@@ -857,35 +991,49 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
/* If this is a partial ACK, reset the retransmit timer. */
if (qp->s_last != qp->s_tail) {
spin_lock(&dev->pending_lock);
- list_add_tail(&qp->timerwait,
- &dev->pending[dev->pending_index]);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
+ /*
+ * If we get a partial ACK for a resent operation,
+ * we can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ reset_psn(qp, psn + 1);
+ ipath_schedule_send(qp);
+ }
+ } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
}
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
ret = 1;
goto bail;
case 1: /* RNR NAK */
dev->n_rnr_naks++;
+ if (qp->s_last == qp->s_tail)
+ goto bail;
if (qp->s_rnr_retry == 0) {
- if (qp->s_last == qp->s_tail)
- goto bail;
-
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
+ status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
qp->s_rnr_retry--;
- if (qp->s_last == qp->s_tail)
- goto bail;
/* The last valid PSN is the previous PSN. */
- qp->s_last_psn = psn - 1;
+ update_last_psn(qp, psn - 1);
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ dev->n_rc_resends++;
+ else
+ dev->n_rc_resends +=
+ (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
@@ -893,58 +1041,43 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp);
+ ipath_schedule_send(qp);
goto bail;
case 3: /* NAK */
- /* The last valid PSN seen is the previous request's. */
- if (qp->s_last != qp->s_tail)
- qp->s_last_psn = wqe->psn - 1;
+ if (qp->s_last == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
dev->n_seq_naks++;
/*
- * Back up to the responder's expected PSN. XXX
+ * Back up to the responder's expected PSN.
* Note that we might get a NAK in the middle of an
* RDMA READ response which terminates the RDMA
* READ.
*/
- if (qp->s_last == qp->s_tail)
- break;
-
- if (ipath_cmp24(psn, wqe->psn) < 0)
- break;
-
- /* Retry the request. */
- ipath_restart_rc(qp, psn, &wc);
+ ipath_restart_rc(qp, psn);
break;
case 1: /* Invalid Request */
- wc.status = IB_WC_REM_INV_REQ_ERR;
+ status = IB_WC_REM_INV_REQ_ERR;
dev->n_other_naks++;
goto class_b;
case 2: /* Remote Access Error */
- wc.status = IB_WC_REM_ACCESS_ERR;
+ status = IB_WC_REM_ACCESS_ERR;
dev->n_other_naks++;
goto class_b;
case 3: /* Remote Operation Error */
- wc.status = IB_WC_REM_OP_ERR;
+ status = IB_WC_REM_OP_ERR;
dev->n_other_naks++;
class_b:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp_num = qp->ibqp.qp_num;
- wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(qp, &wc);
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
@@ -989,14 +1122,20 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
u32 psn, u32 hdrsize, u32 pmtu,
int header_in_data)
{
+ struct ipath_swqe *wqe;
+ enum ib_wc_status status;
unsigned long flags;
- struct ib_wc wc;
int diff;
u32 pad;
u32 aeth;
+ u64 val;
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto ack_done;
+
/* Ignore invalid responses. */
if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
goto ack_done;
@@ -1018,6 +1157,11 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
+ if (unlikely(qp->s_last == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ status = IB_WC_SUCCESS;
+
switch (opcode) {
case OP(ACKNOWLEDGE):
case OP(ATOMIC_ACKNOWLEDGE):
@@ -1028,103 +1172,143 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- if (opcode == OP(ATOMIC_ACKNOWLEDGE))
- *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
- if (!do_rc_ack(qp, aeth, psn, opcode) ||
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ if (!header_in_data) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = be64_to_cpu(((__be64 *) data)[0]);
+ } else
+ val = 0;
+ if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
hdrsize += 4;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
/*
- * do_rc_ack() has already checked the PSN so skip
- * the sequence check.
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
*/
- goto rdma_read;
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
case OP(RDMA_READ_RESPONSE_MIDDLE):
/* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
- rdma_read:
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ read_middle:
if (unlikely(tlen != (hdrsize + pmtu + 4)))
- goto ack_done;
- if (unlikely(pmtu >= qp->s_len))
- goto ack_done;
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
/* We got a response so update the timeout. */
- if (unlikely(qp->s_last == qp->s_tail ||
- get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
- IB_WR_RDMA_READ))
- goto ack_done;
spin_lock(&dev->pending_lock);
if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
list_move_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
+
+ if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
+ qp->s_retry = qp->s_retry_cnt;
+
/*
* Update the RDMA receive state but do the copy w/o
* holding the locks and blocking interrupts.
- * XXX Yet another place that affects relaxed RDMA order
- * since we don't want s_sge modified.
*/
- qp->s_len -= pmtu;
- qp->s_last_psn = psn;
+ qp->s_rdma_read_len -= pmtu;
+ update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- ipath_copy_sge(&qp->s_sge, data, pmtu);
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
goto bail;
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0))
+ goto ack_done;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
case OP(RDMA_READ_RESPONSE_LAST):
/* ACKs READ req. */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
- /* FALLTHROUGH */
- case OP(RDMA_READ_RESPONSE_ONLY):
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
- /*
- * Get the number of bytes the message was padded by.
- */
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for the AETH header (4) and
* ICRC (4).
*/
- if (unlikely(tlen <= (hdrsize + pad + 8))) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+ read_last:
tlen -= hdrsize + pad + 8;
- if (unlikely(tlen != qp->s_len)) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
if (!header_in_data)
aeth = be32_to_cpu(ohdr->u.aeth);
else {
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- ipath_copy_sge(&qp->s_sge, data, tlen);
- if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
- /*
- * Change the state so we contimue
- * processing new requests and wake up the
- * tasklet if there are posted sends.
- */
- qp->s_state = OP(SEND_LAST);
- if (qp->s_tail != qp->s_head)
- tasklet_hi_schedule(&qp->s_task);
- }
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+ (void) do_rc_ack(qp, aeth, psn,
+ OP(RDMA_READ_RESPONSE_LAST), 0);
goto ack_done;
}
+ack_op_err:
+ status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_len_err:
+ status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
@@ -1146,7 +1330,7 @@ bail:
* incoming RC packet for the given QP.
* Called at interrupt level.
* Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent and the s_lock unlocked.
+ * schedule a response to be sent.
*/
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
struct ipath_other_headers *ohdr,
@@ -1157,25 +1341,24 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
int diff,
int header_in_data)
{
- struct ib_reth *reth;
+ struct ipath_ack_entry *e;
+ u8 i, prev;
+ int old_req;
+ unsigned long flags;
if (diff > 0) {
/*
* Packet sequence error.
* A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or
- * NAK is pending though.
+ * Don't queue the NAK if we already sent one.
*/
- if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
- qp->r_nak_state != 0)
- goto done;
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = OP(SEND_ONLY);
+ if (!qp->r_nak_state) {
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
}
- goto send_ack;
+ goto done;
}
/*
@@ -1188,8 +1371,54 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* can coalesce an outstanding duplicate ACK. We have to
* send the earliest so that RDMA reads can be restarted at
* the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
*/
- if (opcode == OP(RDMA_READ_REQUEST)) {
+ psn &= IPATH_PSN_MASK;
+ e = NULL;
+ old_req = 1;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock_done;
+
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = IPATH_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (ipath_cmp24(psn, e->psn) >= 0) {
+ if (prev == qp->s_tail_ack_queue)
+ old_req = 0;
+ break;
+ }
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+ goto unlock_done;
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1198,88 +1427,131 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
data += sizeof(*reth);
}
/*
- * If we receive a duplicate RDMA request, it means the
- * requester saw a sequence error and needs to restart
- * from an earlier point. We can abort the current
- * RDMA read send in that case.
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
*/
- spin_lock_irq(&qp->s_lock);
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
- /*
- * We are already sending earlier requested data.
- * Don't abort it to send later out of sequence data.
- */
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+ ib_mtu_enum_to_int(qp->path_mtu);
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+ goto unlock_done;
+ if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
- /*
- * Address range must be a subset of the original
- * request and start on pmtu boundaries.
- */
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
+ ok = ipath_rkey_ok(qp, &e->rdma_sge,
+ len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
+ if (unlikely(!ok))
+ goto unlock_done;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
- tasklet_hi_schedule(&qp->s_task);
- goto send_ack;
+ e->psn = psn;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
}
- /*
- * A pending RDMA read will ACK anything before it so
- * ignore earlier duplicate requests.
- */
- if (qp->s_ack_state != OP(ACKNOWLEDGE))
- goto done;
-
- /*
- * If an ACK is pending, don't replace the pending ACK
- * with an earlier one since the later one will ACK the earlier.
- * Also, if we already have a pending atomic, send it.
- */
- if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
- (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
- qp->r_ack_state >= OP(COMPARE_SWAP)))
- goto send_ack;
- switch (opcode) {
case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
+ case OP(FETCH_ADD): {
/*
- * Check for the PSN of the last atomic operation
- * performed and resend the result if found.
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
*/
- if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
- goto done;
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ if (old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Try to send a simple ACK to work around a Mellanox bug
+ * which doesn't accept a RDMA read response or atomic
+ * response as an ACK for earlier SENDs or RDMA writes.
+ */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
+ !(qp->s_flags & IPATH_S_ACK_PENDING) &&
+ qp->s_ack_state == OP(ACKNOWLEDGE)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = i;
break;
}
- qp->r_ack_state = opcode;
qp->r_nak_state = 0;
- qp->r_ack_psn = psn;
-send_ack:
- return 0;
+ ipath_schedule_send(qp);
+unlock_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return 1;
+
+send_ack:
+ return 0;
+}
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = ipath_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+ unsigned next;
+
+ next = n + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (n == qp->s_tail_ack_queue) {
+ qp->s_tail_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ }
}
/**
@@ -1308,6 +1580,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int diff;
struct ib_reth *reth;
int header_in_data;
+ unsigned long flags;
+
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
/* Check for GRH */
if (!has_grh) {
@@ -1362,20 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
- nack_inv:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
- qp->r_ack_state = OP(SEND_ONLY);
- qp->r_nak_state = IB_NAK_INVALID_REQUEST;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
+ goto nack_inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
@@ -1401,26 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
break;
}
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- /*
- * A RNR NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read or atomic
- * is pending though.
- */
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
- qp->r_ack_state = OP(SEND_ONLY);
- qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
qp->r_rcv_len = 0;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
@@ -1452,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
+ wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -1477,29 +1728,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (opcode == OP(RDMA_WRITE_LAST) ||
- opcode == OP(RDMA_WRITE_ONLY))
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
/* consume RWQE */
/* RETH comes after BTH */
if (!header_in_data)
@@ -1517,7 +1770,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->r_sge,
+ ok = ipath_rkey_ok(qp, &qp->r_sge,
qp->r_len, vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
@@ -1529,9 +1782,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE)))
- goto nack_acc;
if (opcode == OP(RDMA_WRITE_FIRST))
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1540,7 +1790,27 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto rnr_nak;
goto send_last_imm;
- case OP(RDMA_READ_REQUEST):
+ case OP(RDMA_READ_REQUEST): {
+ struct ipath_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ ipath_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1548,139 +1818,152 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto nack_acc;
- spin_lock_irq(&qp->s_lock);
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ len = be32_to_cpu(reth->length);
+ if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
- IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
- goto nack_acc;
- }
+ ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (qp->s_rdma_len > pmtu)
- qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
qp->r_msn++;
-
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
-
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
+ }
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
+ struct ipath_ack_entry *e;
u64 vaddr;
+ atomic64_t *maddr;
u64 sdata;
u32 rkey;
+ u8 next;
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ ipath_update_ack_queue(qp, next);
+ }
if (!header_in_data)
ateth = &ohdr->u.atomic_eth;
- else {
+ else
ateth = (struct ib_atomic_eth *)data;
- data += sizeof(*ateth);
- }
- vaddr = be64_to_cpu(ateth->vaddr);
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
- goto nack_inv;
+ goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
+ goto nack_acc_unlck;
/* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
- spin_lock_irq(&dev->pending_lock);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (opcode == OP(FETCH_ADD))
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data ==
- be64_to_cpu(ateth->compare_data))
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irq(&dev->pending_lock);
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn & IPATH_PSN_MASK;
qp->r_msn++;
- qp->r_atomic_psn = psn & IPATH_PSN_MASK;
- psn |= 1 << 31;
- break;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+
+ goto unlock;
}
default:
- /* Drop packet for unknown opcodes. */
- goto done;
+ /* NAK unknown opcodes. */
+ goto nack_inv;
}
qp->r_psn++;
qp->r_state = opcode;
+ qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & (1 << 31)) {
- /*
- * Coalesce ACKs unless there is a RDMA READ or
- * ATOMIC pending.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = opcode;
- qp->r_ack_psn = psn;
- }
+ if (psn & (1 << 31))
goto send_ack;
- }
goto done;
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
- qp->r_ack_state = OP(RDMA_WRITE_ONLY);
- qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
- qp->r_ack_psn = qp->r_psn;
- }
+ ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
send_ack:
- /* Send ACK right away unless the send tasklet has a pending ACK. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE))
- send_rc_ack(qp);
+ send_rc_ack(qp);
+ goto done;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 6e23b3d632b..8f44d0cf383 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -63,81 +63,139 @@
/* kr_control bits */
#define INFINIPATH_C_FREEZEMODE 0x00000002
#define INFINIPATH_C_LINKENABLE 0x00000004
-#define INFINIPATH_C_RESET 0x00000001
/* kr_sendctrl bits */
#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
+#define INFINIPATH_S_UPDTHRESH_SHIFT 24
+#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
#define IPATH_S_ABORT 0
#define IPATH_S_PIOINTBUFAVAIL 1
#define IPATH_S_PIOBUFAVAILUPD 2
#define IPATH_S_PIOENABLE 3
+#define IPATH_S_SDMAINTENABLE 9
+#define IPATH_S_SDMASINGLEDESCRIPTOR 10
+#define IPATH_S_SDMAENABLE 11
+#define IPATH_S_SDMAHALT 12
#define IPATH_S_DISARM 31
#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
+#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE)
+#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
+ (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
+#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE)
+#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT)
#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)
-/* kr_rcvctrl bits */
+/* kr_rcvctrl bits that are the same on multiple chips */
#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD 0x80000000
+#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-#define INFINIPATH_I_ERROR 0x80000000
-#define INFINIPATH_I_SPIOSENT 0x40000000
-#define INFINIPATH_I_SPIOBUFAVAIL 0x20000000
-#define INFINIPATH_I_GPIO 0x10000000
+#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL
+#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL
+#define INFINIPATH_I_ERROR 0x0000000080000000ULL
+#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL
+#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL
+#define INFINIPATH_I_GPIO 0x0000000010000000ULL
+#define INFINIPATH_I_JINT 0x0000000004000000ULL
/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
-#define INFINIPATH_E_RICRC 0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
-#define INFINIPATH_E_REBP 0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
-#define INFINIPATH_E_RHDR 0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
-#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
-#define INFINIPATH_E_RESET 0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
+#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
+#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
+#define INFINIPATH_E_RICRC 0x0000000000000004ULL
+#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
+#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
+#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
+#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
+#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
+#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
+#define INFINIPATH_E_REBP 0x0000000000000200ULL
+#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
+#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
+#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
+#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
+#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
+#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
+#define INFINIPATH_E_RHDR 0x0000000000010000ULL
+#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
+#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL
+#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL
+#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
+#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
+#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
+#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
+#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
+#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
+#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
+#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
+#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
+#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL
+#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL
+#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL
+#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL
+#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL
+#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL
+#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL
+#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL
+#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL
+#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL
+#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
+#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
+#define INFINIPATH_E_RESET 0x0004000000000000ULL
+#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
+#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL
+#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL
+
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+ | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+ | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+ | INFINIPATH_E_REBP )
+
+/* Convenience for decoding Send DMA errors */
+#define INFINIPATH_E_SDMAERRS ( \
+ INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
+ INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
+ INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
+ INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
+ INFINIPATH_E_SDMAUNEXPDATA | \
+ INFINIPATH_E_SDMADESCADDRMISALIGN | \
+ INFINIPATH_E_SDMADISABLED | \
+ INFINIPATH_E_SENDBUFMISUSE)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
* bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
+/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
+#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -163,8 +221,8 @@
#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
+#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
+#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
@@ -179,10 +237,9 @@
#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
+
#define INFINIPATH_IBCS_TXREADY 0x40000000
#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
/* link training states (shift by
@@ -200,30 +257,13 @@
#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
-/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
+/* link state machine states (shift by ibcs_ls_shift) */
#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
#define INFINIPATH_IBCS_L_STATE_INIT 0x1
#define INFINIPATH_IBCS_L_STATE_ARM 0x2
#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
-/* combination link status states that we use with some frequency */
-#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
- (INFINIPATH_IBCS_LINKSTATE_MASK \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
- (INFINIPATH_IBCS_LT_STATE_LINKUP \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
- (INFINIPATH_IBCS_LT_STATE_LINKUP \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
- (INFINIPATH_IBCS_LT_STATE_LINKUP \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
/* kr_extstatus bits */
#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
@@ -249,20 +289,6 @@
#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
-/* kr_mdio bits */
-#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL
-#define INFINIPATH_MDIO_CLKDIV_SHIFT 32
-#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL
-#define INFINIPATH_MDIO_COMMAND_SHIFT 26
-#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL
-#define INFINIPATH_MDIO_DEVADDR_SHIFT 21
-#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL
-#define INFINIPATH_MDIO_REGADDR_SHIFT 16
-#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL
-#define INFINIPATH_MDIO_DATA_SHIFT 0
-#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL
-#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL
-
/* kr_partitionkey bits */
#define INFINIPATH_PKEY_SIZE 16
#define INFINIPATH_PKEY_MASK 0xFFFF
@@ -278,20 +304,10 @@
/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET 0x7ULL
-#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
-#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
#define INFINIPATH_XGXS_RX_POL_SHIFT 19
#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
/*
* IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
@@ -302,6 +318,17 @@
typedef u64 ipath_err_t;
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file is may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -310,13 +337,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
@@ -414,6 +434,29 @@ struct ipath_kregs {
ipath_kreg kr_pcieq1serdesconfig0;
ipath_kreg kr_pcieq1serdesconfig1;
ipath_kreg kr_pcieq1serdesstatus;
+ ipath_kreg kr_hrtbt_guid;
+ ipath_kreg kr_ibcddrctrl;
+ ipath_kreg kr_ibcddrstatus;
+ ipath_kreg kr_jintreload;
+
+ /* send dma related regs */
+ ipath_kreg kr_senddmabase;
+ ipath_kreg kr_senddmalengen;
+ ipath_kreg kr_senddmatail;
+ ipath_kreg kr_senddmahead;
+ ipath_kreg kr_senddmaheadaddr;
+ ipath_kreg kr_senddmabufmask0;
+ ipath_kreg kr_senddmabufmask1;
+ ipath_kreg kr_senddmabufmask2;
+ ipath_kreg kr_senddmastatus;
+
+ /* SerDes related regs (IBA7220-only) */
+ ipath_kreg kr_ibserdesctrl;
+ ipath_kreg kr_ib_epbacc;
+ ipath_kreg kr_ib_epbtrans;
+ ipath_kreg kr_pcie_epbacc;
+ ipath_kreg kr_pcie_epbtrans;
+ ipath_kreg kr_ib_ddsrxeq;
};
struct ipath_cregs {
@@ -450,6 +493,20 @@ struct ipath_cregs {
ipath_creg cr_unsupvlcnt;
ipath_creg cr_wordrcvcnt;
ipath_creg cr_wordsendcnt;
+ ipath_creg cr_vl15droppedpktcnt;
+ ipath_creg cr_rxotherlocalphyerrcnt;
+ ipath_creg cr_excessbufferovflcnt;
+ ipath_creg cr_locallinkintegrityerrcnt;
+ ipath_creg cr_rxvlerrcnt;
+ ipath_creg cr_rxdlidfltrcnt;
+ ipath_creg cr_psstat;
+ ipath_creg cr_psstart;
+ ipath_creg cr_psinterval;
+ ipath_creg cr_psrcvdatacount;
+ ipath_creg cr_psrcvpktscount;
+ ipath_creg cr_psxmitdatacount;
+ ipath_creg cr_psxmitpktscount;
+ ipath_creg cr_psxmitwaitcount;
};
#endif /* _IPATH_REGISTERS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 5c1da2d25e0..1f95bbaf760 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +31,9 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
#include "ipath_verbs.h"
#include "ipath_kernel.h"
@@ -76,6 +79,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
+ * Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
@@ -83,9 +87,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ /* We already did a spin_lock_irqsave(), so just use spin_lock */
+ spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
@@ -96,57 +100,53 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
qp->s_rnr_timeout -= nqp->s_rnr_timeout;
l = l->next;
- if (l->next == &dev->rnrwait)
+ if (l->next == &dev->rnrwait) {
+ nqp = NULL;
break;
+ }
nqp = list_entry(l->next, struct ipath_qp,
timerwait);
}
+ if (nqp)
+ nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock(&dev->pending_lock);
}
-static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+/**
+ * ipath_init_sge - Validate a RWQE and fill in the SGE state
+ * @qp: the QP
+ *
+ * Return 1 if OK.
+ */
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss)
{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
- qp->r_len = 0;
+ *lengthp = 0;
for (i = j = 0; i < wqe->num_sge; i++) {
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
- if ((user && wqe->sg_list[i].lkey == 0) ||
- !ipath_lkey_ok(&dev->lk_table,
- &qp->r_sg_list[j], &wqe->sg_list[i],
- IB_ACCESS_LOCAL_WRITE))
+ if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
- qp->r_len += wqe->sg_list[i].length;
+ *lengthp += wqe->sg_list[i].length;
j++;
}
- qp->r_sge.sge = qp->r_sg_list[0];
- qp->r_sge.sg_list = qp->r_sg_list + 1;
- qp->r_sge.num_sge = j;
+ ss->num_sge = j;
ret = 1;
goto bail;
bad_lkey:
+ memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
+ wc.qp = &qp->ibqp;
/* Signal solicited completion event. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
@@ -157,7 +157,7 @@ bail:
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
- * @wr_id_only: update wr_id only, not SGEs
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
*
* Return 0 if no RWQE is available, otherwise return 1.
*
@@ -185,6 +185,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
@@ -192,18 +197,23 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
- spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
- goto bail;
+ goto unlock;
}
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
- } while (!wr_id_only && !init_sge(qp, wqe));
+ if (wr_id_only)
+ break;
+ qp->r_sge.sg_list = qp->r_sg_list;
+ } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
qp->r_wr_id = wqe->wr_id;
wq->tail = tail;
ret = 1;
+ set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -230,17 +240,17 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail;
}
}
+unlock:
spin_unlock_irqrestore(&rq->lock, flags);
-
bail:
return ret;
}
/**
* ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the loopback QP
+ * @sqp: the sending QP
*
- * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
+ * This is called from ipath_do_send() to
* forward a WQE addressed to the same HCA.
* Note that although we are single threaded due to the tasklet, we still
* have to protect against post_send(). We don't have to worry about
@@ -256,37 +266,64 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
unsigned long flags;
struct ib_wc wc;
u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
- if (!qp) {
- dev->n_pkt_drops++;
- return;
- }
-again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
- /* Get the next send request. */
- if (sqp->s_last == sqp->s_head) {
- /* Send work queue is empty. */
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
+ sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work reqeust. */
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
*/
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
spin_unlock_irqrestore(&sqp->s_lock, flags);
- wc.wc_flags = 0;
- wc.imm_data = 0;
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -295,69 +332,41 @@ again:
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.imm_data;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- if (sqp->s_rnr_retry == 0) {
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
- goto err;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- dev->n_rnr_naks++;
- sqp->s_rnr_timeout =
- ib_ipath_rnr_table[sqp->r_min_rnr_timer];
- ipath_insert_rnr_queue(sqp);
- goto done;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.imm_data;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
if (wqe->length == 0)
break;
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
- IB_ACCESS_REMOTE_WRITE))) {
- acc_err:
- wc.status = IB_WC_REM_ACCESS_ERR;
- err:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp_num = sqp->ibqp.qp_num;
- wc.src_qp = sqp->remote_qpn;
- wc.pkey_index = 0;
- wc.slid = sqp->remote_ah_attr.dlid;
- wc.sl = sqp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(sqp, &wc);
- goto done;
- }
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
break;
case IB_WR_RDMA_READ:
- if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -365,26 +374,26 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
- sdata = wqe->wr.wr.atomic.swap;
- spin_lock_irqsave(&dev->pending_lock, flags);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
goto send_comp;
default:
- goto done;
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
}
sge = &sqp->s_sge.sge;
@@ -393,6 +402,8 @@ again:
if (len > sge->length)
len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
sge->vaddr += len;
@@ -415,8 +426,7 @@ again:
sqp->s_len -= len;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
- wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -425,58 +435,98 @@ again:
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- /* XXX do we know which pkey matched? Only needed for GSI. */
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
+ wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ ipath_send_complete(sqp, wqe, send_status);
+ goto again;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = wqe->length;
- wc.qp_num = sqp->ibqp.qp_num;
- wc.src_qp = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
}
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= IPATH_S_WAITING;
+ dev->n_rnr_naks++;
+ sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+ ipath_insert_rnr_queue(sqp);
+ goto clr_busy;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ ipath_rc_error(qp, wc.status);
- /* Update s_last now that we are finished with the SWQE */
+serr:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (++sqp->s_last >= sqp->s_size)
- sqp->s_last = 0;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto again;
+ ipath_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~IPATH_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
- if (atomic_dec_and_test(&qp->refcount))
+ if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
-static int want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
-
- return 0;
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
+ qp->ibqp.qp_type == IB_QPT_SMI) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
}
/**
@@ -485,115 +535,35 @@ static int want_buffer(struct ipath_devdata *dd)
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
*/
-void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
+static int ipath_no_bufs_available(struct ipath_qp *qp,
+ struct ipath_ibdev *dev)
{
unsigned long flags;
+ int ret = 1;
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (list_empty(&qp->piowait))
- list_add_tail(&qp->piowait, &dev->piowait);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
- * could be called. If we are still in the tasklet function,
- * tasklet_hi_schedule() will not call us until the next time
- * tasklet_hi_schedule() is called.
- * We clear the tasklet flag now since we are committing to return
- * from the tasklet function.
+ * could be called. Therefore, put QP on the piowait list before
+ * enabling the PIO avail interrupt.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
- tasklet_unlock(&qp->s_task);
- want_buffer(dev->dd);
- dev->n_piowait++;
-}
-
-/**
- * ipath_post_ruc_send - post RC and UC sends
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
- struct ipath_swqe *wqe;
- unsigned long flags;
- u32 next;
- int i, j;
- int acc;
- int ret;
-
- /*
- * Don't allow RDMA reads or atomic operations on UC or
- * undefined operations.
- * Make sure buffer is large enough to hold the result for atomics.
- */
- if (qp->ibqp.qp_type == IB_QPT_UC) {
- if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
- ret = -EINVAL;
- goto bail;
- }
- } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
- ret = -EINVAL;
- goto bail;
- } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
- (wr->num_sge == 0 ||
- wr->sg_list[0].length < sizeof(u64) ||
- wr->sg_list[0].addr & (sizeof(u64) - 1))) {
- ret = -EINVAL;
- goto bail;
- }
- /* IB spec says that num_sge == 0 is OK. */
- if (wr->num_sge > qp->s_max_sge) {
- ret = -ENOMEM;
- goto bail;
- }
spin_lock_irqsave(&qp->s_lock, flags);
- next = qp->s_head + 1;
- if (next >= qp->s_size)
- next = 0;
- if (next == qp->s_last) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
- ret = -EINVAL;
- goto bail;
- }
-
- wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
- wqe->ssn = qp->s_ssn++;
- wqe->sg_list[0].mr = NULL;
- wqe->sg_list[0].vaddr = NULL;
- wqe->sg_list[0].length = 0;
- wqe->sg_list[0].sge_length = 0;
- wqe->length = 0;
- acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
- for (i = 0, j = 0; i < wr->num_sge; i++) {
- if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
- ret = -EINVAL;
- goto bail;
- }
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
- acc)) {
- spin_unlock_irqrestore(&qp->s_lock, flags);
- ret = -EINVAL;
- goto bail;
- }
- wqe->length += wr->sg_list[i].length;
- j++;
- }
- wqe->wr.num_sge = j;
- qp->s_head = next;
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_piowait++;
+ qp->s_flags |= IPATH_S_WAITING;
+ qp->s_flags &= ~IPATH_S_BUSY;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->piowait))
+ list_add_tail(&qp->piowait, &dev->piowait);
+ spin_unlock(&dev->pending_lock);
+ } else
+ ret = 0;
spin_unlock_irqrestore(&qp->s_lock, flags);
-
- ipath_do_ruc_send((unsigned long) qp);
-
- ret = 0;
-
-bail:
+ if (ret)
+ want_buffer(dev->dd, qp);
return ret;
}
@@ -627,39 +597,78 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 bth0, u32 bth2)
+{
+ u16 lrh0;
+ u32 nwords;
+ u32 extra_bytes;
+
+ /* Construct the header. */
+ extra_bytes = -qp->s_cur_size & 3;
+ nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ lrh0 = IPATH_LRH_BTH;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = IPATH_LRH_GRH;
+ }
+ lrh0 |= qp->remote_ah_attr.sl << 4;
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
+ bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 |= extra_bytes << 20;
+ ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
/**
- * ipath_do_ruc_send - perform a send on an RC or UC QP
+ * ipath_do_send - perform a send on a QP
* @data: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, after we drop the QP s_lock, two threads could send
- * packets out of order.
+ * Otherwise, two threads could send packets out of order.
*/
-void ipath_do_ruc_send(unsigned long data)
+void ipath_do_send(unsigned long data)
{
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ int (*make_req)(struct ipath_qp *qp);
unsigned long flags;
- u16 lrh0;
- u32 nwords;
- u32 extra_bytes;
- u32 bth0;
- u32 bth2;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
- struct ipath_other_headers *ohdr;
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC) &&
+ qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
+ ipath_ruc_loopback(qp);
goto bail;
+ }
- if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
- ipath_ruc_loopback(qp);
- goto clear;
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ make_req = ipath_make_rc_req;
+ else if (qp->ibqp.qp_type == IB_QPT_UC)
+ make_req = ipath_make_uc_req;
+ else
+ make_req = ipath_make_ud_req;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
}
- ohdr = &qp->s_hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr.u.l.oth;
+ qp->s_flags |= IPATH_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
again:
/* Check for a constructed packet to be sent. */
@@ -668,69 +677,58 @@ again:
* If no PIO bufs are available, return. An interrupt will
* call ipath_ib_piobufavail() when one is available.
*/
- if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
- (u32 *) &qp->s_hdr, qp->s_cur_size,
- qp->s_cur_sge)) {
- ipath_no_bufs_available(qp, dev);
- goto bail;
+ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+ qp->s_cur_sge, qp->s_cur_size)) {
+ if (ipath_no_bufs_available(qp, dev))
+ goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
qp->s_hdrwords = 0;
}
- /*
- * The lock is needed to synchronize between setting
- * qp->s_ack_state, resend timer, and post_send().
- */
- spin_lock_irqsave(&qp->s_lock, flags);
+ if (make_req(qp))
+ goto again;
- /* Sending responses has higher priority over sending requests. */
- if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
- (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
- bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
- else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
- ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
- ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
- /*
- * Clear the busy bit before unlocking to avoid races with
- * adding new work queue items and then failing to process
- * them.
- */
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- goto bail;
- }
+bail:;
+}
- spin_unlock_irqrestore(&qp->s_lock, flags);
+/*
+ * This should be called with s_lock held.
+ */
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
- /* Construct the header. */
- extra_bytes = (4 - qp->s_cur_size) & 3;
- nwords = (qp->s_cur_size + extra_bytes) >> 2;
- lrh0 = IPATH_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
- lrh0 = IPATH_LRH_GRH;
- }
- lrh0 |= qp->remote_ah_attr.sl << 4;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
- SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
- bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
- bth0 |= extra_bytes << 20;
- ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[2] = cpu_to_be32(bth2);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ return;
- /* Check for more work to do. */
- goto again;
+ /* See ch. 11.2.4.1 and 10.7.3.1 */
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
-clear:
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
-bail:
- return;
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+
+ old_last = last = qp->s_last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
new file mode 100644
index 00000000000..17a517766ad
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/gfp.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
+
+static void vl15_watchdog_enq(struct ipath_devdata *dd)
+{
+ /* ipath_sdma_lock must already be held */
+ if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
+ unsigned long interval = (HZ + 19) / 20;
+ dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
+ add_timer(&dd->ipath_sdma_vl15_timer);
+ }
+}
+
+static void vl15_watchdog_deq(struct ipath_devdata *dd)
+{
+ /* ipath_sdma_lock must already be held */
+ if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
+ unsigned long interval = (HZ + 19) / 20;
+ mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
+ } else {
+ del_timer(&dd->ipath_sdma_vl15_timer);
+ }
+}
+
+static void vl15_watchdog_timeout(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
+ ipath_dbg("vl15 watchdog timeout - clearing\n");
+ ipath_cancel_sends(dd, 1);
+ ipath_hol_down(dd);
+ } else {
+ ipath_dbg("vl15 watchdog timeout - "
+ "condition already cleared\n");
+ }
+}
+
+static void unmap_desc(struct ipath_devdata *dd, unsigned head)
+{
+ __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
+ u64 desc[2];
+ dma_addr_t addr;
+ size_t len;
+
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+
+ addr = (desc[1] << 32) | (desc[0] >> 32);
+ len = (desc[0] >> 14) & (0x7ffULL << 2);
+ dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+/*
+ * ipath_sdma_lock should be locked before calling this.
+ */
+int ipath_sdma_make_progress(struct ipath_devdata *dd)
+{
+ struct list_head *lp = NULL;
+ struct ipath_sdma_txreq *txp = NULL;
+ u16 dmahead;
+ u16 start_idx = 0;
+ int progress = 0;
+
+ if (!list_empty(&dd->ipath_sdma_activelist)) {
+ lp = dd->ipath_sdma_activelist.next;
+ txp = list_entry(lp, struct ipath_sdma_txreq, list);
+ start_idx = txp->start_idx;
+ }
+
+ /*
+ * Read the SDMA head register in order to know that the
+ * interrupt clear has been written to the chip.
+ * Otherwise, we may not get an interrupt for the last
+ * descriptor in the queue.
+ */
+ dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
+ /* sanity check return value for error handling (chip reset, etc.) */
+ if (dmahead >= dd->ipath_sdma_descq_cnt)
+ goto done;
+
+ while (dd->ipath_sdma_descq_head != dmahead) {
+ if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
+ dd->ipath_sdma_descq_head == start_idx) {
+ unmap_desc(dd, dd->ipath_sdma_descq_head);
+ start_idx++;
+ if (start_idx == dd->ipath_sdma_descq_cnt)
+ start_idx = 0;
+ }
+
+ /* increment free count and head */
+ dd->ipath_sdma_descq_removed++;
+ if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
+ dd->ipath_sdma_descq_head = 0;
+
+ if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
+ /* move to notify list */
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(lp, &dd->ipath_sdma_notifylist);
+ if (!list_empty(&dd->ipath_sdma_activelist)) {
+ lp = dd->ipath_sdma_activelist.next;
+ txp = list_entry(lp, struct ipath_sdma_txreq,
+ list);
+ start_idx = txp->start_idx;
+ } else {
+ lp = NULL;
+ txp = NULL;
+ }
+ }
+ progress = 1;
+ }
+
+ if (progress)
+ tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+done:
+ return progress;
+}
+
+static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
+{
+ struct ipath_sdma_txreq *txp, *txp_next;
+
+ list_for_each_entry_safe(txp, txp_next, list, list) {
+ list_del_init(&txp->list);
+
+ if (txp->callback)
+ (*txp->callback)(txp->callback_cookie,
+ txp->callback_status);
+ }
+}
+
+static void sdma_notify_taskbody(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ struct list_head list;
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ list_splice_init(&dd->ipath_sdma_notifylist, &list);
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ ipath_sdma_notify(dd, &list);
+
+ /*
+ * The IB verbs layer needs to see the callback before getting
+ * the call to ipath_ib_piobufavail() because the callback
+ * handles releasing resources the next send will need.
+ * Otherwise, we could do these calls in
+ * ipath_sdma_make_progress().
+ */
+ ipath_ib_piobufavail(dd->verbs_dev);
+}
+
+static void sdma_notify_task(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ sdma_notify_taskbody(dd);
+}
+
+static void dump_sdma_state(struct ipath_devdata *dd)
+{
+ unsigned long reg;
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
+ ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
+ ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+ ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+ ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
+}
+
+static void sdma_abort_task(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+ u64 status;
+ unsigned long flags;
+
+ if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ return;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
+
+ /* nothing to do */
+ if (status == IPATH_SDMA_ABORT_NONE)
+ goto unlock;
+
+ /* ipath_sdma_abort() is done, waiting for interrupt */
+ if (status == IPATH_SDMA_ABORT_DISARMED) {
+ if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
+ goto resched_noprint;
+ /* give up, intr got lost somewhere */
+ ipath_dbg("give up waiting for SDMADISABLED intr\n");
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ status = IPATH_SDMA_ABORT_ABORTED;
+ }
+
+ /* everything is stopped, time to clean up and restart */
+ if (status == IPATH_SDMA_ABORT_ABORTED) {
+ struct ipath_sdma_txreq *txp, *txpnext;
+ u64 hwstatus;
+ int notify = 0;
+
+ hwstatus = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_senddmastatus);
+
+ if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
+ IPATH_SDMA_STATUS_ABORT_IN_PROG |
+ IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
+ !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
+ if (dd->ipath_sdma_reset_wait > 0) {
+ /* not done shutting down sdma */
+ --dd->ipath_sdma_reset_wait;
+ goto resched;
+ }
+ ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
+ "status after SDMA reset, continuing\n");
+ dump_sdma_state(dd);
+ }
+
+ /* dequeue all "sent" requests */
+ list_for_each_entry_safe(txp, txpnext,
+ &dd->ipath_sdma_activelist, list) {
+ txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+ notify = 1;
+ }
+ if (notify)
+ tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+ /* reset our notion of head and tail */
+ dd->ipath_sdma_descq_tail = 0;
+ dd->ipath_sdma_descq_head = 0;
+ dd->ipath_sdma_head_dma[0] = 0;
+ dd->ipath_sdma_generation = 0;
+ dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
+
+ /* Reset SendDmaLenGen */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
+ (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
+
+ /* done with sdma state for a bit */
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ /*
+ * Don't restart sdma here (with the exception
+ * below). Wait until link is up to ACTIVE. VL15 MADs
+ * used to bring the link up use PIO, and multiple link
+ * transitions otherwise cause the sdma engine to be
+ * stopped and started multiple times.
+ * The disable is done here, including the shadow,
+ * so the state is kept consistent.
+ * See ipath_restart_sdma() for the actual starting
+ * of sdma.
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* make sure I see next message */
+ dd->ipath_sdma_abort_jiffies = 0;
+
+ /*
+ * Not everything that takes SDMA offline is a link
+ * status change. If the link was up, restart SDMA.
+ */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ ipath_restart_sdma(dd);
+
+ goto done;
+ }
+
+resched:
+ /*
+ * for now, keep spinning
+ * JAG - this is bad to just have default be a loop without
+ * state change
+ */
+ if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
+ ipath_dbg("looping with status 0x%08lx\n",
+ dd->ipath_sdma_status);
+ dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
+ }
+resched_noprint:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ return;
+
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+done:
+ return;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void ipath_sdma_intr(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ (void) ipath_sdma_make_progress(dd);
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+}
+
+static int alloc_sdma(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ /* Allocate memory for SendDMA descriptor FIFO */
+ dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
+ SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
+
+ if (!dd->ipath_sdma_descq) {
+ ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
+ "FIFO memory\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ dd->ipath_sdma_descq_cnt =
+ SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
+
+ /* Allocate memory for DMA of head register to memory */
+ dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
+ PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
+ if (!dd->ipath_sdma_head_dma) {
+ ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
+ ret = -ENOMEM;
+ goto cleanup_descq;
+ }
+ dd->ipath_sdma_head_dma[0] = 0;
+
+ init_timer(&dd->ipath_sdma_vl15_timer);
+ dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout;
+ dd->ipath_sdma_vl15_timer.data = (unsigned long)dd;
+ atomic_set(&dd->ipath_sdma_vl15_count, 0);
+
+ goto done;
+
+cleanup_descq:
+ dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+ (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
+ dd->ipath_sdma_descq = NULL;
+ dd->ipath_sdma_descq_phys = 0;
+done:
+ return ret;
+}
+
+int setup_sdma(struct ipath_devdata *dd)
+{
+ int ret = 0;
+ unsigned i, n;
+ u64 tmp64;
+ u64 senddmabufmask[3] = { 0 };
+ unsigned long flags;
+
+ ret = alloc_sdma(dd);
+ if (ret)
+ goto done;
+
+ if (!dd->ipath_sdma_descq) {
+ ipath_dev_err(dd, "SendDMA memory not allocated\n");
+ goto done;
+ }
+
+ /*
+ * Set initial status as if we had been up, then gone down.
+ * This lets initial start on transition to ACTIVE be the
+ * same as restart after link flap.
+ */
+ dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
+ dd->ipath_sdma_abort_jiffies = 0;
+ dd->ipath_sdma_generation = 0;
+ dd->ipath_sdma_descq_tail = 0;
+ dd->ipath_sdma_descq_head = 0;
+ dd->ipath_sdma_descq_removed = 0;
+ dd->ipath_sdma_descq_added = 0;
+
+ /* Set SendDmaBase */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
+ dd->ipath_sdma_descq_phys);
+ /* Set SendDmaLenGen */
+ tmp64 = dd->ipath_sdma_descq_cnt;
+ tmp64 |= 1<<18; /* enable generation checking */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
+ /* Set SendDmaTail */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
+ dd->ipath_sdma_descq_tail);
+ /* Set SendDmaHeadAddr */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
+ dd->ipath_sdma_head_phys);
+
+ /*
+ * Reserve all the former "kernel" piobufs, using high number range
+ * so we get as many 4K buffers as possible
+ */
+ n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
+ ipath_chg_pioavailkernel(dd, i, n - i , 0);
+ for (; i < n; ++i) {
+ unsigned word = i / 64;
+ unsigned bit = i & 63;
+ BUG_ON(word >= 3);
+ senddmabufmask[word] |= 1ULL << bit;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
+ senddmabufmask[0]);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
+ senddmabufmask[1]);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
+ senddmabufmask[2]);
+
+ INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
+ INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
+
+ tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
+ (unsigned long) dd);
+ tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
+ (unsigned long) dd);
+
+ /*
+ * No use to turn on SDMA here, as link is probably not ACTIVE
+ * Just mark it RUNNING and enable the interrupt, and let the
+ * ipath_restart_sdma() on link transition to ACTIVE actually
+ * enable it.
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+done:
+ return ret;
+}
+
+void teardown_sdma(struct ipath_devdata *dd)
+{
+ struct ipath_sdma_txreq *txp, *txpnext;
+ unsigned long flags;
+ dma_addr_t sdma_head_phys = 0;
+ dma_addr_t sdma_descq_phys = 0;
+ void *sdma_descq = NULL;
+ void *sdma_head_dma = NULL;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+ __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ tasklet_kill(&dd->ipath_sdma_abort_task);
+ tasklet_kill(&dd->ipath_sdma_notify_task);
+
+ /* turn off sdma */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ /* dequeue all "sent" requests */
+ list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
+ list) {
+ txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ sdma_notify_taskbody(dd);
+
+ del_timer_sync(&dd->ipath_sdma_vl15_timer);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ dd->ipath_sdma_abort_jiffies = 0;
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
+
+ if (dd->ipath_sdma_head_dma) {
+ sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
+ sdma_head_phys = dd->ipath_sdma_head_phys;
+ dd->ipath_sdma_head_dma = NULL;
+ dd->ipath_sdma_head_phys = 0;
+ }
+
+ if (dd->ipath_sdma_descq) {
+ sdma_descq = dd->ipath_sdma_descq;
+ sdma_descq_phys = dd->ipath_sdma_descq_phys;
+ dd->ipath_sdma_descq = NULL;
+ dd->ipath_sdma_descq_phys = 0;
+ }
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ if (sdma_head_dma)
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ sdma_head_dma, sdma_head_phys);
+
+ if (sdma_descq)
+ dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+ sdma_descq, sdma_descq_phys);
+}
+
+/*
+ * [Re]start SDMA, if we use it, and it's not already OK.
+ * This is called on transition to link ACTIVE, either the first or
+ * subsequent times.
+ */
+void ipath_restart_sdma(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ int needed = 1;
+
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+ goto bail;
+
+ /*
+ * First, make sure we should, which is to say,
+ * check that we are "RUNNING" (not in teardown)
+ * and not "SHUTDOWN"
+ */
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
+ || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ needed = 0;
+ else {
+ __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+ __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!needed) {
+ ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
+ dd->ipath_sdma_status);
+ goto bail;
+ }
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ /*
+ * First clear, just to be safe. Enable is only done
+ * in chip on 0->1 transition
+ */
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* notify upper layers */
+ ipath_ib_piobufavail(dd->verbs_dev);
+
+bail:
+ return;
+}
+
+static inline void make_sdma_desc(struct ipath_devdata *dd,
+ u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
+{
+ WARN_ON(addr & 3);
+ /* SDmaPhyAddr[47:32] */
+ sdmadesc[1] = addr >> 32;
+ /* SDmaPhyAddr[31:0] */
+ sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+ /* SDmaGeneration[1:0] */
+ sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
+ /* SDmaDwordCount[10:0] */
+ sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
+ /* SDmaBufOffset[12:2] */
+ sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ * the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ * (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int ipath_sdma_verbs_send(struct ipath_devdata *dd,
+ struct ipath_sge_state *ss, u32 dwords,
+ struct ipath_verbs_txreq *tx)
+{
+
+ unsigned long flags;
+ struct ipath_sge *sge;
+ int ret = 0;
+ u16 tail;
+ __le64 *descqp;
+ u64 sdmadesc[2];
+ u32 dwoffset;
+ dma_addr_t addr;
+
+ if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
+ ipath_dbg("packet size %X > ibmax %X, fail\n",
+ tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
+ ret = -EMSGSIZE;
+ goto fail;
+ }
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+retry:
+ if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
+ if (ipath_sdma_make_progress(dd))
+ goto retry;
+ ret = -ENOBUFS;
+ goto unlock;
+ }
+
+ addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
+ tx->map_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, addr))
+ goto ioerr;
+
+ dwoffset = tx->map_len >> 2;
+ make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
+
+ /* SDmaFirstDesc */
+ sdmadesc[0] |= 1ULL << 12;
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */
+
+ /* write to the descq */
+ tail = dd->ipath_sdma_descq_tail;
+ descqp = &dd->ipath_sdma_descq[tail].qw[0];
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
+ tx->txreq.start_idx = tail;
+
+ /* increment the tail */
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ descqp = &dd->ipath_sdma_descq[0].qw[0];
+ ++dd->ipath_sdma_generation;
+ }
+
+ sge = &ss->sge;
+ while (dwords) {
+ u32 dw;
+ u32 len;
+
+ len = dwords << 2;
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ dw = (len + 3) >> 2;
+ addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, addr))
+ goto unmap;
+ make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
+ /* SDmaUseLargeBuf has to be set in every descriptor */
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= 1ULL << 14;
+ /* write to the descq */
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ descqp = &dd->ipath_sdma_descq[0].qw[0];
+ ++dd->ipath_sdma_generation;
+ }
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+
+ dwoffset += dw;
+ dwords -= dw;
+ }
+
+ if (!tail)
+ descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
+ descqp -= 2;
+ /* SDmaLastDesc */
+ descqp[0] |= cpu_to_le64(1ULL << 11);
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
+ /* SDmaIntReq */
+ descqp[0] |= cpu_to_le64(1ULL << 15);
+ }
+
+ /* Commit writes to memory and advance the tail on the chip */
+ wmb();
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+
+ tx->txreq.next_descq_idx = tail;
+ tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
+ dd->ipath_sdma_descq_tail = tail;
+ dd->ipath_sdma_descq_added += tx->txreq.sg_count;
+ list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_enq(dd);
+ goto unlock;
+
+unmap:
+ while (tail != dd->ipath_sdma_descq_tail) {
+ if (!tail)
+ tail = dd->ipath_sdma_descq_cnt - 1;
+ else
+ tail--;
+ unmap_desc(dd, tail);
+ }
+ioerr:
+ ret = -EIO;
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+fail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 941e866d951..26271984b71 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,6 +32,7 @@
*/
#include <linux/err.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
@@ -59,7 +60,7 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
- ret = -ENOMEM;
+ ret = -EINVAL;
goto bail;
}
@@ -80,6 +81,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
@@ -92,8 +95,8 @@ bail:
/**
* ipath_create_srq - create a shared receive queue
* @ibpd: the protection domain of the SRQ to create
- * @attr: the attributes of the SRQ
- * @udata: not used by the InfiniPath verbs driver
+ * @srq_init_attr: the attributes of the SRQ
+ * @udata: data from libipathverbs when creating a user SRQ
*/
struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
@@ -104,8 +107,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
- if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
- ret = ERR_PTR(-ENOMEM);
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ ret = ERR_PTR(-ENOSYS);
goto done;
}
@@ -144,33 +147,24 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) srq->rq.wq;
int err;
+ u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
+ srq->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ srq->rq.wq);
+ if (!srq->ip) {
+ ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wq;
+ err = ib_copy_to_udata(udata, &srq->ip->offset,
+ sizeof(srq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
}
- srq->ip = ip;
- ip->context = ibpd->uobject->context;
- ip->obj = srq->rq.wq;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- srq->rq.size * sz);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
} else
srq->ip = NULL;
@@ -180,20 +174,33 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
spin_lock_init(&srq->rq.lock);
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
- srq->rq.max_sge = srq_init_attr->attr.max_sge;
srq->limit = srq_init_attr->attr.srq_limit;
- dev->n_srqs_allocated++;
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
+ if (srq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
ret = &srq->ibsrq;
goto done;
+bail_ip:
+ kfree(srq->ip);
bail_wq:
vfree(srq->rq.wq);
-
bail_srq:
kfree(srq);
-
done:
return ret;
}
@@ -210,11 +217,11 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
struct ib_udata *udata)
{
struct ipath_srq *srq = to_isrq(ibsrq);
+ struct ipath_rwq *wq;
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
struct ipath_rwq *owq;
- struct ipath_rwq *wq;
struct ipath_rwqe *p;
u32 sz, size, n, head, tail;
@@ -235,27 +242,21 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto bail;
}
- /*
- * Return the address of the RWQ as the offset to mmap.
- * See ipath_mmap() for details.
- */
+ /* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
- __u64 offset = (__u64) wq;
+ __u64 offset = 0;
ret = ib_copy_from_udata(&offset_addr, udata,
sizeof(offset_addr));
- if (ret) {
- vfree(wq);
- goto bail;
- }
- udata->outbuf = (void __user *) offset_addr;
+ if (ret)
+ goto bail_free;
+ udata->outbuf =
+ (void __user *) (unsigned long) offset_addr;
ret = ib_copy_to_udata(udata, &offset,
sizeof(offset));
- if (ret) {
- vfree(wq);
- goto bail;
- }
+ if (ret)
+ goto bail_free;
}
spin_lock_irq(&srq->rq.lock);
@@ -276,10 +277,8 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
else
n -= tail;
if (size <= n) {
- spin_unlock_irq(&srq->rq.lock);
- vfree(wq);
ret = -EINVAL;
- goto bail;
+ goto bail_unlock;
}
n = 0;
p = wq->wq;
@@ -310,13 +309,25 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (srq->ip) {
struct ipath_mmap_info *ip = srq->ip;
struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+ u32 s = sizeof(struct ipath_rwq) + size * sz;
+
+ ipath_update_mmap_info(dev, ip, s, wq);
+
+ /*
+ * Return the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
- ip->obj = wq;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- size * sz);
spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps,
+ &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
@@ -327,7 +338,12 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
}
+ goto bail;
+bail_unlock:
+ spin_unlock_irq(&srq->rq.lock);
+bail_free:
+ vfree(wq);
bail:
return ret;
}
@@ -351,8 +367,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq)
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
+ spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
- vfree(srq->rq.wq);
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
kfree(srq);
return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fc..f63e143e329 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,8 +31,6 @@
* SOFTWARE.
*/
-#include <linux/pci.h>
-
#include "ipath_kernel.h"
struct infinipath_stats ipath_stats;
@@ -114,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
dd->ipath_lastrpkts = val;
}
val64 = dd->ipath_rpkts;
+ } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
+ if (dd->ibdeltainprog)
+ val64 -= val64 - dd->ibsymsnap;
+ val64 -= dd->ibsymdelta;
+ } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
+ if (dd->ibdeltainprog)
+ val64 -= val64 - dd->iblnkerrsnap;
+ val64 -= dd->iblnkerrdelta;
} else
val64 = (u64) val;
@@ -135,15 +141,17 @@ bail:
static void ipath_qcheck(struct ipath_devdata *dd)
{
static u64 last_tot_hdrqfull;
+ struct ipath_portdata *pd = dd->ipath_pd[0];
size_t blen = 0;
char buf[128];
+ u32 hdrqtail;
*buf = 0;
- if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) {
+ if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
- dd->ipath_pd[0]->port_hdrqfull -
+ pd->port_hdrqfull -
dd->ipath_p0_hdrqfull);
- dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull;
+ dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
}
if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
blen += snprintf(buf + blen, sizeof buf - blen,
@@ -175,22 +183,62 @@ static void ipath_qcheck(struct ipath_devdata *dd)
if (blen)
ipath_dbg("%s\n", buf);
- if (dd->ipath_port0head != (u32)
- le64_to_cpu(*dd->ipath_hdrqtailptr)) {
+ hdrqtail = ipath_get_hdrqtail(pd);
+ if (pd->port_head != hdrqtail) {
if (dd->ipath_lastport0rcv_cnt ==
ipath_stats.sps_port0pkts) {
ipath_cdbg(PKT, "missing rcv interrupts? "
- "port0 hd=%llx tl=%x; port0pkts %llx\n",
- (unsigned long long)
- le64_to_cpu(*dd->ipath_hdrqtailptr),
- dd->ipath_port0head,
+ "port0 hd=%x tl=%x; port0pkts %llx; write"
+ " hd (w/intr)\n",
+ pd->port_head, hdrqtail,
(unsigned long long)
ipath_stats.sps_port0pkts);
+ ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
+ dd->ipath_rhdrhead_intr_off, pd->port_port);
}
dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
}
}
+static void ipath_chk_errormask(struct ipath_devdata *dd)
+{
+ static u32 fixed;
+ u32 ctrl;
+ unsigned long errormask;
+ unsigned long hwerrs;
+
+ if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+
+ if (errormask == dd->ipath_errormask)
+ return;
+ fixed++;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+
+ if ((hwerrs & dd->ipath_hwerrmask) ||
+ (ctrl & INFINIPATH_C_FREEZEMODE)) {
+ /* force re-interrupt of pending events, just in case */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+ dev_info(&dd->pcidev->dev,
+ "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
+ fixed, errormask, (unsigned long)dd->ipath_errormask,
+ ctrl, hwerrs);
+ } else
+ ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
+ fixed, errormask,
+ (unsigned long)dd->ipath_errormask);
+}
+
+
/**
* ipath_get_faststats - get word counters from chip before they overflow
* @opaque - contains a pointer to the infinipath device ipath_devdata
@@ -200,21 +248,35 @@ static void ipath_qcheck(struct ipath_devdata *dd)
void ipath_get_faststats(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
- u32 val;
+ int i;
static unsigned cnt;
+ unsigned long flags;
+ u64 traffic_wds;
/*
* don't access the chip while running diags, or memory diags can
* fail
*/
- if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
ipath_diag_inuse)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
- if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ /*
+ * We now try to maintain a "active timer", based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ traffic_wds -= dd->ipath_traffic_wds;
+ dd->ipath_traffic_wds += traffic_wds;
+ if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->ipath_active_time); /* S/B #define */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
+ if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
}
@@ -234,14 +296,15 @@ void ipath_get_faststats(unsigned long opaque)
dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0;
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
+ if (dd->ipath_maskederrs
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
- ipath_decode_err(ebuf, sizeof ebuf,
- (dd->ipath_maskederrs & ~dd->
- ipath_ignorederrs));
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ int iserr;
+ iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
+ dd->ipath_maskederrs);
+ if (dd->ipath_maskederrs &
+ ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Re-enabling masked errors "
"(%s)\n", ebuf);
else {
@@ -252,25 +315,33 @@ void ipath_get_faststats(unsigned long opaque)
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", ebuf);
+ if (iserr)
+ ipath_dbg(
+ "Re-enabling queue full errors (%s)\n",
+ ebuf);
+ else
+ ipath_cdbg(ERRPKT, "Re-enabling packet"
+ " problem interrupt (%s)\n", ebuf);
}
- dd->ipath_maskederrs = dd->ipath_ignorederrs;
+
+ /* re-enable masked errors */
+ dd->ipath_errormask |= dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
- ~dd->ipath_maskederrs);
+ dd->ipath_errormask);
+ dd->ipath_maskederrs = 0;
}
/* limit qfull messages to ~one per minute per port */
if ((++cnt & 0x10)) {
- for (val = dd->ipath_cfgports - 1; ((int)val) >= 0;
- val--) {
- if (dd->ipath_lastegrheads[val] != -1)
- dd->ipath_lastegrheads[val] = -1;
- if (dd->ipath_lastrcvhdrqtails[val] != -1)
- dd->ipath_lastrcvhdrqtails[val] = -1;
+ for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ if (pd && pd->port_lastrcvhdrqtail != -1)
+ pd->port_lastrcvhdrqtail = -1;
}
}
+ ipath_chk_errormask(dd);
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index e299148c4b6..75558f33f1c 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,9 +32,10 @@
*/
#include <linux/ctype.h>
-#include <linux/pci.h>
+#include <linux/stat.h>
#include "ipath_kernel.h"
+#include "ipath_verbs.h"
#include "ipath_common.h"
/**
@@ -164,6 +165,51 @@ static ssize_t show_boardversion(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
}
+static ssize_t show_localbus_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
+}
+
+static ssize_t show_lmc(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
+}
+
+static ssize_t store_lmc(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 lmc = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &lmc);
+ if (ret < 0)
+ goto invalid;
+
+ if (lmc > 7) {
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ipath_set_lid(dd, dd->ipath_lid, lmc);
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
+bail:
+ return ret;
+}
+
static ssize_t show_lid(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -191,7 +237,7 @@ static ssize_t store_lid(struct device *dev,
goto invalid;
}
- ipath_set_lid(dd, lid, 0);
+ ipath_set_lid(dd, lid, dd->ipath_lmc);
goto bail;
invalid:
@@ -215,7 +261,6 @@ static ssize_t store_mlid(struct device *dev,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
- int unit;
u16 mlid;
int ret;
@@ -223,8 +268,6 @@ static ssize_t store_mlid(struct device *dev,
if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
goto invalid;
- unit = dd->ipath_unit;
-
dd->ipath_mlid = mlid;
goto bail;
@@ -257,7 +300,7 @@ static ssize_t store_guid(struct device *dev,
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
- __be64 nguid;
+ __be64 new_guid;
u8 *ng;
int i;
@@ -266,7 +309,7 @@ static ssize_t store_guid(struct device *dev,
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
- ng = (u8 *) &nguid;
+ ng = (u8 *) &new_guid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
@@ -274,8 +317,13 @@ static ssize_t store_guid(struct device *dev,
ng[i] = guid[i];
}
- dd->ipath_guid = nguid;
+ if (new_guid == 0)
+ goto invalid;
+
+ dd->ipath_guid = new_guid;
dd->ipath_nguid = 1;
+ if (dd->verbs_dev)
+ dd->verbs_dev->ibdev.node_guid = new_guid;
ret = strlen(buf);
goto bail;
@@ -297,6 +345,16 @@ static ssize_t show_nguid(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
+static ssize_t show_nports(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ /* Return the number of user ports available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -318,6 +376,60 @@ static ssize_t show_unit(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
}
+static ssize_t show_jint_max_packets(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
+}
+
+static ssize_t store_jint_max_packets(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 v = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &v);
+ if (ret < 0)
+ ipath_dev_err(dd, "invalid jint_max_packets.\n");
+ else
+ dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
+
+ return ret;
+}
+
+static ssize_t show_jint_idle_ticks(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
+}
+
+static ssize_t store_jint_idle_ticks(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 v = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &v);
+ if (ret < 0)
+ ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
+ else
+ dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
+
+ return ret;
+}
+
#define DEVICE_COUNTER(name, attr) \
static ssize_t show_counter_##name(struct device *dev, \
struct device_attribute *attr, \
@@ -446,6 +558,7 @@ static ssize_t store_reset(struct device *dev,
dev_info(dev,"Unit %d is disabled, can't reset\n",
dd->ipath_unit);
ret = -EINVAL;
+ goto bail;
}
ret = ipath_reset_device(dd->ipath_unit);
bail:
@@ -587,6 +700,294 @@ bail:
return ret;
}
+static ssize_t store_led_override(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret > 0)
+ ipath_set_led_override(dd, val);
+ else
+ ipath_dev_err(dd, "attempt to set invalid LED override\n");
+ return ret;
+}
+
+static ssize_t show_logged_errs(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (ipath_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->ipath_eep_st_errs[idx],
+ idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
+
+/*
+ * New sysfs entries to control various IB config. These all turn into
+ * accesses via ipath_f_get/set_ib_cfg.
+ *
+ * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
+ */
+static ssize_t show_hrtbt_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_hrtbt_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 3)
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ goto bail;
+ }
+
+ /*
+ * Set the "intentional" heartbeat enable per either of
+ * "Enable" and "Auto", as these are normally set together.
+ * This bit is consulted when leaving loopback mode,
+ * because entering loopback mode overrides it and automatically
+ * disables heartbeat.
+ */
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
+ if (r < 0)
+ ret = r;
+ else if (val == IPATH_IB_HRTBT_OFF)
+ dd->ipath_flags |= IPATH_NO_HRTBT;
+ else
+ dd->ipath_flags &= ~IPATH_NO_HRTBT;
+
+bail:
+ return ret;
+}
+
+/*
+ * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
+ * _not_ the particular encoding of any given chip)
+ */
+static ssize_t show_lwid_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_lwid_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && (val == 0 || val > 3))
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Link Width (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/* Get current link width */
+static ssize_t show_lwid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+/*
+ * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
+ */
+static ssize_t show_spd_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_spd_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Link Speed (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/* Get current link speed */
+static ssize_t show_spd(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+/*
+ * Get/Set RX polarity-invert enable. 0=no, 1=yes.
+ */
+static ssize_t show_rx_polinv_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_rx_polinv_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 1) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Rx Polarity (enable)\n");
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/*
+ * Get/Set RX lane-reversal enable. 0=no, 1=yes.
+ */
+static ssize_t show_lanerev_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_lanerev_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 1) {
+ ret = -EINVAL;
+ ipath_dev_err(dd,
+ "attempt to set invalid Lane reversal (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -601,13 +1002,89 @@ static struct attribute_group driver_attr_group = {
.attrs = driver_attributes
};
+static ssize_t store_tempsense(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, stat;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret <= 0) {
+ ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
+ goto bail;
+ }
+ /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
+ stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set tempsense config\n");
+ ret = -1;
+ goto bail;
+ }
+ stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set local Tcrit\n");
+ ret = -1;
+ goto bail;
+ }
+ stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set remote Tcrit\n");
+ ret = -1;
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
+
+/*
+ * dump tempsense regs. in decimal, to ease shell-scripts.
+ */
+static ssize_t show_tempsense(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+ int idx;
+ u8 regvals[8];
+
+ ret = -ENXIO;
+ for (idx = 0; idx < 8; ++idx) {
+ if (idx == 6)
+ continue;
+ ret = ipath_tempsense_read(dd, idx);
+ if (ret < 0)
+ break;
+ regvals[idx] = ret;
+ }
+ if (idx == 8)
+ ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+ *(signed char *)(regvals),
+ *(signed char *)(regvals + 1),
+ regvals[2], regvals[3],
+ *(signed char *)(regvals + 5),
+ *(signed char *)(regvals + 7));
+ return ret;
+}
+
+const struct attribute_group *ipath_driver_attr_groups[] = {
+ &driver_attr_group,
+ NULL,
+};
+
static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
+static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
@@ -615,14 +1092,25 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
+static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
+static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
+ show_jint_max_packets, store_jint_max_packets);
+static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
+ show_jint_idle_ticks, store_jint_idle_ticks);
+static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
+ show_tempsense, store_tempsense);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
+ &dev_attr_lmc.attr,
&dev_attr_lid.attr,
&dev_attr_link_state.attr,
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
+ &dev_attr_nports.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
@@ -630,6 +1118,10 @@ static struct attribute *dev_attributes[] = {
&dev_attr_unit.attr,
&dev_attr_enabled.attr,
&dev_attr_rx_pol_inv.attr,
+ &dev_attr_led_override.attr,
+ &dev_attr_logged_errors.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_localbus_info.attr,
NULL
};
@@ -637,6 +1129,34 @@ static struct attribute_group dev_attr_group = {
.attrs = dev_attributes
};
+static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
+ store_hrtbt_enb);
+static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
+ store_lwid_enb);
+static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
+static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
+ store_spd_enb);
+static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
+static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
+ store_rx_polinv_enb);
+static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
+ store_lanerev_enb);
+
+static struct attribute *dev_ibcfg_attributes[] = {
+ &dev_attr_hrtbt_enable.attr,
+ &dev_attr_link_width_enable.attr,
+ &dev_attr_link_width.attr,
+ &dev_attr_link_speed_enable.attr,
+ &dev_attr_link_speed.attr,
+ &dev_attr_rx_pol_inv_enable.attr,
+ &dev_attr_rx_lane_rev_enable.attr,
+ NULL
+};
+
+static struct attribute_group dev_ibcfg_attr_group = {
+ .attrs = dev_ibcfg_attributes
+};
+
/**
* ipath_expose_reset - create a device reset file
* @dev: the device structure
@@ -663,24 +1183,9 @@ int ipath_expose_reset(struct device *dev)
return ret;
}
-int ipath_driver_create_group(struct device_driver *drv)
-{
- int ret;
-
- ret = sysfs_create_group(&drv->kobj, &driver_attr_group);
-
- return ret;
-}
-
-void ipath_driver_remove_group(struct device_driver *drv)
-{
- sysfs_remove_group(&drv->kobj, &driver_attr_group);
-}
-
int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
{
int ret;
- char unit[5];
ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
if (ret)
@@ -690,11 +1195,26 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
if (ret)
goto bail_attrs;
- snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
- ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit);
- if (ret == 0)
- goto bail;
+ if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
+ ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
+ if (ret)
+ goto bail_counter;
+ ret = device_create_file(dev, &dev_attr_jint_max_packets);
+ if (ret)
+ goto bail_idle;
+
+ ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
+ if (ret)
+ goto bail_max;
+ }
+
+ return 0;
+bail_max:
+ device_remove_file(dev, &dev_attr_jint_max_packets);
+bail_idle:
+ device_remove_file(dev, &dev_attr_jint_idle_ticks);
+bail_counter:
sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
bail_attrs:
sysfs_remove_group(&dev->kobj, &dev_attr_group);
@@ -704,12 +1224,14 @@ bail:
void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
{
- char unit[5];
+ sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
- snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
- sysfs_remove_link(&dev->driver->kobj, unit);
+ if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
+ sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
+ device_remove_file(dev, &dev_attr_jint_idle_ticks);
+ device_remove_file(dev, &dev_attr_jint_max_packets);
+ }
- sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
sysfs_remove_group(&dev->kobj, &dev_attr_group);
device_remove_file(dev, &dev_attr_reset);
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0fd3cded16b..22e60998f1a 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -37,72 +37,60 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
-static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
- struct ib_wc *wc)
-{
- if (++qp->s_last == qp->s_size)
- qp->s_last = 0;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- wc->wr_id = wqe->wr.wr_id;
- wc->status = IB_WC_SUCCESS;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = wqe->length;
- wc->qp_num = qp->ibqp.qp_num;
- wc->src_qp = qp->remote_qpn;
- wc->pkey_index = 0;
- wc->slid = qp->remote_ah_attr.dlid;
- wc->sl = qp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
- }
- wqe = get_swqe_ptr(qp, qp->s_last);
-}
-
/**
* ipath_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- * @bth0p: pointer to the BTH opcode word
- * @bth2p: pointer to the BTH PSN word
*
* Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held and interrupts disabled.
*/
-int ipath_make_uc_req(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu, u32 *bth0p, u32 *bth2p)
+int ipath_make_uc_req(struct ipath_qp *qp)
{
+ struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
+ unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
- struct ib_wc wc;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
+ }
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- bth0 = 0;
+ bth0 = 1 << 22; /* Set M bit */
/* Get the next send request. */
- wqe = get_swqe_ptr(qp, qp->s_last);
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ qp->s_wqe = NULL;
switch (qp->s_state) {
default:
- /*
- * Signal the completion of the last send
- * (if there is one).
- */
- if (qp->s_last != qp->s_tail)
- complete_last_send(qp, wqe, &wc);
-
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head)
- goto done;
+ if (qp->s_cur == qp->s_head)
+ goto bail;
/*
* Start a new request.
*/
@@ -125,11 +113,14 @@ int ipath_make_uc_req(struct ipath_qp *qp,
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
break;
case IB_WR_RDMA_WRITE:
@@ -151,18 +142,19 @@ int ipath_make_uc_req(struct ipath_qp *qp,
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
- ohdr->u.rc.imm_data = wqe->wr.imm_data;
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
break;
default:
- goto done;
+ goto bail;
}
- if (++qp->s_tail >= qp->s_size)
- qp->s_tail = 0;
break;
case OP(SEND_FIRST):
@@ -179,11 +171,14 @@ int ipath_make_uc_req(struct ipath_qp *qp,
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
break;
case OP(RDMA_WRITE_FIRST):
@@ -201,23 +196,32 @@ int ipath_make_uc_req(struct ipath_qp *qp,
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
- ohdr->u.imm_data = wqe->wr.imm_data;
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
break;
}
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
- *bth0p = bth0 | (qp->s_state << 24);
- *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
- return 1;
-
+ ipath_make_ruc_header(to_idev(qp->ibqp.device),
+ qp, ohdr, bth0 | (qp->s_state << 24),
+ qp->s_next_psn++ & IPATH_PSN_MASK);
done:
- return 0;
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
}
/**
@@ -246,6 +250,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -274,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -338,15 +345,15 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
- if (qp->r_reuse_sge) {
- qp->r_reuse_sge = 0;
- qp->r_sge = qp->s_rdma_sge;
+ if (qp->r_flags & IPATH_R_REUSE_SGE) {
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
+ qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
goto done;
}
/* Save the WQE so we can reuse it in case of an error. */
- qp->s_rdma_sge = qp->r_sge;
+ qp->s_rdma_read_sge = qp->r_sge;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto send_last;
@@ -356,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -372,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
+ wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -388,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -396,28 +403,23 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
- /* XXX Need to free SGEs */
+ wc.opcode = IB_WC_RECV;
last_imm:
ipath_copy_sge(&qp->r_sge, data, tlen);
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
@@ -440,7 +442,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey */
- ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
+ ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok)) {
@@ -480,6 +482,16 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
rdma_last_imm:
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else {
+ /* Immediate data comes after BTH */
+ wc.ex.imm_data = ohdr->u.imm_data;
+ }
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
@@ -494,22 +506,14 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
}
- if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
- data += sizeof(__be32);
- } else {
- /* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
- }
- hdrsize += 4;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.byte_len = 0;
+ wc.byte_len = qp->r_len;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
goto last_imm;
case OP(RDMA_WRITE_LAST):
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 6991d1d74e3..e8a2a915251 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,75 +31,23 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
#include "ipath_kernel.h"
-static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
- u32 *lengthp, struct ipath_sge_state *ss)
-{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- int user = to_ipd(qp->ibqp.pd)->user;
- int i, j, ret;
- struct ib_wc wc;
-
- *lengthp = 0;
- for (i = j = 0; i < wqe->num_sge; i++) {
- if (wqe->sg_list[i].length == 0)
- continue;
- /* Check LKEY */
- if ((user && wqe->sg_list[i].lkey == 0) ||
- !ipath_lkey_ok(&dev->lk_table,
- j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
- goto bad_lkey;
- *lengthp += wqe->sg_list[i].length;
- j++;
- }
- ss->num_sge = j;
- ret = 1;
- goto bail;
-
-bad_lkey:
- wc.wr_id = wqe->wr_id;
- wc.status = IB_WC_LOC_PROT_ERR;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- /* Signal solicited completion event. */
- ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
- ret = 0;
-bail:
- return ret;
-}
-
/**
* ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the QP
- * @ss: the SGE state
- * @length: the length of the data to send
- * @wr: the work request
- * @wc: the work completion entry
+ * @sqp: the sending QP
+ * @swqe: the send work request
*
- * This is called from ipath_post_ud_send() to forward a WQE addressed
+ * This is called from ipath_make_ud_req() to forward a WQE addressed
* to the same HCA.
* Note that the receive interrupt handler may be calling ipath_ud_rcv()
* while this is being called.
*/
-static void ipath_ud_loopback(struct ipath_qp *sqp,
- struct ipath_sge_state *ss,
- u32 length, struct ib_send_wr *wr,
- struct ib_wc *wc)
+static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
@@ -112,12 +60,16 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
struct ipath_rwq *wq;
struct ipath_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
+ struct ib_wc wc;
u32 tail;
u32 rlen;
+ u32 length;
- qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
- if (!qp)
- return;
+ qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
@@ -125,39 +77,32 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (unlikely(qp->ibqp.qp_num &&
- ((int) wr->wr.ud.remote_qkey < 0
- ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) {
+ ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
+ sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
- goto done;
+ goto drop;
}
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
- wc->byte_len = length + sizeof(struct ib_grh);
+ length = swqe->length;
+ memset(&wc, 0, sizeof wc);
+ wc.byte_len = length + sizeof(struct ib_grh);
- if (wr->opcode == IB_WR_SEND_WITH_IMM) {
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->imm_data = wr->imm_data;
- } else {
- wc->wc_flags = 0;
- wc->imm_data = 0;
+ if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = swqe->wr.ex.imm_data;
}
- if (wr->num_sge > 1) {
- rsge.sg_list = kmalloc((wr->num_sge - 1) *
- sizeof(struct ipath_sge),
- GFP_ATOMIC);
- } else
- rsge.sg_list = NULL;
-
/*
- * Get the next work request entry to find where to put the data.
- * Note that it is safe to drop the lock after changing rq->tail
- * since ipath_post_receive() won't fill the empty slot.
+ * This would be a lot simpler if we could call ipath_get_rwqe()
+ * but that uses state that the receive interrupt handler uses
+ * so we would need to lock out receive interrupts while doing
+ * local loopback.
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
@@ -169,30 +114,39 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
rq = &qp->r_rq;
}
+ /*
+ * Get the next work request entry to find where to put the data.
+ * Note that it is safe to drop the lock after changing rq->tail
+ * since ipath_post_receive() won't fill the empty slot.
+ */
spin_lock_irqsave(&rq->lock, flags);
wq = rq->wq;
tail = wq->tail;
- while (1) {
- if (unlikely(tail == wq->head)) {
- spin_unlock_irqrestore(&rq->lock, flags);
- dev->n_pkt_drops++;
- goto bail_sge;
- }
- wqe = get_rwqe_ptr(rq, tail);
- if (++tail >= rq->size)
- tail = 0;
- if (init_sge(qp, wqe, &rlen, &rsge))
- break;
- wq->tail = tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+ wqe = get_rwqe_ptr(rq, tail);
+ rsge.sg_list = qp->r_ud_sg_list;
+ if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto drop;
}
/* Silently drop packets which are too big. */
- if (wc->byte_len > rlen) {
+ if (wc.byte_len > rlen) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
- goto bail_sge;
+ goto drop;
}
+ if (++tail >= rq->size)
+ tail = 0;
wq->tail = tail;
- wc->wr_id = wqe->wr_id;
+ wc.wr_id = wqe->wr_id;
if (handler) {
u32 n;
@@ -221,26 +175,28 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
} else
spin_unlock_irqrestore(&rq->lock, flags);
- ah_attr = &to_iah(wr->wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
- wc->wc_flags |= IB_WC_GRH;
+ wc.wc_flags |= IB_WC_GRH;
} else
ipath_skip_sge(&rsge, sizeof(struct ib_grh));
- sge = &ss->sge;
+ sge = swqe->sg_list;
while (length) {
u32 len = sge->length;
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&rsge, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
+ if (--swqe->wr.num_sge)
+ sge++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
@@ -254,118 +210,77 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
}
length -= len;
}
- wc->status = IB_WC_SUCCESS;
- wc->opcode = IB_WC_RECV;
- wc->vendor_err = 0;
- wc->qp_num = qp->ibqp.qp_num;
- wc->src_qp = sqp->ibqp.qp_num;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
- wc->pkey_index = 0;
- wc->slid = dev->dd->ipath_lid |
+ wc.pkey_index = 0;
+ wc.slid = dev->dd->ipath_lid |
(ah_attr->src_path_bits &
- ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
- wc->sl = ah_attr->sl;
- wc->dlid_path_bits =
- ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+ ((1 << dev->dd->ipath_lmc) - 1));
+ wc.sl = ah_attr->sl;
+ wc.dlid_path_bits =
+ ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
+ wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
- ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
- wr->send_flags & IB_SEND_SOLICITED);
-
-bail_sge:
- kfree(rsge.sg_list);
-done:
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ swqe->wr.send_flags & IB_SEND_SOLICITED);
+drop:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
+done:;
}
/**
- * ipath_post_ud_send - post a UD send on QP
+ * ipath_make_ud_req - construct a UD request packet
* @qp: the QP
- * @wr: the work request
*
- * Note that we actually send the data as it is posted instead of putting
- * the request into a ring buffer. If we wanted to use a ring buffer,
- * we would need to save a reference to the destination address in the SWQE.
+ * Return 1 if constructed; otherwise, return 0.
*/
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+int ipath_make_ud_req(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
- struct ipath_sge_state ss;
- struct ipath_sge *sg_list;
- struct ib_wc wc;
- u32 hwords;
+ struct ipath_swqe *wqe;
+ unsigned long flags;
u32 nwords;
- u32 len;
u32 extra_bytes;
u32 bth0;
u16 lrh0;
u16 lid;
- int i;
- int ret;
-
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
- ret = 0;
- goto bail;
- }
+ int ret = 0;
+ int next_cur;
- /* IB spec says that num_sge == 0 is OK. */
- if (wr->num_sge > qp->s_max_sge) {
- ret = -EINVAL;
- goto bail;
- }
+ spin_lock_irqsave(&qp->s_lock, flags);
- if (wr->num_sge > 1) {
- sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list),
- GFP_ATOMIC);
- if (!sg_list) {
- ret = -ENOMEM;
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
- }
- } else
- sg_list = NULL;
-
- /* Check the buffer to send. */
- ss.sg_list = sg_list;
- ss.sge.mr = NULL;
- ss.sge.vaddr = NULL;
- ss.sge.length = 0;
- ss.sge.sge_length = 0;
- ss.num_sge = 0;
- len = 0;
- for (i = 0; i < wr->num_sge; i++) {
- /* Check LKEY */
- if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
- ret = -EINVAL;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
goto bail;
- }
-
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
- sg_list + ss.num_sge - 1 : &ss.sge,
- &wr->sg_list[i], 0)) {
- ret = -EINVAL;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
- len += wr->sg_list[i].length;
- ss.num_sge++;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
}
- /* Check for invalid packet size. */
- if (len > dev->dd->ipath_ibmtu) {
- ret = -EINVAL;
+
+ if (qp->s_cur == qp->s_head)
goto bail;
- }
- extra_bytes = (4 - len) & 3;
- nwords = (len + extra_bytes) >> 2;
+
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ next_cur = qp->s_cur + 1;
+ if (next_cur >= qp->s_size)
+ next_cur = 0;
/* Construct the header. */
- ah_attr = &to_iah(wr->wr.ud.ah)->attr;
- if (ah_attr->dlid == 0) {
- ret = -EINVAL;
- goto bail;
- }
+ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
dev->n_multicast_xmit++;
@@ -373,75 +288,79 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
- lid = ah_attr->dlid &
- ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+ lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) {
/*
- * Pass in an uninitialized ib_wc to save stack
- * space.
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
*/
- ipath_ud_loopback(qp, &ss, len, wr, &wc);
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ qp->s_cur = next_cur;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ ipath_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
+
+ qp->s_cur = next_cur;
+ extra_bytes = -wqe->length & 3;
+ nwords = (wqe->length + extra_bytes) >> 2;
+
+ /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+ qp->s_hdrwords = 7;
+ qp->s_cur_size = wqe->length;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_dmult = ah_attr->static_rate;
+ qp->s_wqe = wqe;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
- hwords = 17;
+ qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+ &ah_attr->grh,
+ qp->s_hdrwords, nwords);
lrh0 = IPATH_LRH_GRH;
ohdr = &qp->s_hdr.u.l.oth;
- qp->s_hdr.u.l.grh.version_tclass_flow =
- cpu_to_be32((6 << 28) |
- (ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- qp->s_hdr.u.l.grh.paylen =
- cpu_to_be16(((wr->opcode ==
- IB_WR_SEND_WITH_IMM ? 6 : 5) +
- nwords + SIZE_OF_CRC) << 2);
- /* next_hdr is defined by C8-7 in ch. 8.4.1 */
- qp->s_hdr.u.l.grh.next_hdr = 0x1B;
- qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit;
- /* The SGID is 32-bit aligned. */
- qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
- dev->gid_prefix;
- qp->s_hdr.u.l.grh.sgid.global.interface_id =
- dev->dd->ipath_guid;
- qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
/*
* Don't worry about sending to locally attached multicast
* QPs. It is unspecified by the spec. what happens.
*/
} else {
/* Header size in 32-bit words. */
- hwords = 7;
lrh0 = IPATH_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
- if (wr->opcode == IB_WR_SEND_WITH_IMM) {
- ohdr->u.ud.imm_data = wr->imm_data;
- wc.imm_data = wr->imm_data;
- hwords += 1;
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ qp->s_hdrwords++;
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
- } else if (wr->opcode == IB_WR_SEND) {
- wc.imm_data = 0;
+ } else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
- } else {
- ret = -EINVAL;
- goto bail;
- }
lrh0 |= ah_attr->sl << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
- qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
+ SIZE_OF_CRC);
lid = dev->dd->ipath_lid;
if (lid) {
lid |= ah_attr->src_path_bits &
- ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+ ((1 << dev->dd->ipath_lmc) - 1);
qp->s_hdr.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
- if (wr->send_flags & IB_SEND_SOLICITED)
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -452,41 +371,25 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
*/
ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
ah_attr->dlid != IPATH_PERMISSIVE_LID ?
- __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
- cpu_to_be32(wr->wr.ud.remote_qpn);
- /* XXX Could lose a PSN count but not worth locking */
+ cpu_to_be32(IPATH_MULTICAST_QPN) :
+ cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ?
- qp->qkey : wr->wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+ qp->qkey : wqe->wr.wr.ud.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
- if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr,
- len, &ss))
- dev->n_no_piobuf++;
done:
- /* Queue the completion status entry. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
- (wr->send_flags & IB_SEND_SIGNALED)) {
- wc.wr_id = wr->wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
- wc.opcode = IB_WC_SEND;
- wc.byte_len = len;
- wc.qp_num = qp->ibqp.qp_num;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- /* XXX initialize other fields? */
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
- }
- kfree(sg_list);
-
- ret = 0;
+ ret = 1;
+ goto unlock;
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -569,6 +472,28 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
}
+ /*
+ * The opcode is in the low byte when its in network order
+ * (top byte when in host order).
+ */
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (qp->ibqp.qp_num > 1 &&
+ opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else
+ wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ hdrsize += sizeof(u32);
+ } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
+ } else {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
if (unlikely(tlen < (hdrsize + pad + 4))) {
@@ -590,38 +515,16 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
/*
- * The opcode is in the low byte when its in network order
- * (top byte when in host order).
- */
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- if (qp->ibqp.qp_num > 1 &&
- opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
- if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
- data += sizeof(__be32);
- } else
- wc.imm_data = ohdr->u.ud.imm_data;
- wc.wc_flags = IB_WC_WITH_IMM;
- hdrsize += sizeof(u32);
- } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
- wc.imm_data = 0;
- wc.wc_flags = 0;
- } else {
- dev->n_pkt_drops++;
- goto bail;
- }
-
- /*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
@@ -637,7 +540,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto bail;
}
@@ -649,11 +552,13 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ goto bail;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = src_qp;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
@@ -664,11 +569,12 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
- dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+ dlid & ((1 << dev->dd->ipath_lmc) - 1);
+ wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
bail:;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index e32fca9faf8..dc66c450691 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,6 +33,8 @@
#include <linux/mm.h>
#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
#include "ipath_kernel.h"
@@ -51,15 +53,14 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages,
}
/* call with current->mm->mmap_sem held */
-static int __get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
int ret;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
- PAGE_SHIFT;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (num_pages > lock_limit) {
ret = -ENOMEM;
@@ -78,7 +79,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
goto bail_release;
}
- current->mm->locked_vm += num_pages;
+ current->mm->pinned_vm += num_pages;
ret = 0;
goto bail;
@@ -90,6 +91,62 @@ bail:
}
/**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+ int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_single(hwdev, ptr, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_single(hwdev, phys, size, direction);
+ phys = pci_map_single(hwdev, ptr, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
@@ -108,33 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __get_user_pages(start_page, num_pages, p, NULL);
-
- up_write(&current->mm->mmap_sem);
-
- return ret;
-}
-
-/**
- * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared
- * @start_page: the page to lock
- * @p: the output page structure
- *
- * This is similar to ipath_get_user_pages, but it's always one page, and we
- * mark the page as locked for I/O, and shared. This is used for the user
- * process page that contains the destination address for the rcvhdrq tail
- * update, so we need to have the vma. If we don't do this, the page can be
- * taken away from us on fork, even if the child never touches it, and then
- * the user process never sees the tail register updates.
- */
-int ipath_get_user_pages_nocopy(unsigned long page, struct page **p)
-{
- struct vm_area_struct *vma;
- int ret;
-
- down_write(&current->mm->mmap_sem);
-
- ret = __get_user_pages(page, 1, p, &vma);
+ ret = __ipath_get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
@@ -147,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages)
__ipath_release_user_pages(p, num_pages, 1);
- current->mm->locked_vm -= num_pages;
+ current->mm->pinned_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
@@ -158,12 +189,13 @@ struct ipath_user_pages_work {
unsigned long num_pages;
};
-static void user_pages_account(void *ptr)
+static void user_pages_account(struct work_struct *_work)
{
- struct ipath_user_pages_work *work = ptr;
+ struct ipath_user_pages_work *work =
+ container_of(_work, struct ipath_user_pages_work, work);
down_write(&work->mm->mmap_sem);
- work->mm->locked_vm -= work->num_pages;
+ work->mm->pinned_vm -= work->num_pages;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
@@ -178,20 +210,20 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
mm = get_task_mm(current);
if (!mm)
- goto bail;
+ return;
work = kmalloc(sizeof(*work), GFP_KERNEL);
if (!work)
goto bail_mm;
- goto bail;
-
- INIT_WORK(&work->work, user_pages_account, work);
+ INIT_WORK(&work->work, user_pages_account);
work->mm = mm;
work->num_pages = num_pages;
+ queue_work(ib_wq, &work->work);
+ return;
+
bail_mm:
mmput(mm);
-bail:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
new file mode 100644
index 00000000000..cc04b7ba348
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -0,0 +1,875 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_user_sdma.h"
+
+/* minimum size of header */
+#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64
+/* length mask in PBC (lower 11 bits) */
+#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1)
+
+struct ipath_user_sdma_pkt {
+ u8 naddr; /* dimension of addr (1..3) ... */
+ u32 counter; /* sdma pkts queued counter for this entry */
+ u64 added; /* global descq number of entries */
+
+ struct {
+ u32 offset; /* offset for kvaddr, addr */
+ u32 length; /* length in page */
+ u8 put_page; /* should we put_page? */
+ u8 dma_mapped; /* is page dma_mapped? */
+ struct page *page; /* may be NULL (coherent mem) */
+ void *kvaddr; /* FIXME: only for pio hack */
+ dma_addr_t addr;
+ } addr[4]; /* max pages, any more and we coalesce */
+ struct list_head list; /* list element */
+};
+
+struct ipath_user_sdma_queue {
+ /*
+ * pkts sent to dma engine are queued on this
+ * list head. the type of the elements of this
+ * list are struct ipath_user_sdma_pkt...
+ */
+ struct list_head sent;
+
+ /* headers with expected length are allocated from here... */
+ char header_cache_name[64];
+ struct dma_pool *header_cache;
+
+ /* packets are allocated from the slab cache... */
+ char pkt_slab_name[64];
+ struct kmem_cache *pkt_slab;
+
+ /* as packets go on the queued queue, they are counted... */
+ u32 counter;
+ u32 sent_counter;
+
+ /* dma page table */
+ struct rb_root dma_pages_root;
+
+ /* protect everything above... */
+ struct mutex lock;
+};
+
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
+{
+ struct ipath_user_sdma_queue *pq =
+ kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
+
+ if (!pq)
+ goto done;
+
+ pq->counter = 0;
+ pq->sent_counter = 0;
+ INIT_LIST_HEAD(&pq->sent);
+
+ mutex_init(&pq->lock);
+
+ snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+ "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
+ pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+ sizeof(struct ipath_user_sdma_pkt),
+ 0, 0, NULL);
+
+ if (!pq->pkt_slab)
+ goto err_kfree;
+
+ snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+ "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
+ pq->header_cache = dma_pool_create(pq->header_cache_name,
+ dev,
+ IPATH_USER_SDMA_EXP_HEADER_LENGTH,
+ 4, 0);
+ if (!pq->header_cache)
+ goto err_slab;
+
+ pq->dma_pages_root = RB_ROOT;
+
+ goto done;
+
+err_slab:
+ kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+ kfree(pq);
+ pq = NULL;
+
+done:
+ return pq;
+}
+
+static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
+ int i, size_t offset, size_t len,
+ int put_page, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->addr[i].offset = offset;
+ pkt->addr[i].length = len;
+ pkt->addr[i].put_page = put_page;
+ pkt->addr[i].dma_mapped = dma_mapped;
+ pkt->addr[i].page = page;
+ pkt->addr[i].kvaddr = kvaddr;
+ pkt->addr[i].addr = dma_addr;
+}
+
+static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
+ u32 counter, size_t offset,
+ size_t len, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->naddr = 1;
+ pkt->counter = counter;
+ ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
+ kvaddr, dma_addr);
+}
+
+/* we've too many pages in the iovec, coalesce to a single page */
+static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov) {
+ int ret = 0;
+ struct page *page = alloc_page(GFP_KERNEL);
+ void *mpage_save;
+ char *mpage;
+ int i;
+ int len = 0;
+ dma_addr_t dma_addr;
+
+ if (!page) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ mpage = kmap(page);
+ mpage_save = mpage;
+ for (i = 0; i < niov; i++) {
+ int cfur;
+
+ cfur = copy_from_user(mpage,
+ iov[i].iov_base, iov[i].iov_len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_unmap;
+ }
+
+ mpage += iov[i].iov_len;
+ len += iov[i].iov_len;
+ }
+
+ dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_unmap;
+ }
+
+ ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
+ dma_addr);
+ pkt->naddr = 2;
+
+ goto done;
+
+free_unmap:
+ kunmap(page);
+ __free_page(page);
+done:
+ return ret;
+}
+
+/* how many pages in this iovec element? */
+static int ipath_user_sdma_num_pages(const struct iovec *iov)
+{
+ const unsigned long addr = (unsigned long) iov->iov_base;
+ const unsigned long len = iov->iov_len;
+ const unsigned long spage = addr & PAGE_MASK;
+ const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+ return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
+
+/* truncate length to page boundary */
+static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
+{
+ const unsigned long offset = addr & ~PAGE_MASK;
+
+ return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
+}
+
+static void ipath_user_sdma_free_pkt_frag(struct device *dev,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ int frag)
+{
+ const int i = frag;
+
+ if (pkt->addr[i].page) {
+ if (pkt->addr[i].dma_mapped)
+ dma_unmap_page(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].length,
+ DMA_TO_DEVICE);
+
+ if (pkt->addr[i].kvaddr)
+ kunmap(pkt->addr[i].page);
+
+ if (pkt->addr[i].put_page)
+ put_page(pkt->addr[i].page);
+ else
+ __free_page(pkt->addr[i].page);
+ } else if (pkt->addr[i].kvaddr)
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
+ pkt->addr[i].kvaddr, pkt->addr[i].addr);
+}
+
+/* return number of pages pinned... */
+static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt,
+ unsigned long addr, int tlen, int npages)
+{
+ struct page *pages[2];
+ int j;
+ int ret;
+
+ ret = get_user_pages_fast(addr, npages, 0, pages);
+ if (ret != npages) {
+ int i;
+
+ for (i = 0; i < ret; i++)
+ put_page(pages[i]);
+
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for (j = 0; j < npages; j++) {
+ /* map the pages... */
+ const int flen =
+ ipath_user_sdma_page_length(addr, tlen);
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ pages[j], 0, flen, DMA_TO_DEVICE);
+ unsigned long fofs = addr & ~PAGE_MASK;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
+ pages[j], kmap(pages[j]),
+ dma_addr);
+
+ pkt->naddr++;
+ addr += flen;
+ tlen -= flen;
+ }
+
+done:
+ return ret;
+}
+
+static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ unsigned long idx;
+
+ for (idx = 0; idx < niov; idx++) {
+ const int npages = ipath_user_sdma_num_pages(iov + idx);
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+ ret = ipath_user_sdma_pin_pages(dd, pkt,
+ addr, iov[idx].iov_len,
+ npages);
+ if (ret < 0)
+ goto free_pkt;
+ }
+
+ goto done;
+
+free_pkt:
+ for (idx = 0; idx < pkt->naddr; idx++)
+ ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+done:
+ return ret;
+}
+
+static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov, int npages)
+{
+ int ret = 0;
+
+ if (npages >= ARRAY_SIZE(pkt->addr))
+ ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
+ else
+ ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+ return ret;
+}
+
+/* free a packet list -- return counter value of last packet */
+static void ipath_user_sdma_free_pkt_list(struct device *dev,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *list)
+{
+ struct ipath_user_sdma_pkt *pkt, *pkt_next;
+
+ list_for_each_entry_safe(pkt, pkt_next, list, list) {
+ int i;
+
+ for (i = 0; i < pkt->naddr; i++)
+ ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+ kmem_cache_free(pq->pkt_slab, pkt);
+ }
+}
+
+/*
+ * copy headers, coalesce etc -- pq->lock must be held
+ *
+ * we queue all the packets to list, returning the
+ * number of bytes total. list must be empty initially,
+ * as, if there is an error we clean it...
+ */
+static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *list,
+ const struct iovec *iov,
+ unsigned long niov,
+ int maxpkts)
+{
+ unsigned long idx = 0;
+ int ret = 0;
+ int npkts = 0;
+ struct page *page = NULL;
+ __le32 *pbc;
+ dma_addr_t dma_addr;
+ struct ipath_user_sdma_pkt *pkt = NULL;
+ size_t len;
+ size_t nw;
+ u32 counter = pq->counter;
+ int dma_mapped = 0;
+
+ while (idx < niov && npkts < maxpkts) {
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+ const unsigned long idx_save = idx;
+ unsigned pktnw;
+ unsigned pktnwc;
+ int nfrags = 0;
+ int npages = 0;
+ int cfur;
+
+ dma_mapped = 0;
+ len = iov[idx].iov_len;
+ nw = len >> 2;
+ page = NULL;
+
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_list;
+ }
+
+ if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
+ len > PAGE_SIZE || len & 3 || addr & 3) {
+ ret = -EINVAL;
+ goto free_pkt;
+ }
+
+ if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
+ pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ &dma_addr);
+ else
+ pbc = NULL;
+
+ if (!pbc) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pbc = kmap(page);
+ }
+
+ cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pbc;
+ }
+
+ /*
+ * this assignment is a bit strange. it's because the
+ * the pbc counts the number of 32 bit words in the full
+ * packet _except_ the first word of the pbc itself...
+ */
+ pktnwc = nw - 1;
+
+ /*
+ * pktnw computation yields the number of 32 bit words
+ * that the caller has indicated in the PBC. note that
+ * this is one less than the total number of words that
+ * goes to the send DMA engine as the first 32 bit word
+ * of the PBC itself is not counted. Armed with this count,
+ * we can verify that the packet is consistent with the
+ * iovec lengths.
+ */
+ pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
+ if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+
+ idx++;
+ while (pktnwc < pktnw && idx < niov) {
+ const size_t slen = iov[idx].iov_len;
+ const unsigned long faddr =
+ (unsigned long) iov[idx].iov_base;
+
+ if (slen & 3 || faddr & 3 || !slen ||
+ slen > PAGE_SIZE) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ npages++;
+ if ((faddr & PAGE_MASK) !=
+ ((faddr + slen - 1) & PAGE_MASK))
+ npages++;
+
+ pktnwc += slen >> 2;
+ idx++;
+ nfrags++;
+ }
+
+ if (pktnwc != pktnw) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (page) {
+ dma_addr = dma_map_page(&dd->pcidev->dev,
+ page, 0, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+
+ dma_mapped = 1;
+ }
+
+ ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
+ page, pbc, dma_addr);
+
+ if (nfrags) {
+ ret = ipath_user_sdma_init_payload(dd, pq, pkt,
+ iov + idx_save + 1,
+ nfrags, npages);
+ if (ret < 0)
+ goto free_pbc_dma;
+ }
+
+ counter++;
+ npkts++;
+
+ list_add_tail(&pkt->list, list);
+ }
+
+ ret = idx;
+ goto done;
+
+free_pbc_dma:
+ if (dma_mapped)
+ dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pbc:
+ if (page) {
+ kunmap(page);
+ __free_page(page);
+ } else
+ dma_pool_free(pq->header_cache, pbc, dma_addr);
+free_pkt:
+ kmem_cache_free(pq->pkt_slab, pkt);
+free_list:
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+ return ret;
+}
+
+static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
+ u32 c)
+{
+ pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ struct list_head free_list;
+ struct ipath_user_sdma_pkt *pkt;
+ struct ipath_user_sdma_pkt *pkt_prev;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&free_list);
+
+ list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
+ s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
+
+ if (descd < 0)
+ break;
+
+ list_move_tail(&pkt->list, &free_list);
+
+ /* one more packet cleaned */
+ ret++;
+ }
+
+ if (!list_empty(&free_list)) {
+ u32 counter;
+
+ pkt = list_entry(free_list.prev,
+ struct ipath_user_sdma_pkt, list);
+ counter = pkt->counter;
+
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ ipath_user_sdma_set_complete_counter(pq, counter);
+ }
+
+ return ret;
+}
+
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
+{
+ if (!pq)
+ return;
+
+ kmem_cache_destroy(pq->pkt_slab);
+ dma_pool_destroy(pq->header_cache);
+ kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ ret = ipath_sdma_make_progress(dd);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ return ret;
+}
+
+/* we're in close, drain packets so that we can cleanup successfully... */
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ int i;
+
+ if (!pq)
+ return;
+
+ for (i = 0; i < 100; i++) {
+ mutex_lock(&pq->lock);
+ if (list_empty(&pq->sent)) {
+ mutex_unlock(&pq->lock);
+ break;
+ }
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ mutex_unlock(&pq->lock);
+ msleep(10);
+ }
+
+ if (!list_empty(&pq->sent)) {
+ struct list_head free_list;
+
+ printk(KERN_INFO "drain: lists not empty: forcing!\n");
+ INIT_LIST_HEAD(&free_list);
+ mutex_lock(&pq->lock);
+ list_splice_init(&pq->sent, &free_list);
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ mutex_unlock(&pq->lock);
+ }
+}
+
+static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
+ u64 addr, u64 dwlen, u64 dwoffset)
+{
+ return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+ ((addr & 0xfffffffcULL) << 32) |
+ /* SDmaGeneration[1:0] */
+ ((dd->ipath_sdma_generation & 3ULL) << 30) |
+ /* SDmaDwordCount[10:0] */
+ ((dwlen & 0x7ffULL) << 16) |
+ /* SDmaBufOffset[12:2] */
+ (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
+{
+ return descq | cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
+{
+ /* last */ /* dma head */
+ return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 ipath_sdma_make_desc1(u64 addr)
+{
+ /* SDmaPhyAddr[47:32] */
+ return cpu_to_le64(addr >> 32);
+}
+
+static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt, int idx,
+ unsigned ofs, u16 tail)
+{
+ const u64 addr = (u64) pkt->addr[idx].addr +
+ (u64) pkt->addr[idx].offset;
+ const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+ __le64 *descqp;
+ __le64 descq0;
+
+ descqp = &dd->ipath_sdma_descq[tail].qw[0];
+
+ descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
+ if (idx == 0)
+ descq0 = ipath_sdma_make_first_desc0(descq0);
+ if (idx == pkt->naddr - 1)
+ descq0 = ipath_sdma_make_last_desc0(descq0);
+
+ descqp[0] = descq0;
+ descqp[1] = ipath_sdma_make_desc1(addr);
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *pktlist)
+{
+ int ret = 0;
+ unsigned long flags;
+ u16 tail;
+
+ if (list_empty(pktlist))
+ return 0;
+
+ if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
+ return -ECOMM;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
+ ret = -ECOMM;
+ goto unlock;
+ }
+
+ tail = dd->ipath_sdma_descq_tail;
+ while (!list_empty(pktlist)) {
+ struct ipath_user_sdma_pkt *pkt =
+ list_entry(pktlist->next, struct ipath_user_sdma_pkt,
+ list);
+ int i;
+ unsigned ofs = 0;
+ u16 dtail = tail;
+
+ if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
+ goto unlock_check_tail;
+
+ for (i = 0; i < pkt->naddr; i++) {
+ ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
+ ofs += pkt->addr[i].length >> 2;
+
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ ++dd->ipath_sdma_generation;
+ }
+ }
+
+ if ((ofs<<2) > dd->ipath_ibmaxlen) {
+ ipath_dbg("packet size %X > ibmax %X, fail\n",
+ ofs<<2, dd->ipath_ibmaxlen);
+ ret = -EMSGSIZE;
+ goto unlock;
+ }
+
+ /*
+ * if the packet is >= 2KB mtu equivalent, we have to use
+ * the large buffers, and have to mark each descriptor as
+ * part of a large buffer packet.
+ */
+ if (ofs >= IPATH_SMALLBUF_DWORDS) {
+ for (i = 0; i < pkt->naddr; i++) {
+ dd->ipath_sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == dd->ipath_sdma_descq_cnt)
+ dtail = 0;
+ }
+ }
+
+ dd->ipath_sdma_descq_added += pkt->naddr;
+ pkt->added = dd->ipath_sdma_descq_added;
+ list_move_tail(&pkt->list, &pq->sent);
+ ret++;
+ }
+
+unlock_check_tail:
+ /* advance the tail on the chip if necessary */
+ if (dd->ipath_sdma_descq_tail != tail) {
+ wmb();
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+ dd->ipath_sdma_descq_tail = tail;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ return ret;
+}
+
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim)
+{
+ int ret = 0;
+ struct list_head list;
+ int npkts = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ mutex_lock(&pq->lock);
+
+ if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ }
+
+ while (dim) {
+ const int mxp = 8;
+
+ ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+ if (ret <= 0)
+ goto done_unlock;
+ else {
+ dim -= ret;
+ iov += ret;
+ }
+
+ /* force packets onto the sdma hw queue... */
+ if (!list_empty(&list)) {
+ /*
+ * lazily clean hw queue. the 4 is a guess of about
+ * how many sdma descriptors a packet will take (it
+ * doesn't have to be perfect).
+ */
+ if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ }
+
+ ret = ipath_user_sdma_push_pkts(dd, pq, &list);
+ if (ret < 0)
+ goto done_unlock;
+ else {
+ npkts += ret;
+ pq->counter += ret;
+
+ if (!list_empty(&list))
+ goto done_unlock;
+ }
+ }
+ }
+
+done_unlock:
+ if (!list_empty(&list))
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+ mutex_unlock(&pq->lock);
+
+ return (ret < 0) ? ret : npkts;
+}
+
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ int ret = 0;
+
+ mutex_lock(&pq->lock);
+ ipath_user_sdma_hwqueue_clean(dd);
+ ret = ipath_user_sdma_queue_clean(dd, pq);
+ mutex_unlock(&pq->lock);
+
+ return ret;
+}
+
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
+{
+ return pq->sent_counter;
+}
+
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
+{
+ return pq->counter;
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
index 3854a4eae68..fc76316c4a5 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -1,6 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,42 +29,24 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <linux/device.h>
-#ifndef _IPATH_LAYER_H
-#define _IPATH_LAYER_H
+struct ipath_user_sdma_queue;
-/*
- * This header file is for symbols shared between the infinipath driver
- * and drivers layered upon it (such as ipath).
- */
-
-struct sk_buff;
-struct ipath_devdata;
-struct ether_header;
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *),
- int (*l_intr)(void *, u32),
- int (*l_rcv)(void *, void *,
- struct sk_buff *),
- u16 rcv_opcode,
- int (*l_rcv_lid)(void *, void *));
-void ipath_layer_unregister(void);
-int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
-u16 ipath_layer_get_lid(struct ipath_devdata *dd);
-int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
-u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-int ipath_layer_send_hdr(struct ipath_devdata *dd,
- struct ether_header *hdr);
-int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim);
-/* ipath_ether interrupt values */
-#define IPATH_LAYER_INT_IF_UP 0x2
-#define IPATH_LAYER_INT_IF_DOWN 0x4
-#define IPATH_LAYER_INT_LID 0x8
-#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
-#define IPATH_LAYER_INT_BCAST 0x40
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq);
-extern unsigned ipath_debug; /* debugging bit mask */
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq);
-#endif /* _IPATH_LAYER_H */
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index b8381c5e72b..44ea9390417 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,7 +34,10 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/utsname.h>
+#include <linux/rculist.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -109,18 +112,26 @@ MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; ipath_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
[IB_QPS_INIT] = IPATH_POST_RECV_OK,
[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
+ IPATH_PROCESS_NEXT_SEND_OK,
[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK,
- [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
- [IB_QPS_ERR] = 0,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+ [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};
struct ipath_ucontext {
@@ -164,9 +175,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
while (length) {
u32 len = sge->length;
- BUG_ON(len == 0);
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
memcpy(sge->vaddr, data, len);
sge->vaddr += len;
sge->length -= len;
@@ -202,9 +215,97 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
while (length) {
u32 len = sge->length;
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+}
+
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the ipath_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sg_list = ss->sg_list;
+ struct ipath_sge sge = ss->sge;
+ u8 num_sge = ss->num_sge;
+ u32 ndesc = 1; /* count the header */
+
+ while (length) {
+ u32 len = sge.length;
+
+ if (len > length)
+ len = length;
+ if (len > sge.sge_length)
+ len = sge.sge_length;
BUG_ON(len == 0);
+ if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+ (len != length && (len & (sizeof(u32) - 1)))) {
+ ndesc = 0;
+ break;
+ }
+ ndesc++;
+ sge.vaddr += len;
+ sge.length -= len;
+ sge.sge_length -= len;
+ if (sge.sge_length == 0) {
+ if (--num_sge)
+ sge = *sg_list++;
+ } else if (sge.length == 0 && sge.mr != NULL) {
+ if (++sge.n >= IPATH_SEGSZ) {
+ if (++sge.m >= sge.mr->mapsz)
+ break;
+ sge.n = 0;
+ }
+ sge.vaddr =
+ sge.mr->map[sge.m]->segs[sge.n].vaddr;
+ sge.length =
+ sge.mr->map[sge.m]->segs[sge.n].length;
+ }
+ length -= len;
+ }
+ return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
+ u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(data, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -222,11 +323,118 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
+ data += len;
length -= len;
}
}
/**
+ * ipath_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+{
+ struct ipath_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ int ret;
+ unsigned long flags;
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (qp->ibqp.qp_type != IB_QPT_SMI &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ ret = -ENETDOWN;
+ goto bail;
+ }
+
+ /* Check that state is OK to post send. */
+ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
+ goto bail_inval;
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (wr->num_sge > qp->s_max_sge)
+ goto bail_inval;
+
+ /*
+ * Don't allow RDMA reads or atomic operations on UC or
+ * undefined operations.
+ * Make sure buffer is large enough to hold the result for atomics.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type == IB_QPT_UD) {
+ /* Check UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ goto bail_inval;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ goto bail_inval;
+ } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1)))
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+ goto bail_inval;
+
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+ if (next == qp->s_last) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_swqe_ptr(qp, qp->s_head);
+ wqe->wr = *wr;
+ wqe->length = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0, j = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok)
+ goto bail_inval;
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ if (wqe->length > 0x80000000U)
+ goto bail_inval;
+ } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
+ goto bail_inval;
+ wqe->ssn = qp->s_ssn++;
+ qp->s_head = next;
+
+ ret = 0;
+ goto bail;
+
+bail_inval:
+ ret = -EINVAL;
+bail:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
* ipath_post_send - post a send on a QP
* @ibqp: the QP to post the send on
* @wr: the list of work requests to post
@@ -240,35 +448,17 @@ static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ipath_qp *qp = to_iqp(ibqp);
int err = 0;
- /* Check that state is OK to post send. */
- if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
- *bad_wr = wr;
- err = -EINVAL;
- goto bail;
- }
-
for (; wr; wr = wr->next) {
- switch (qp->ibqp.qp_type) {
- case IB_QPT_UC:
- case IB_QPT_RC:
- err = ipath_post_ruc_send(qp, wr);
- break;
-
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_UD:
- err = ipath_post_ud_send(qp, wr);
- break;
-
- default:
- err = -EINVAL;
- }
+ err = ipath_post_one_send(qp, wr);
if (err) {
*bad_wr = wr;
- break;
+ goto bail;
}
}
+ /* Try to do the send work in the caller's context. */
+ ipath_do_send((unsigned long) qp);
+
bail:
return err;
}
@@ -303,7 +493,7 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
*bad_wr = wr;
- ret = -ENOMEM;
+ ret = -EINVAL;
goto bail;
}
@@ -323,6 +513,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
}
@@ -410,7 +602,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
/* Check for a valid destination LID (see ch. 7.11.1). */
lid = be16_to_cpu(hdr->lrh[1]);
if (lid < IPATH_MULTICAST_LID_BASE) {
- lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+ lid &= ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid != dev->dd->ipath_lid)) {
dev->rcv_errors++;
goto bail;
@@ -428,7 +620,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
goto bail;
}
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
dev->opstats[opcode].n_bytes += tlen;
dev->opstats[opcode].n_packets++;
@@ -438,6 +630,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
struct ipath_mcast *mcast;
struct ipath_mcast_qp *p;
+ if (lnh != IPATH_LRH_GRH) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
if (mcast == NULL) {
dev->n_pkt_drops++;
@@ -445,8 +641,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
}
dev->n_multicast_rcv++;
list_for_each_entry_rcu(p, &mcast->qp_list, list)
- ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
- tlen, p->qp);
+ ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
/*
* Notify ipath_multicast_detach() if it is waiting for us
* to finish.
@@ -479,9 +674,10 @@ bail:;
* This is called from ipath_do_rcv_timer() at interrupt level to check for
* QPs which need retransmits and to collect performance numbers.
*/
-void ipath_ib_timer(struct ipath_ibdev *dev)
+static void ipath_ib_timer(struct ipath_ibdev *dev)
{
struct ipath_qp *resend = NULL;
+ struct ipath_qp *rnr = NULL;
struct list_head *last;
struct ipath_qp *qp;
unsigned long flags;
@@ -508,7 +704,9 @@ void ipath_ib_timer(struct ipath_ibdev *dev)
if (--qp->s_rnr_timeout == 0) {
do {
list_del_init(&qp->timerwait);
- tasklet_hi_schedule(&qp->s_task);
+ qp->timer_next = rnr;
+ rnr = qp;
+ atomic_inc(&qp->refcount);
if (list_empty(last))
break;
qp = list_entry(last->next, struct ipath_qp,
@@ -548,13 +746,15 @@ void ipath_ib_timer(struct ipath_ibdev *dev)
spin_unlock_irqrestore(&dev->pending_lock, flags);
/* XXX What if timer fires again while this is running? */
- for (qp = resend; qp != NULL; qp = qp->timer_next) {
- struct ib_wc wc;
+ while (resend != NULL) {
+ qp = resend;
+ resend = qp->timer_next;
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
+ if (qp->s_last != qp->s_tail &&
+ ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_timeouts++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -562,6 +762,19 @@ void ipath_ib_timer(struct ipath_ibdev *dev)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
+ while (rnr != NULL) {
+ qp = rnr;
+ rnr = qp->timer_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
}
static void update_sge(struct ipath_sge_state *ss, u32 length)
@@ -622,7 +835,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
#endif
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
- u32 length)
+ u32 length, unsigned flush_wc)
{
u32 extra = 0;
u32 data = 0;
@@ -632,11 +845,11 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
u32 len = ss->sge.length;
u32 off;
- BUG_ON(len == 0);
if (len > length)
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
+ BUG_ON(len == 0);
/* If the source address is not aligned, try to align it. */
off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
if (off) {
@@ -748,94 +961,394 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
}
/* Update address before sending packet. */
update_sge(ss, length);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(last, piobuf);
- /* be sure trigger word is written */
- ipath_flush_wc();
+ if (flush_wc) {
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __raw_writel(last, piobuf);
}
-/**
- * ipath_verbs_send - send a packet
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
+/*
+ * Convert IB rate to delay multiplier.
*/
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 *hdr, u32 len, struct ipath_sge_state *ss)
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
{
- u32 __iomem *piobuf;
- u32 plen;
+ switch (rate) {
+ case IB_RATE_2_5_GBPS: return 8;
+ case IB_RATE_5_GBPS: return 4;
+ case IB_RATE_10_GBPS: return 2;
+ case IB_RATE_20_GBPS: return 1;
+ default: return 0;
+ }
+}
+
+/*
+ * Convert delay multiplier to IB rate
+ */
+static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
+{
+ switch (mult) {
+ case 8: return IB_RATE_2_5_GBPS;
+ case 4: return IB_RATE_5_GBPS;
+ case 2: return IB_RATE_10_GBPS;
+ case 1: return IB_RATE_20_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
+ }
+}
+
+static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
+{
+ struct ipath_verbs_txreq *tx = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+
+ list_del(l);
+ tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ return tx;
+}
+
+static inline void put_txreq(struct ipath_ibdev *dev,
+ struct ipath_verbs_txreq *tx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ list_add(&tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+static void sdma_complete(void *cookie, int status)
+{
+ struct ipath_verbs_txreq *tx = cookie;
+ struct ipath_qp *qp = tx->qp;
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ unsigned long flags;
+ enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
+
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (tx->wqe)
+ ipath_send_complete(qp, tx->wqe, ibs);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ } else if (tx->wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, tx->wqe, ibs);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+ kfree(tx->txreq.map_addr);
+ put_txreq(dev, tx);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+static void decrement_dma_busy(struct ipath_qp *qp)
+{
+ unsigned long flags;
+
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ }
+}
+
+/*
+ * Compute the number of clock cycles of delay before sending the next packet.
+ * The multipliers reflect the number of clocks for the fastest rate so
+ * one tick at 4xDDR is 8 ticks at 1xSDR.
+ * If the destination port will take longer to receive a packet than
+ * the outgoing link can send it, we need to delay sending the next packet
+ * by the difference in time it takes the receiver to receive and the sender
+ * to send this packet.
+ * Note that this delay is always correct for UC and RC but not always
+ * optimal for UD. For UD, the destination HCA can be different for each
+ * packet, in which case, we could send packets to a different destination
+ * while "waiting" for the delay. The overhead for doing this without
+ * HW support is more than just paying the cost of delaying some packets
+ * unnecessarily.
+ */
+static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
+{
+ return (rcv_mult > snd_mult) ?
+ (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
+}
+
+static int ipath_verbs_send_dma(struct ipath_qp *qp,
+ struct ipath_ib_header *hdr, u32 hdrwords,
+ struct ipath_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_devdata *dd = dev->dd;
+ struct ipath_verbs_txreq *tx;
+ u32 *piobuf;
+ u32 control;
+ u32 ndesc;
int ret;
- /* +1 is for the qword padding of pbc */
- plen = hdrwords + ((len + 3) >> 2) + 1;
- if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
- ipath_dbg("packet len 0x%x too long, failing\n", plen);
- ret = -EINVAL;
+ tx = qp->s_tx;
+ if (tx) {
+ qp->s_tx = NULL;
+ /* resend previously constructed packet */
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
+ if (ret) {
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ goto bail;
+ }
+
+ tx = get_txreq(dev);
+ if (!tx) {
+ ret = -EBUSY;
goto bail;
}
- /* Get a PIO buffer to use. */
- piobuf = ipath_getpiobuf(dd, NULL);
+ /*
+ * Get the saved delay count we computed for the previous packet
+ * and save the delay count for this packet to be used next time
+ * we get here.
+ */
+ control = qp->s_pkt_delay;
+ qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+ tx->qp = qp;
+ atomic_inc(&qp->refcount);
+ tx->wqe = qp->s_wqe;
+ tx->txreq.callback = sdma_complete;
+ tx->txreq.callback_cookie = tx;
+ tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
+ IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
+
+ /* VL15 packets bypass credit check */
+ if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
+ control |= 1ULL << 31;
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
+ }
+
+ if (len) {
+ /*
+ * Don't try to DMA if it takes more descriptors than
+ * the queue holds.
+ */
+ ndesc = ipath_count_sge(ss, len);
+ if (ndesc >= dd->ipath_sdma_descq_cnt)
+ ndesc = 0;
+ } else
+ ndesc = 1;
+ if (ndesc) {
+ tx->hdr.pbc[0] = cpu_to_le32(plen);
+ tx->hdr.pbc[1] = cpu_to_le32(control);
+ memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
+ tx->txreq.sg_count = ndesc;
+ tx->map_len = (hdrwords + 2) << 2;
+ tx->txreq.map_addr = &tx->hdr;
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
+ if (ret) {
+ /* save ss and length in dwords */
+ tx->ss = ss;
+ tx->len = dwords;
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ goto bail;
+ }
+
+ /* Allocate a buffer and copy the header and payload to it. */
+ tx->map_len = (plen + 1) << 2;
+ piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
+ if (unlikely(piobuf == NULL)) {
+ ret = -EBUSY;
+ goto err_tx;
+ }
+ tx->txreq.map_addr = piobuf;
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
+ tx->txreq.sg_count = 1;
+
+ *piobuf++ = (__force u32) cpu_to_le32(plen);
+ *piobuf++ = (__force u32) cpu_to_le32(control);
+ memcpy(piobuf, hdr, hdrwords << 2);
+ ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
+ /*
+ * If we couldn't queue the DMA request, save the info
+ * and try again later rather than destroying the
+ * buffer and undoing the side effects of the copy.
+ */
+ if (ret) {
+ tx->ss = NULL;
+ tx->len = 0;
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ dev->n_unaligned++;
+ goto bail;
+
+err_tx:
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ put_txreq(dev, tx);
+bail:
+ return ret;
+}
+
+static int ipath_verbs_send_pio(struct ipath_qp *qp,
+ struct ipath_ib_header *ibhdr, u32 hdrwords,
+ struct ipath_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+ u32 *hdr = (u32 *) ibhdr;
+ u32 __iomem *piobuf;
+ unsigned flush_wc;
+ u32 control;
+ int ret;
+ unsigned long flags;
+
+ piobuf = ipath_getpiobuf(dd, plen, NULL);
if (unlikely(piobuf == NULL)) {
ret = -EBUSY;
goto bail;
}
/*
- * Write len to control qword, no flags.
+ * Get the saved delay count we computed for the previous packet
+ * and save the delay count for this packet to be used next time
+ * we get here.
+ */
+ control = qp->s_pkt_delay;
+ qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+ /* VL15 packets bypass credit check */
+ if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
+ control |= 1ULL << 31;
+
+ /*
+ * Write the length to the control qword plus any needed flags.
* We have to flush after the PBC for correctness on some cpus
* or WC buffer can be written out of order.
*/
- writeq(plen, piobuf);
- ipath_flush_wc();
+ writeq(((u64) control << 32) | plen, piobuf);
piobuf += 2;
+
+ flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
if (len == 0) {
/*
* If there is just the header portion, must flush before
* writing last word of header for correctness, and after
* the last header word (trigger word).
*/
- __iowrite32_copy(piobuf, hdr, hdrwords - 1);
- ipath_flush_wc();
- __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
- ipath_flush_wc();
- ret = 0;
- goto bail;
+ if (flush_wc) {
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ goto done;
}
+ if (flush_wc)
+ ipath_flush_wc();
__iowrite32_copy(piobuf, hdr, hdrwords);
piobuf += hdrwords;
/* The common case is aligned and contained in one segment. */
if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
!((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
- u32 w;
u32 *addr = (u32 *) ss->sge.vaddr;
/* Update address before sending packet. */
update_sge(ss, len);
- /* Need to round up for the last dword in the packet. */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, addr, w - 1);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(addr[w - 1], piobuf + w - 1);
- /* be sure trigger word is written */
- ipath_flush_wc();
- ret = 0;
- goto bail;
+ if (flush_wc) {
+ __iowrite32_copy(piobuf, addr, dwords - 1);
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, addr, dwords);
+ goto done;
+ }
+ copy_io(piobuf, ss, len, flush_wc);
+done:
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
- copy_io(piobuf, ss, len);
ret = 0;
-
bail:
return ret;
}
+/**
+ * ipath_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of 32-bit words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ */
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+ u32 hdrwords, struct ipath_sge_state *ss, u32 len)
+{
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+ u32 plen;
+ int ret;
+ u32 dwords = (len + 3) >> 2;
+
+ /*
+ * Calculate the send buffer trigger address.
+ * The +1 counts for the pbc control dword following the pbc length.
+ */
+ plen = hdrwords + dwords + 1;
+
+ /*
+ * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+ * can defer SDMA restart until link goes ACTIVE without
+ * worrying about just how we got there.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+ ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+ else
+ ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+
+ return ret;
+}
+
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
u64 *rwords, u64 *spkts, u64 *rpkts,
u64 *xmit_wait)
@@ -844,7 +1357,6 @@ int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
if (!(dd->ipath_flags & IPATH_INITTED)) {
/* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
ret = -EINVAL;
goto bail;
}
@@ -870,49 +1382,61 @@ bail:
int ipath_get_counters(struct ipath_devdata *dd,
struct ipath_verbs_counters *cntrs)
{
+ struct ipath_cregs const *crp = dd->ipath_cregs;
int ret;
if (!(dd->ipath_flags & IPATH_INITTED)) {
/* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
ret = -EINVAL;
goto bail;
}
cntrs->symbol_error_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+ ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
cntrs->link_error_recovery_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
/*
* The link downed counter counts when the other side downs the
* connection. We add in the number of times we downed the link
* due to local link integrity errors to compensate.
*/
cntrs->link_downed_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+ ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
cntrs->port_rcv_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+ ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
+ ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
+ ipath_snap_cntr(dd, crp->cr_portovflcnt) +
+ ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
+ ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
+ ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
+ ipath_snap_cntr(dd, crp->cr_erricrccnt) +
+ ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
+ ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
+ ipath_snap_cntr(dd, crp->cr_badformatcnt) +
+ dd->ipath_rxfc_unsupvl_errs;
+ if (crp->cr_rxotherlocalphyerrcnt)
+ cntrs->port_rcv_errors +=
+ ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
+ if (crp->cr_rxvlerrcnt)
+ cntrs->port_rcv_errors +=
+ ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
cntrs->port_rcv_remphys_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
- cntrs->port_xmit_discards =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
- cntrs->port_xmit_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- cntrs->port_rcv_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
- cntrs->port_xmit_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
- cntrs->port_rcv_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
- cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
- cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+ ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
+ cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
+ cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
+ cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
+ cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
+ cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
+ cntrs->local_link_integrity_errors =
+ crp->cr_locallinkintegrityerrcnt ?
+ ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
+ ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors);
+ cntrs->excessive_buffer_overrun_errors =
+ crp->cr_excessbufferovflcnt ?
+ ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
+ dd->ipath_overrun_thresh_errs;
+ cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
+ ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
ret = 0;
@@ -927,26 +1451,46 @@ bail:
* This is called from ipath_intr() at interrupt level when a PIO buffer is
* available after ipath_verbs_send() returned an error that no buffers were
* available. Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
+ * QPs waiting for buffers (for now, just restart the send tasklet and
* return zero).
*/
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
+ struct list_head *list;
+ struct ipath_qp *qplist;
struct ipath_qp *qp;
unsigned long flags;
if (dev == NULL)
goto bail;
+ list = &dev->piowait;
+ qplist = NULL;
+
spin_lock_irqsave(&dev->pending_lock, flags);
- while (!list_empty(&dev->piowait)) {
- qp = list_entry(dev->piowait.next, struct ipath_qp,
- piowait);
+ while (!list_empty(list)) {
+ qp = list_entry(list->next, struct ipath_qp, piowait);
list_del_init(&qp->piowait);
- tasklet_hi_schedule(&qp->s_task);
+ qp->pio_next = qplist;
+ qplist = qp;
+ atomic_inc(&qp->refcount);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
+ while (qplist != NULL) {
+ qp = qplist;
+ qplist = qp->pio_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+
bail:
return 0;
}
@@ -960,9 +1504,11 @@ static int ipath_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
- IB_DEVICE_SYS_IMAGE_GUID;
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
props->page_size_cap = PAGE_SIZE;
- props->vendor_id = dev->dd->ipath_vendorid;
+ props->vendor_id =
+ IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
props->vendor_part_id = dev->dd->ipath_deviceid;
props->hw_ver = dev->dd->ipath_pcirev;
@@ -976,15 +1522,17 @@ static int ipath_query_device(struct ib_device *ibdev,
props->max_ah = ib_ipath_max_ahs;
props->max_cqe = ib_ipath_max_cqes;
props->max_mr = dev->lk_table.max;
+ props->max_fmr = dev->lk_table.max;
+ props->max_map_per_fmr = 32767;
props->max_pd = ib_ipath_max_pds;
- props->max_qp_rd_atom = 1;
- props->max_qp_init_rd_atom = 1;
+ props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
/* props->max_res_rd_atom */
props->max_srq = ib_ipath_max_srqs;
props->max_srq_wr = ib_ipath_max_srq_wrs;
props->max_srq_sge = ib_ipath_max_srq_sges;
/* props->local_ca_ack_delay */
- props->atomic_cap = IB_ATOMIC_HCA;
+ props->atomic_cap = IB_ATOMIC_GLOB;
props->max_pkeys = ipath_get_npkeys(dev->dd);
props->max_mcast_grp = ib_ipath_max_mcast_grps;
props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -994,20 +1542,34 @@ static int ipath_query_device(struct ib_device *ibdev,
return 0;
}
-const u8 ipath_cvt_physportstate[16] = {
- [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
- [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
- [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
- [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
- [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
- [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
- [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
- [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
- [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
- [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
- [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
- [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
- [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
+const u8 ipath_cvt_physportstate[32] = {
+ [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
+ [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
+ [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
+ [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
+ [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
+ [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
+ [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};
u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
@@ -1019,35 +1581,39 @@ static int ipath_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_devdata *dd = dev->dd;
enum ib_mtu mtu;
- u16 lid = dev->dd->ipath_lid;
+ u16 lid = dd->ipath_lid;
u64 ibcstat;
memset(props, 0, sizeof(*props));
- props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
- props->lmc = dev->mkeyprot_resv_lmc & 7;
+ props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
+ props->lmc = dd->ipath_lmc;
props->sm_lid = dev->sm_lid;
props->sm_sl = dev->sm_sl;
- ibcstat = dev->dd->ipath_lastibcstat;
- props->state = ((ibcstat >> 4) & 0x3) + 1;
+ ibcstat = dd->ipath_lastibcstat;
+ /* map LinkState to IB portinfo values. */
+ props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
+
/* See phys_state_show() */
- props->phys_state = ipath_cvt_physportstate[
- dev->dd->ipath_lastibcstat & 0xf];
+ props->phys_state = /* MEA: assumes shift == 0 */
+ ipath_cvt_physportstate[dd->ipath_lastibcstat &
+ dd->ibcs_lts_mask];
props->port_cap_flags = dev->port_cap_flags;
props->gid_tbl_len = 1;
props->max_msg_sz = 0x80000000;
- props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
- props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
+ props->pkey_tbl_len = ipath_get_npkeys(dd);
+ props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
dev->z_pkey_violations;
props->qkey_viol_cntr = dev->qkey_violations;
- props->active_width = IB_WIDTH_4X;
+ props->active_width = dd->ipath_link_width_active;
/* See rate_show() */
- props->active_speed = 1; /* Regular 10Mbs speed. */
+ props->active_speed = dd->ipath_link_speed_active;
props->max_vl_num = 1; /* VLCap = VL0 */
props->init_type_reply = 0;
- props->max_mtu = IB_MTU_4096;
- switch (dev->dd->ipath_ibmtu) {
+ props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+ switch (dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -1199,6 +1765,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
struct ipath_ah *ah;
struct ib_ah *ret;
struct ipath_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
/* A multicast address requires a GRH (see ch. 8.4.1). */
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
@@ -1225,19 +1792,20 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
goto bail;
}
- spin_lock(&dev->n_ahs_lock);
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
dev->n_ahs_allocated++;
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
/* ib_create_ah() will initialize ah->ibah. */
ah->attr = *ah_attr;
+ ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
ret = &ah->ibah;
@@ -1255,10 +1823,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
{
struct ipath_ibdev *dev = to_idev(ibah->device);
struct ipath_ah *ah = to_iah(ibah);
+ unsigned long flags;
- spin_lock(&dev->n_ahs_lock);
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
@@ -1270,6 +1839,7 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
struct ipath_ah *ah = to_iah(ibah);
*ah_attr = ah->attr;
+ ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
return 0;
}
@@ -1284,7 +1854,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd)
}
/**
- * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * ipath_get_pkey - return the indexed PKEY from the port PKEY table
* @dd: the infinipath device
* @index: the PKEY index
*/
@@ -1292,6 +1862,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
unsigned ret;
+ /* always a kernel port, no locking needed */
if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
ret = 0;
else
@@ -1354,13 +1925,6 @@ static void __verbs_timer(unsigned long arg)
{
struct ipath_devdata *dd = (struct ipath_devdata *) arg;
- /*
- * If port 0 receive packet interrupts are not available, or
- * can be missed, poll the receive queue
- */
- if (dd->ipath_flags & IPATH_POLL_RX_INTR)
- ipath_kreceive(dd);
-
/* Handle verbs layer timeouts. */
ipath_ib_timer(dd->verbs_dev);
@@ -1383,8 +1947,9 @@ static int enable_timer(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
0x2074076542310ULL);
/* Enable GPIO bit 2 interrupt */
+ dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
- (u64) (1 << 2));
+ dd->ipath_gpio_mask);
}
init_timer(&dd->verbs_timer);
@@ -1399,8 +1964,16 @@ static int enable_timer(struct ipath_devdata *dd)
static int disable_timer(struct ipath_devdata *dd)
{
/* Disable GPIO bit 2 interrupt */
- if (dd->ipath_flags & IPATH_GPIO_INTR)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ /* Disable GPIO bit 2 interrupt */
+ dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ /*
+ * We might want to undo changes to debugportselect,
+ * but how?
+ */
+ }
del_timer_sync(&dd->verbs_timer);
@@ -1417,6 +1990,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
struct ipath_verbs_counters cntrs;
struct ipath_ibdev *idev;
struct ib_device *dev;
+ struct ipath_verbs_txreq *tx;
+ unsigned i;
int ret;
idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
@@ -1427,6 +2002,17 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev = &idev->ibdev;
+ if (dd->ipath_sdma_descq_cnt) {
+ tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
+ GFP_KERNEL);
+ if (tx == NULL) {
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+ } else
+ tx = NULL;
+ idev->txreq_bufs = tx;
+
/* Only need to initialize non-zero fields. */
spin_lock_init(&idev->n_pds_lock);
spin_lock_init(&idev->n_ahs_lock);
@@ -1458,21 +2044,26 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
ret = -ENOMEM;
goto err_lk;
}
+ INIT_LIST_HEAD(&idev->pending_mmaps);
spin_lock_init(&idev->pending_lock);
+ idev->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&idev->mmap_offset_lock);
INIT_LIST_HEAD(&idev->pending[0]);
INIT_LIST_HEAD(&idev->pending[1]);
INIT_LIST_HEAD(&idev->pending[2]);
INIT_LIST_HEAD(&idev->piowait);
INIT_LIST_HEAD(&idev->rnrwait);
+ INIT_LIST_HEAD(&idev->txreq_free);
idev->pending_index = 0;
idev->port_cap_flags =
IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
+ if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
+ idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
- idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
- idev->link_width_enabled = 3; /* 1x or 4x */
+ idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
/* Snapshot current HW counters to "clear" them. */
ipath_get_counters(dd, &cntrs);
@@ -1492,6 +2083,10 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
cntrs.local_link_integrity_errors;
idev->z_excessive_buffer_overrun_errors =
cntrs.excessive_buffer_overrun_errors;
+ idev->z_vl15_dropped = cntrs.vl15_dropped;
+
+ for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
+ list_add(&tx->txreq.list, &idev->txreq_free);
/*
* The system image GUID is supposed to be the same for all
@@ -1540,8 +2135,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
+ dev->num_comp_vectors = 1;
dev->dma_device = &dd->pcidev->dev;
- dev->class_dev.dev = dev->dma_device;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
dev->query_port = ipath_query_port;
@@ -1583,15 +2178,17 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->detach_mcast = ipath_multicast_detach;
dev->process_mad = ipath_process_mad;
dev->mmap = ipath_mmap;
+ dev->dma_ops = &ipath_dma_mapping_ops;
snprintf(dev->node_desc, sizeof(dev->node_desc),
- IPATH_IDSTR " %s", system_utsname.nodename);
+ IPATH_IDSTR " %s", init_utsname()->nodename);
- ret = ib_register_device(dev);
+ ret = ib_register_device(dev, NULL);
if (ret)
goto err_reg;
- if (ipath_verbs_register_sysfs(dev))
+ ret = ipath_verbs_register_sysfs(dev);
+ if (ret)
goto err_class;
enable_timer(dd);
@@ -1605,6 +2202,8 @@ err_reg:
err_lk:
kfree(idev->qp_table.table);
err_qp:
+ kfree(idev->txreq_bufs);
+err_tx:
ib_dealloc_device(dev);
ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
idev = NULL;
@@ -1617,11 +2216,12 @@ bail:
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
struct ib_device *ibdev = &dev->ibdev;
-
- disable_timer(dev->dd);
+ u32 qps_inuse;
ib_unregister_device(ibdev);
+ disable_timer(dev->dd);
+
if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2]))
@@ -1636,24 +2236,30 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
* Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed!
*/
- ipath_free_all_qps(&dev->qp_table);
+ qps_inuse = ipath_free_all_qps(&dev->qp_table);
+ if (qps_inuse)
+ ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
kfree(dev->qp_table.table);
kfree(dev->lk_table.table);
+ kfree(dev->txreq_bufs);
ib_dealloc_device(ibdev);
}
-static ssize_t show_rev(struct class_device *cdev, char *buf)
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct ipath_ibdev *dev =
- container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
+ container_of(device, struct ipath_ibdev, ibdev.dev);
return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}
-static ssize_t show_hca(struct class_device *cdev, char *buf)
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct ipath_ibdev *dev =
- container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
+ container_of(device, struct ipath_ibdev, ibdev.dev);
int ret;
ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
@@ -1666,10 +2272,11 @@ bail:
return ret;
}
-static ssize_t show_stats(struct class_device *cdev, char *buf)
+static ssize_t show_stats(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct ipath_ibdev *dev =
- container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
+ container_of(device, struct ipath_ibdev, ibdev.dev);
int i;
int len;
@@ -1684,14 +2291,14 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
"RC timeouts %d\n"
"RC RDMA dup %d\n"
"piobuf wait %d\n"
- "no piobuf %d\n"
+ "unaligned %d\n"
"PKT drops %d\n"
"WQE errs %d\n",
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_piowait,
- dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
+ dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
+ dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
@@ -1704,16 +2311,16 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
return len;
}
-static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-static struct class_device_attribute *ipath_class_attributes[] = {
- &class_device_attr_hw_rev,
- &class_device_attr_hca_type,
- &class_device_attr_board_id,
- &class_device_attr_stats
+static struct device_attribute *ipath_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+ &dev_attr_stats
};
static int ipath_verbs_register_sysfs(struct ib_device *dev)
@@ -1721,15 +2328,15 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
int i;
int ret;
- for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
- if (class_device_create_file(&dev->class_dev,
- ipath_class_attributes[i])) {
- ret = 1;
+ for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
+ ret = device_create_file(&dev->dev,
+ ipath_class_attributes[i]);
+ if (ret)
goto bail;
- }
-
- ret = 0;
-
+ }
+ return 0;
bail:
+ for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
+ device_remove_file(&dev->dev, ipath_class_attributes[i]);
return ret;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 09bbb3f9a21..ae6cff4abff 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -40,8 +40,11 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
-#include "ipath_layer.h"
+#include "ipath_kernel.h"
+
+#define IPATH_MAX_RDMA_ATOMIC 4
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -58,6 +61,7 @@
*/
#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+/* AETH NAK opcode values */
#define IB_RNR_NAK 0x20
#define IB_NAK_PSN_ERROR 0x60
#define IB_NAK_INVALID_REQUEST 0x61
@@ -65,10 +69,16 @@
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
+/* Flags for checking QP state (see ib_ipath_state_ops[]) */
#define IPATH_POST_SEND_OK 0x01
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
+#define IPATH_PROCESS_NEXT_SEND_OK 0x10
+#define IPATH_FLUSH_SEND 0x20
+#define IPATH_FLUSH_RECV 0x40
+#define IPATH_PROCESS_OR_FLUSH_SEND \
+ (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
@@ -76,11 +86,11 @@
#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001)
-#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003)
-#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005)
+#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
struct ib_reth {
__be64 vaddr;
@@ -89,7 +99,7 @@ struct ib_reth {
} __attribute__ ((packed));
struct ib_atomic_eth {
- __be64 vaddr;
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
@@ -108,7 +118,7 @@ struct ipath_other_headers {
} rc;
struct {
__be32 aeth;
- __be64 atomic_ack_eth;
+ __be32 atomic_ack_eth[2];
} at;
__be32 imm_data;
__be32 aeth;
@@ -133,6 +143,11 @@ struct ipath_ib_header {
} u;
} __attribute__ ((packed));
+struct ipath_pio_header {
+ __le32 pbc[2];
+ struct ipath_ib_header hdr;
+} __attribute__ ((packed));
+
/*
* There is one struct ipath_mcast for each multicast GID.
* All attached QPs are then stored as a list of
@@ -170,12 +185,12 @@ struct ipath_ah {
* this as its vm_private_data.
*/
struct ipath_mmap_info {
- struct ipath_mmap_info *next;
+ struct list_head pending_mmaps;
struct ib_ucontext *context;
void *obj;
+ __u64 offset;
struct kref ref;
unsigned size;
- unsigned mmap_cnt;
};
/*
@@ -186,7 +201,11 @@ struct ipath_mmap_info {
struct ipath_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
};
/*
@@ -220,6 +239,7 @@ struct ipath_segarray {
};
struct ipath_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
u64 user_base; /* User's address for this region */
u64 iova; /* IB start address of this region */
size_t length;
@@ -237,7 +257,7 @@ struct ipath_mregion {
*/
struct ipath_sge {
struct ipath_mregion *mr;
- void *vaddr; /* current pointer into the segment */
+ void *vaddr; /* kernel virtual address of segment */
u32 sge_length; /* length of the SGE */
u32 length; /* remaining length of the segment */
u16 m; /* current index: mr->map[m] */
@@ -247,6 +267,7 @@ struct ipath_sge {
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */
};
@@ -308,6 +329,21 @@ struct ipath_sge_state {
struct ipath_sge *sg_list; /* next SGE to be used if any */
struct ipath_sge sge; /* progress state for the current SGE */
u8 num_sge;
+ u8 static_rate;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ union {
+ struct ipath_sge_state rdma_sge;
+ u64 atomic_data;
+ };
};
/*
@@ -322,34 +358,39 @@ struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
+ struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
struct tasklet_struct s_task;
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
+ struct ipath_verbs_txreq *s_tx;
struct ipath_sge_state s_sge; /* current send request data */
- /* current RDMA read send data */
- struct ipath_sge_state s_rdma_sge;
+ struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+ struct ipath_sge_state s_ack_rdma_sge;
+ struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
- unsigned long s_flags;
- u32 s_hdrwords; /* size of s_hdr in 32 bit words */
+ atomic_t s_dma_busy;
+ u16 s_pkt_delay;
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
- u32 s_rdma_len; /* total length of s_rdma_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
- u32 s_ack_psn; /* PSN for RDMA_READ */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
- u64 r_atomic_data; /* data for last atomic op */
- u32 r_atomic_psn; /* PSN of last atomic op */
+ unsigned long r_aflags;
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
@@ -359,11 +400,11 @@ struct ipath_qp {
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
- u8 r_ack_state; /* opcode of packet to ACK */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 r_reuse_sge; /* for UC receive errors */
- u8 r_sge_inx; /* current index into sg_list */
+ u8 r_flags;
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
@@ -371,6 +412,12 @@ struct ipath_qp {
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_pkey_index; /* PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_flags;
+ u8 s_dmult;
+ u8 s_draining;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
@@ -383,15 +430,48 @@ struct ipath_qp {
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
+ struct ipath_swqe *s_wqe;
+ struct ipath_sge *r_ud_sg_list;
struct ipath_rq r_rq; /* receive work queue */
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define IPATH_R_WRID_VALID 0
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define IPATH_R_REUSE_SGE 0x01
+#define IPATH_R_RDMAR_SEQ 0x02
+
+/*
* Bit definitions for s_flags.
+ *
+ * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
+ * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA.
*/
-#define IPATH_S_BUSY 0
-#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_S_SIGNAL_REQ_WR 0x01
+#define IPATH_S_FENCE_PENDING 0x02
+#define IPATH_S_RDMAR_PENDING 0x04
+#define IPATH_S_ACK_PENDING 0x08
+#define IPATH_S_BUSY 0x10
+#define IPATH_S_WAITING 0x20
+#define IPATH_S_WAIT_SSN_CREDIT 0x40
+#define IPATH_S_WAIT_DMA 0x80
+
+#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
+ IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
+
+#define IPATH_PSN_CREDIT 512
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
@@ -454,13 +534,14 @@ struct ipath_opcode_stats {
struct ipath_ibdev {
struct ib_device ibdev;
- struct list_head dev_list;
struct ipath_devdata *dd;
- struct ipath_mmap_info *pending_mmaps;
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock;
+ u32 mmap_offset;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
- u8 mkeyprot_resv_lmc;
+ u8 mkeyprot;
/* non-zero when timer is set */
unsigned long mkey_lease_timeout;
@@ -469,6 +550,8 @@ struct ipath_ibdev {
struct ipath_lkey_table lk_table;
struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
struct list_head piowait; /* list for wait PIO buf */
+ struct list_head txreq_free;
+ void *txreq_bufs;
/* list of QPs waiting for RNR timer */
struct list_head rnrwait;
spinlock_t pending_lock;
@@ -513,6 +596,7 @@ struct ipath_ibdev {
u32 z_pkey_violations; /* starting count for PMA */
u32 z_local_link_integrity_errors; /* starting count for PMA */
u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
+ u32 z_vl15_dropped; /* starting count for PMA */
u32 n_rc_resends;
u32 n_rc_acks;
u32 n_rc_qacks;
@@ -526,7 +610,7 @@ struct ipath_ibdev {
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
- u32 n_no_piobuf;
+ u32 n_unaligned;
u32 port_cap_flags;
u32 pma_sample_start;
u32 pma_sample_interval;
@@ -538,7 +622,6 @@ struct ipath_ibdev {
u16 pending_index; /* which pending queue is active */
u8 pma_sample_status;
u8 subnet_timeout;
- u8 link_width_enabled;
u8 vl_high_limit;
struct ipath_opcode_stats opstats[128];
};
@@ -556,6 +639,17 @@ struct ipath_verbs_counters {
u64 port_rcv_packets;
u32 local_link_integrity_errors;
u32 excessive_buffer_overrun_errors;
+ u32 vl15_dropped;
+};
+
+struct ipath_verbs_txreq {
+ struct ipath_qp *qp;
+ struct ipath_swqe *wqe;
+ u32 map_len;
+ u32 len;
+ struct ipath_sge_state *ss;
+ struct ipath_pio_header hdr;
+ struct ipath_sdma_txreq txreq;
};
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -593,6 +687,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
+/*
+ * This must be called with s_lock held.
+ */
+static inline void ipath_schedule_send(struct ipath_qp *qp)
+{
+ if (qp->s_flags & IPATH_S_ANY_WAIT)
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ if (!(qp->s_flags & IPATH_S_BUSY))
+ tasklet_hi_schedule(&qp->s_task);
+}
+
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
@@ -634,44 +739,38 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
-void ipath_free_all_qps(struct ipath_qp_table *qpt);
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
-
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 *hdr, u32 len, struct ipath_sge_state *ss);
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
- struct ib_sge *sge, int acc);
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+ u32 hdrwords, struct ipath_sge_state *ss, u32 len);
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
@@ -683,10 +782,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -708,13 +807,13 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata);
int ipath_destroy_cq(struct ib_cq *ibcq);
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
@@ -724,8 +823,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
@@ -742,27 +841,41 @@ int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
void ipath_release_mmap_info(struct kref *ref);
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj);
-void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss);
+
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void ipath_do_ruc_send(unsigned long data);
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 bth0, u32 bth2);
+
+void ipath_do_send(unsigned long data);
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
- u32 pmtu);
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+ enum ib_wc_status status);
-int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
- u32 pmtu, u32 *bth0p, u32 *bth2p);
+int ipath_make_rc_req(struct ipath_qp *qp);
-int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
- u32 pmtu, u32 *bth0p, u32 *bth2p);
+int ipath_make_uc_req(struct ipath_qp *qp);
+
+int ipath_make_ud_req(struct ipath_qp *qp);
int ipath_register_ib_device(struct ipath_devdata *);
@@ -772,8 +885,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
int ipath_ib_piobufavail(struct ipath_ibdev *);
-void ipath_ib_timer(struct ipath_ibdev *);
-
unsigned ipath_get_npkeys(struct ipath_devdata *);
u32 ipath_get_cr_errpkey(struct ipath_devdata *);
@@ -782,7 +893,17 @@ unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
+/*
+ * Below converts HCA-specific LinkTrainingState to IB PhysPortState
+ * values.
+ */
extern const u8 ipath_cvt_physportstate[];
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
extern const int ib_ipath_state_ops[];
@@ -810,4 +931,6 @@ extern unsigned int ib_ipath_max_srq_wrs;
extern const u32 ib_ipath_rnr_table[];
+extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
+
#endif /* IPATH_VERBS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 085e28b939e..6216ea92385 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,8 +31,9 @@
* SOFTWARE.
*/
-#include <linux/list.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include "ipath_verbs.h"
@@ -165,10 +166,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
{
struct rb_node **n = &mcast_tree.rb_node;
struct rb_node *pn = NULL;
- unsigned long flags;
int ret;
- spin_lock_irqsave(&mcast_lock, flags);
+ spin_lock_irq(&mcast_lock);
while (*n) {
struct ipath_mcast *tmcast;
@@ -228,7 +228,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
ret = 0;
bail:
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
return ret;
}
@@ -289,17 +289,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
- unsigned long flags;
int last = 0;
int ret;
- spin_lock_irqsave(&mcast_lock, flags);
+ spin_lock_irq(&mcast_lock);
/* Find the GID in the mcast table. */
n = mcast_tree.rb_node;
while (1) {
if (n == NULL) {
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
ret = -EINVAL;
goto bail;
}
@@ -334,7 +333,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
break;
}
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
if (p) {
/*
@@ -348,9 +347,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
- spin_lock(&dev->n_mcast_grps_lock);
+ spin_lock_irq(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
- spin_unlock(&dev->n_mcast_grps_lock);
+ spin_unlock_irq(&dev->n_mcast_grps_lock);
}
ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
index 036fde662aa..1d7bd82a1fb 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,13 +38,23 @@
#include "ipath_kernel.h"
/**
- * ipath_unordered_wc - indicate whether write combining is ordered
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
*
- * PowerPC systems (at least those in the 970 processor family)
- * write partially filled store buffers in address order, but will write
- * completely filled store buffers in "random" order, and therefore must
- * have serialization for correctness with current InfiniPath chips.
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
*
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
*/
int ipath_unordered_wc(void)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index f8f9e2e8cbd..3428acb0868 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd)
* of 2 address matching the length (which has to be a power of 2).
* For rev1, that means the base address, for rev2, it will be just
* the PIO buffers themselves.
+ * For chips with two sets of buffers, the calculations are
+ * somewhat more complicated; we need to sum, and the piobufbase
+ * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+ * The buffers are still packed, so a single range covers both.
*/
- pioaddr = addr + dd->ipath_piobufbase;
- piolen = (dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) *
- ALIGN(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k, dd->ipath_palign);
+ if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
+ unsigned long pio2kbase, pio4kbase;
+ pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
+ pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
+ if (pio2kbase < pio4kbase) { /* all, for now */
+ pioaddr = addr + pio2kbase;
+ piolen = pio4kbase - pio2kbase +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ } else {
+ pioaddr = addr + pio4kbase;
+ piolen = pio2kbase - pio4kbase +
+ dd->ipath_piobcnt2k * dd->ipath_palign;
+ }
+ } else { /* single buffer size (2K, currently) */
+ pioaddr = addr + dd->ipath_piobufbase;
+ piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ }
for (bits = 0; !(piolen & (1ULL << bits)); bits++)
/* do nothing */ ;
@@ -123,6 +140,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
+ dd->ipath_wc_base = (unsigned long) pioaddr;
+ dd->ipath_wc_len = (unsigned long) piolen;
}
}
@@ -136,9 +155,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
+ int r;
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
- mtrr_del(dd->ipath_wc_cookie, 0, 0);
- dd->ipath_wc_cookie = 0;
+ r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len);
+ if (r < 0)
+ dev_info(&dd->pcidev->dev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len, r);
+ dd->ipath_wc_cookie = 0; /* even on failure */
}
}