Diffstat (limited to 'drivers/infiniband/hw/mthca')
-rw-r--r--  drivers/infiniband/hw/mthca/Kconfig            |   13
-rw-r--r--  drivers/infiniband/hw/mthca/Makefile           |    9
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_allocator.c  |  148
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c         |  187
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c      |  200
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c        | 1256
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h        |  155
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_config_reg.h |    5
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c         |  724
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h        |  198
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_doorbell.h   |   21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c         |  409
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c        |  192
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c       |  909
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c        |  239
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c    |  394
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h    |   49
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c         |  632
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_pd.c         |   33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c    |   30
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.h    |    6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c   |  970
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h   |  164
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c         | 1931
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c      |   72
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c        |  696
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_uar.c        |    6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h       |  112
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_wqe.h        |  131
29 files changed, 6464 insertions, 3427 deletions
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index e88be85b3d5..da314c3fec2 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -1,16 +1,17 @@
config INFINIBAND_MTHCA
tristate "Mellanox HCA support"
- depends on PCI && INFINIBAND
+ depends on PCI
---help---
This is a low-level driver for Mellanox InfiniHost host
channel adapters (HCAs), including the MT23108 PCI-X HCA
("Tavor") and the MT25208 PCI Express HCA ("Arbel").
config INFINIBAND_MTHCA_DEBUG
- bool "Verbose debugging output"
+ bool "Verbose debugging output" if EXPERT
depends on INFINIBAND_MTHCA
- default n
+ default y
---help---
- This option causes the mthca driver produce a bunch of debug
- messages. Select this is you are developing the driver or
- trying to diagnose a problem.
+ This option causes debugging code to be compiled into the
+ mthca driver. The output can be turned on via the
+ debug_level module parameter (which can also be set after
+ the driver is loaded through sysfs).
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index 5dcbd43073e..e388d95d0cf 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,12 +1,7 @@
-EXTRA_CFLAGS += -Idrivers/infiniband/include
-
-ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
-
obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
- mthca_provider.o mthca_memfree.o mthca_uar.o
+ mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
+ mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index b1db48dd91d..b4e0cf4e95c 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/errno.h>
@@ -41,9 +39,11 @@
/* Trivial bitmap-based allocator */
u32 mthca_alloc(struct mthca_alloc *alloc)
{
+ unsigned long flags;
u32 obj;
- spin_lock(&alloc->lock);
+ spin_lock_irqsave(&alloc->lock, flags);
+
obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
if (obj >= alloc->max) {
alloc->top = (alloc->top + alloc->max) & alloc->mask;
@@ -56,19 +56,24 @@ u32 mthca_alloc(struct mthca_alloc *alloc)
} else
obj = -1;
- spin_unlock(&alloc->lock);
+ spin_unlock_irqrestore(&alloc->lock, flags);
return obj;
}
void mthca_free(struct mthca_alloc *alloc, u32 obj)
{
+ unsigned long flags;
+
obj &= alloc->max - 1;
- spin_lock(&alloc->lock);
+
+ spin_lock_irqsave(&alloc->lock, flags);
+
clear_bit(obj, alloc->table);
alloc->last = min(alloc->last, obj);
alloc->top = (alloc->top + alloc->max) & alloc->mask;
- spin_unlock(&alloc->lock);
+
+ spin_unlock_irqrestore(&alloc->lock, flags);
}
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
@@ -108,14 +113,15 @@ void mthca_alloc_cleanup(struct mthca_alloc *alloc)
* serialize access to the array.
*/
+#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1)
+
void *mthca_array_get(struct mthca_array *array, int index)
{
int p = (index * sizeof (void *)) >> PAGE_SHIFT;
- if (array->page_list[p].page) {
- int i = index & (PAGE_SIZE / sizeof (void *) - 1);
- return array->page_list[p].page[i];
- } else
+ if (array->page_list[p].page)
+ return array->page_list[p].page[index & MTHCA_ARRAY_MASK];
+ else
return NULL;
}
@@ -130,8 +136,7 @@ int mthca_array_set(struct mthca_array *array, int index, void *value)
if (!array->page_list[p].page)
return -ENOMEM;
- array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] =
- value;
+ array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value;
++array->page_list[p].used;
return 0;
@@ -144,7 +149,8 @@ void mthca_array_clear(struct mthca_array *array, int index)
if (--array->page_list[p].used == 0) {
free_page((unsigned long) array->page_list[p].page);
array->page_list[p].page = NULL;
- }
+ } else
+ array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL;
if (array->page_list[p].used < 0)
pr_debug("Array %p index %d page %d with ref count %d < 0\n",
@@ -177,3 +183,119 @@ void mthca_array_cleanup(struct mthca_array *array, int nent)
kfree(array->page_list);
}
+
+/*
+ * Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0. If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+ union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+ int hca_write, struct mthca_mr *mr)
+{
+ int err = -ENOMEM;
+ int npages, shift;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ int i;
+
+ if (size <= max_direct) {
+ *is_direct = 1;
+ npages = 1;
+ shift = get_order(size) + PAGE_SHIFT;
+
+ buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+ size, &t, GFP_KERNEL);
+ if (!buf->direct.buf)
+ return -ENOMEM;
+
+ dma_unmap_addr_set(&buf->direct, mapping, t);
+
+ memset(buf->direct.buf, 0, size);
+
+ while (t & ((1 << shift) - 1)) {
+ --shift;
+ npages *= 2;
+ }
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_free;
+
+ for (i = 0; i < npages; ++i)
+ dma_list[i] = t + i * (1 << shift);
+ } else {
+ *is_direct = 0;
+ npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ shift = PAGE_SHIFT;
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ return -ENOMEM;
+
+ buf->page_list = kmalloc(npages * sizeof *buf->page_list,
+ GFP_KERNEL);
+ if (!buf->page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ buf->page_list[i].buf = NULL;
+
+ for (i = 0; i < npages; ++i) {
+ buf->page_list[i].buf =
+ dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &t, GFP_KERNEL);
+ if (!buf->page_list[i].buf)
+ goto err_free;
+
+ dma_list[i] = t;
+ dma_unmap_addr_set(&buf->page_list[i], mapping, t);
+
+ clear_page(buf->page_list[i].buf);
+ }
+ }
+
+ err = mthca_mr_alloc_phys(dev, pd->pd_num,
+ dma_list, shift, npages,
+ 0, size,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
+ mr);
+ if (err)
+ goto err_free;
+
+ kfree(dma_list);
+
+ return 0;
+
+err_free:
+ mthca_buf_free(dev, size, buf, *is_direct, NULL);
+
+err_out:
+ kfree(dma_list);
+
+ return err;
+}
+
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+ int is_direct, struct mthca_mr *mr)
+{
+ int i;
+
+ if (mr)
+ mthca_free_mr(dev, mr);
+
+ if (is_direct)
+ dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+ dma_unmap_addr(&buf->direct, mapping));
+ else {
+ for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ buf->page_list[i].buf,
+ dma_unmap_addr(&buf->page_list[i],
+ mapping));
+ kfree(buf->page_list);
+ }
+}
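
[Editor's note] The new queue-buffer helpers above are used by the CQ, QP and SRQ code elsewhere in this patch. A minimal caller sketch, not taken from the patch (the direct-size threshold named here is hypothetical; real callers define their own):

	int is_direct;
	union mthca_buf buf;
	struct mthca_mr mr;
	int err;

	/* Allocate a queue buffer the HCA may write, registered at HCA virtual address 0. */
	err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_SIZE /* hypothetical */,
			      &buf, &is_direct, pd, 1, &mr);
	if (err)
		return err;

	/* ... use buf.direct.buf or buf.page_list[i].buf ... */

	mthca_buf_free(dev, size, &buf, is_direct, &mr);
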
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 085baf393ca..32f6c631545 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,30 +29,127 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
-#include <ib_verbs.h>
-#include <ib_cache.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
#include "mthca_dev.h"
+enum {
+ MTHCA_RATE_TAVOR_FULL = 0,
+ MTHCA_RATE_TAVOR_1X = 1,
+ MTHCA_RATE_TAVOR_4X = 2,
+ MTHCA_RATE_TAVOR_1X_DDR = 3
+};
+
+enum {
+ MTHCA_RATE_MEMFREE_FULL = 0,
+ MTHCA_RATE_MEMFREE_QUARTER = 1,
+ MTHCA_RATE_MEMFREE_EIGHTH = 2,
+ MTHCA_RATE_MEMFREE_HALF = 3
+};
+
struct mthca_av {
- u32 port_pd;
- u8 reserved1;
- u8 g_slid;
- u16 dlid;
- u8 reserved2;
- u8 gid_index;
- u8 msg_sr;
- u8 hop_limit;
- u32 sl_tclass_flowlabel;
- u32 dgid[4];
+ __be32 port_pd;
+ u8 reserved1;
+ u8 g_slid;
+ __be16 dlid;
+ u8 reserved2;
+ u8 gid_index;
+ u8 msg_sr;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ __be32 dgid[4];
};
+static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+ switch (mthca_rate) {
+ case MTHCA_RATE_MEMFREE_EIGHTH:
+ return mult_to_ib_rate(port_rate >> 3);
+ case MTHCA_RATE_MEMFREE_QUARTER:
+ return mult_to_ib_rate(port_rate >> 2);
+ case MTHCA_RATE_MEMFREE_HALF:
+ return mult_to_ib_rate(port_rate >> 1);
+ case MTHCA_RATE_MEMFREE_FULL:
+ default:
+ return mult_to_ib_rate(port_rate);
+ }
+}
+
+static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+ switch (mthca_rate) {
+ case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS;
+ case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
+ case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS;
+ default: return mult_to_ib_rate(port_rate);
+ }
+}
+
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+{
+ if (mthca_is_memfree(dev)) {
+ /* Handle old Arbel FW */
+ if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
+ return IB_RATE_2_5_GBPS;
+
+ return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+ } else
+ return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+}
+
+static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
+{
+ if (cur_rate <= req_rate)
+ return 0;
+
+ /*
+ * Inter-packet delay (IPD) to get from rate X down to a rate
+ * no more than Y is (X - 1) / Y.
+ */
+ switch ((cur_rate - 1) / req_rate) {
+ case 0: return MTHCA_RATE_MEMFREE_FULL;
+ case 1: return MTHCA_RATE_MEMFREE_HALF;
+ case 2: /* fall through */
+ case 3: return MTHCA_RATE_MEMFREE_QUARTER;
+ default: return MTHCA_RATE_MEMFREE_EIGHTH;
+ }
+}
+
+static u8 ib_rate_to_tavor(u8 static_rate)
+{
+ switch (static_rate) {
+ case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
+ case IB_RATE_5_GBPS: return MTHCA_RATE_TAVOR_1X_DDR;
+ case IB_RATE_10_GBPS: return MTHCA_RATE_TAVOR_4X;
+ default: return MTHCA_RATE_TAVOR_FULL;
+ }
+}
+
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+{
+ u8 rate;
+
+ if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
+ return 0;
+
+ if (mthca_is_memfree(dev))
+ rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
+ dev->rate[port - 1]);
+ else
+ rate = ib_rate_to_tavor(static_rate);
+
+ if (!(dev->limits.stat_rate_support & (1 << rate)))
+ rate = 1;
+
+ return rate;
+}
+
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
struct ib_ah_attr *ah_attr,
@@ -89,7 +187,7 @@ int mthca_create_ah(struct mthca_dev *dev,
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
ah->av = pci_pool_alloc(dev->av_table.pool,
- SLAB_ATOMIC, &ah->avdma);
+ GFP_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;
@@ -104,7 +202,7 @@ on_hca_fail:
av->g_slid = ah_attr->src_path_bits;
av->dlid = cpu_to_be16(ah_attr->dlid);
av->msg_sr = (3 << 4) | /* 2K message */
- ah_attr->static_rate;
+ mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
if (ah_attr->ah_flags & IB_AH_GRH) {
av->g_slid |= 0x80;
@@ -127,7 +225,7 @@ on_hca_fail:
av, (unsigned long) ah->avdma);
for (j = 0; j < 8; ++j)
printk(KERN_DEBUG " [%2x] %08x\n",
- j * 4, be32_to_cpu(((u32 *) av)[j]));
+ j * 4, be32_to_cpu(((__be32 *) av)[j]));
}
if (ah->type == MTHCA_AH_ON_HCA) {
@@ -144,7 +242,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
switch (ah->type) {
case MTHCA_AH_ON_HCA:
mthca_free(&dev->av_table.alloc,
- (ah->avdma - dev->av_table.ddr_av_base) /
+ (ah->avdma - dev->av_table.ddr_av_base) /
MTHCA_AV_SIZE);
break;
@@ -160,6 +258,11 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
return 0;
}
+int mthca_ah_grh_present(struct mthca_ah *ah)
+{
+ return !!(ah->av->g_slid & 0x80);
+}
+
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header)
{
@@ -168,27 +271,57 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
header->lrh.destination_lid = ah->av->dlid;
- header->lrh.source_lid = ah->av->g_slid & 0x7f;
- if (ah->av->g_slid & 0x80) {
- header->grh_present = 1;
+ header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f);
+ if (mthca_ah_grh_present(ah)) {
header->grh.traffic_class =
(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ header->grh.hop_limit = ah->av->hop_limit;
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
- ah->av->gid_index,
+ ah->av->gid_index % dev->limits.gid_table_len,
&header->grh.source_gid);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
- } else {
- header->grh_present = 0;
}
return 0;
}
-int __devinit mthca_init_av_table(struct mthca_dev *dev)
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ struct mthca_ah *ah = to_mah(ibah);
+ struct mthca_dev *dev = to_mdev(ibah->device);
+
+ /* Only implement for MAD and memfree ah for now. */
+ if (ah->type == MTHCA_AH_ON_HCA)
+ return -ENOSYS;
+
+ memset(attr, 0, sizeof *attr);
+ attr->dlid = be16_to_cpu(ah->av->dlid);
+ attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+ attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
+ attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
+ attr->port_num);
+ attr->src_path_bits = ah->av->g_slid & 0x7F;
+ attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
+
+ if (attr->ah_flags) {
+ attr->grh.traffic_class =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
+ attr->grh.flow_label =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
+ attr->grh.hop_limit = ah->av->hop_limit;
+ attr->grh.sgid_index = ah->av->gid_index &
+ (dev->limits.gid_table_len - 1);
+ memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ }
+
+ return 0;
+}
+
+int mthca_init_av_table(struct mthca_dev *dev)
{
int err;
@@ -229,7 +362,7 @@ int __devinit mthca_init_av_table(struct mthca_dev *dev)
return -ENOMEM;
}
-void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
+void mthca_cleanup_av_table(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev))
return;
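
[Editor's note] The inter-packet delay computation in ib_rate_to_memfree() above can be checked with a small worked example (illustrative values only; the function is static to this file):

	/*
	 * Throttling a 4X port (rate multiplier 4) to a 1X static rate
	 * (ib_rate_to_mult(IB_RATE_2_5_GBPS) == 1): (4 - 1) / 1 == 3, so
	 * the switch returns MTHCA_RATE_MEMFREE_QUARTER -- transmit in at
	 * most one quarter of the slots, i.e. no faster than 1X.
	 */
	u8 ipd = ib_rate_to_memfree(1, 4);	/* == MTHCA_RATE_MEMFREE_QUARTER */
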
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 00000000000..712d2a30fbe
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
+#include "mthca_dev.h"
+
+enum {
+ MTHCA_CATAS_POLL_INTERVAL = 5 * HZ,
+
+ MTHCA_CATAS_TYPE_INTERNAL = 0,
+ MTHCA_CATAS_TYPE_UPLINK = 3,
+ MTHCA_CATAS_TYPE_DDR = 4,
+ MTHCA_CATAS_TYPE_PARITY = 5,
+};
+
+static DEFINE_SPINLOCK(catas_lock);
+
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+static void catas_reset(struct work_struct *work)
+{
+ struct mthca_dev *dev, *tmpdev;
+ LIST_HEAD(tlist);
+ int ret;
+
+ mutex_lock(&mthca_device_mutex);
+
+ spin_lock_irq(&catas_lock);
+ list_splice_init(&catas_list, &tlist);
+ spin_unlock_irq(&catas_lock);
+
+ list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+ struct pci_dev *pdev = dev->pdev;
+ ret = __mthca_restart_one(dev->pdev);
+ /* 'dev' now is not valid */
+ if (ret)
+ printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
+ pci_name(pdev), ret);
+ else {
+ struct mthca_dev *d = pci_get_drvdata(pdev);
+ mthca_dbg(d, "Reset succeeded\n");
+ }
+ }
+
+ mutex_unlock(&mthca_device_mutex);
+}
+
+static void handle_catas(struct mthca_dev *dev)
+{
+ struct ib_event event;
+ unsigned long flags;
+ const char *type;
+ int i;
+
+ event.device = &dev->ib_dev;
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+ dev->active = false;
+
+ ib_dispatch_event(&event);
+
+ switch (swab32(readl(dev->catas_err.map)) >> 24) {
+ case MTHCA_CATAS_TYPE_INTERNAL:
+ type = "internal error";
+ break;
+ case MTHCA_CATAS_TYPE_UPLINK:
+ type = "uplink bus error";
+ break;
+ case MTHCA_CATAS_TYPE_DDR:
+ type = "DDR data error";
+ break;
+ case MTHCA_CATAS_TYPE_PARITY:
+ type = "internal parity error";
+ break;
+ default:
+ type = "unknown error";
+ break;
+ }
+
+ mthca_err(dev, "Catastrophic error detected: %s\n", type);
+ for (i = 0; i < dev->catas_err.size; ++i)
+ mthca_err(dev, " buf[%02x]: %08x\n",
+ i, swab32(readl(dev->catas_err.map + i)));
+
+ if (catas_reset_disable)
+ return;
+
+ spin_lock_irqsave(&catas_lock, flags);
+ list_add(&dev->catas_err.list, &catas_list);
+ queue_work(catas_wq, &catas_work);
+ spin_unlock_irqrestore(&catas_lock, flags);
+}
+
+static void poll_catas(unsigned long dev_ptr)
+{
+ struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+ int i;
+
+ for (i = 0; i < dev->catas_err.size; ++i)
+ if (readl(dev->catas_err.map + i)) {
+ handle_catas(dev);
+ return;
+ }
+
+ mod_timer(&dev->catas_err.timer,
+ round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
+}
+
+void mthca_start_catas_poll(struct mthca_dev *dev)
+{
+ phys_addr_t addr;
+
+ init_timer(&dev->catas_err.timer);
+ dev->catas_err.map = NULL;
+
+ addr = pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr);
+
+ dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
+ if (!dev->catas_err.map) {
+ mthca_warn(dev, "couldn't map catastrophic error region "
+ "at 0x%llx/0x%x\n", (unsigned long long) addr,
+ dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.timer.data = (unsigned long) dev;
+ dev->catas_err.timer.function = poll_catas;
+ dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+ INIT_LIST_HEAD(&dev->catas_err.list);
+ add_timer(&dev->catas_err.timer);
+}
+
+void mthca_stop_catas_poll(struct mthca_dev *dev)
+{
+ del_timer_sync(&dev->catas_err.timer);
+
+ if (dev->catas_err.map)
+ iounmap(dev->catas_err.map);
+
+ spin_lock_irq(&catas_lock);
+ list_del(&dev->catas_err.list);
+ spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+ INIT_WORK(&catas_work, catas_reset);
+
+ catas_wq = create_singlethread_workqueue("mthca_catas");
+ if (!catas_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+ destroy_workqueue(catas_wq);
+}
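
[Editor's note] A brief worked example of the decoding in handle_catas() above (illustrative value, not from the patch): the error type sits in the most-significant byte of the first big-endian word of the catastrophic error buffer.

	u32 word = swab32(readl(dev->catas_err.map));	/* e.g. 0x03000000 */
	u8  type = word >> 24;				/* 3 == MTHCA_CATAS_TYPE_UPLINK */
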
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index cd9ed958d92..9d3e5c1ac60 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,15 +30,16 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/sched.h>
+#include <linux/completion.h>
#include <linux/pci.h>
#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/io.h>
-#include <ib_mad.h>
+#include <rdma/ib_mad.h>
#include "mthca_dev.h"
#include "mthca_config_reg.h"
@@ -108,6 +111,7 @@ enum {
CMD_SW2HW_SRQ = 0x35,
CMD_HW2SW_SRQ = 0x36,
CMD_QUERY_SRQ = 0x37,
+ CMD_ARM_SRQ = 0x40,
/* QP/EE commands */
CMD_RST2INIT_QPEE = 0x19,
@@ -155,13 +159,15 @@ enum {
enum {
CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1,
CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1,
- CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1
+ CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1,
+ CMD_TIME_CLASS_D = 60 * HZ
};
#else
enum {
CMD_TIME_CLASS_A = 60 * HZ,
CMD_TIME_CLASS_B = 60 * HZ,
- CMD_TIME_CLASS_C = 60 * HZ
+ CMD_TIME_CLASS_C = 60 * HZ,
+ CMD_TIME_CLASS_D = 60 * HZ
};
#endif
@@ -171,7 +177,6 @@ enum {
struct mthca_cmd_context {
struct completion done;
- struct timer_list timer;
int result;
int next;
u64 out_param;
@@ -179,26 +184,58 @@ struct mthca_cmd_context {
u8 status;
};
+static int fw_cmd_doorbell = 0;
+module_param(fw_cmd_doorbell, int, 0644);
+MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
+ "(and supported by FW)");
+
static inline int go_bit(struct mthca_dev *dev)
{
return readl(dev->hcr + HCR_STATUS_OFFSET) &
swab32(1 << HCR_GO_BIT);
}
-static int mthca_cmd_post(struct mthca_dev *dev,
- u64 in_param,
- u64 out_param,
- u32 in_modifier,
- u8 op_modifier,
- u16 op,
- u16 token,
- int event)
+static void mthca_cmd_post_dbell(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token)
{
- int err = 0;
+ void __iomem *ptr = dev->cmd.dbell_map;
+ u16 *offs = dev->cmd.dbell_offsets;
- if (down_interruptible(&dev->cmd.hcr_sem))
- return -EINTR;
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (1 << HCA_E_BIT) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), ptr + offs[6]);
+ wmb();
+ __raw_writel((__force u32) 0, ptr + offs[7]);
+ wmb();
+}
+static int mthca_cmd_post_hcr(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
if (event) {
unsigned long end = jiffies + GO_BIT_TIMEOUT;
@@ -208,10 +245,8 @@ static int mthca_cmd_post(struct mthca_dev *dev,
}
}
- if (go_bit(dev)) {
- err = -EAGAIN;
- goto out;
- }
+ if (go_bit(dev))
+ return -EAGAIN;
/*
* We use writel (instead of something like memcpy_toio)
@@ -219,26 +254,86 @@ static int mthca_cmd_post(struct mthca_dev *dev,
* (and some architectures such as ia64 implement memcpy_toio
* in terms of writeb).
*/
- __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4);
- __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4);
- __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4);
- __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4);
- __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
- __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4);
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4);
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4);
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4);
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4);
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
+ __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4);
/* __raw_writel may not order writes. */
wmb();
- __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) |
- (event ? (1 << HCA_E_BIT) : 0) |
- (op_modifier << HCR_OPMOD_SHIFT) |
- op), dev->hcr + 6 * 4);
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (event ? (1 << HCA_E_BIT) : 0) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), dev->hcr + 6 * 4);
-out:
- up(&dev->cmd.hcr_sem);
+ return 0;
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
+ int err = 0;
+
+ mutex_lock(&dev->cmd.hcr_mutex);
+
+ if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
+ mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token);
+ else
+ err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+
+ /*
+ * Make sure that our HCR writes don't get mixed in with
+ * writes from another CPU starting a FW command.
+ */
+ mmiowb();
+
+ mutex_unlock(&dev->cmd.hcr_mutex);
return err;
}
+
+static int mthca_status_to_errno(u8 status)
+{
+ static const int trans_table[] = {
+ [MTHCA_CMD_STAT_INTERNAL_ERR] = -EIO,
+ [MTHCA_CMD_STAT_BAD_OP] = -EPERM,
+ [MTHCA_CMD_STAT_BAD_PARAM] = -EINVAL,
+ [MTHCA_CMD_STAT_BAD_SYS_STATE] = -ENXIO,
+ [MTHCA_CMD_STAT_BAD_RESOURCE] = -EBADF,
+ [MTHCA_CMD_STAT_RESOURCE_BUSY] = -EBUSY,
+ [MTHCA_CMD_STAT_DDR_MEM_ERR] = -ENOMEM,
+ [MTHCA_CMD_STAT_EXCEED_LIM] = -ENOMEM,
+ [MTHCA_CMD_STAT_BAD_RES_STATE] = -EBADF,
+ [MTHCA_CMD_STAT_BAD_INDEX] = -EBADF,
+ [MTHCA_CMD_STAT_BAD_NVMEM] = -EFAULT,
+ [MTHCA_CMD_STAT_BAD_QPEE_STATE] = -EINVAL,
+ [MTHCA_CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
+ [MTHCA_CMD_STAT_REG_BOUND] = -EBUSY,
+ [MTHCA_CMD_STAT_LAM_NOT_PRE] = -EAGAIN,
+ [MTHCA_CMD_STAT_BAD_PKT] = -EBADMSG,
+ [MTHCA_CMD_STAT_BAD_SIZE] = -ENOMEM,
+ };
+
+ if (status >= ARRAY_SIZE(trans_table) ||
+ (status != MTHCA_CMD_STAT_OK
+ && trans_table[status] == 0))
+ return -EINVAL;
+
+ return trans_table[status];
+}
+
+
static int mthca_cmd_poll(struct mthca_dev *dev,
u64 in_param,
u64 *out_param,
@@ -246,14 +341,13 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
int err = 0;
unsigned long end;
+ u8 status;
- if (down_interruptible(&dev->cmd.poll_sem))
- return -EINTR;
+ down(&dev->cmd.poll_sem);
err = mthca_cmd_post(dev, in_param,
out_param ? *out_param : 0,
@@ -273,13 +367,20 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
goto out;
}
- if (out_is_imm) {
- memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64));
- be64_to_cpus(out_param);
+ if (out_is_imm)
+ *out_param =
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+
+ status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+ if (status) {
+ mthca_dbg(dev, "Command %02x completed with status %02x\n",
+ op, status);
+ err = mthca_status_to_errno(status);
}
- *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
-
out:
up(&dev->cmd.poll_sem);
return err;
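
[Editor's note] With the firmware status now folded into the errno return through mthca_status_to_errno() above, callers drop the u8 *status out-parameter throughout the rest of this patch. A hypothetical caller sketch, not taken from the patch:

	err = mthca_SYS_EN(dev);	/* was: mthca_SYS_EN(dev, &status) */
	if (err)			/* nonzero for both posting failures and bad FW status */
		mthca_err(dev, "SYS_EN failed, aborting (err %d)\n", err);
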
@@ -301,17 +402,6 @@ void mthca_cmd_event(struct mthca_dev *dev,
context->status = status;
context->out_param = out_param;
- context->token += dev->cmd.token_mask + 1;
-
- complete(&context->done);
-}
-
-static void event_timeout(unsigned long context_ptr)
-{
- struct mthca_cmd_context *context =
- (struct mthca_cmd_context *) context_ptr;
-
- context->result = -EBUSY;
complete(&context->done);
}
@@ -322,18 +412,17 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
int err = 0;
struct mthca_cmd_context *context;
- if (down_interruptible(&dev->cmd.event_sem))
- return -EINTR;
+ down(&dev->cmd.event_sem);
spin_lock(&dev->cmd.context_lock);
BUG_ON(dev->cmd.free_head < 0);
context = &dev->cmd.context[dev->cmd.free_head];
+ context->token += dev->cmd.token_mask + 1;
dev->cmd.free_head = context->next;
spin_unlock(&dev->cmd.context_lock);
@@ -346,20 +435,20 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
if (err)
goto out;
- context->timer.expires = jiffies + timeout;
- add_timer(&context->timer);
-
- wait_for_completion(&context->done);
- del_timer_sync(&context->timer);
+ if (!wait_for_completion_timeout(&context->done, timeout)) {
+ err = -EBUSY;
+ goto out;
+ }
err = context->result;
if (err)
goto out;
- *status = context->status;
- if (*status)
+ if (context->status) {
mthca_dbg(dev, "Command %02x completed with status %02x\n",
- op, *status);
+ op, context->status);
+ err = mthca_status_to_errno(context->status);
+ }
if (out_is_imm)
*out_param = context->out_param;
@@ -381,17 +470,16 @@ static int mthca_cmd_box(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
- if (dev->cmd.use_events)
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, &out_param, 0,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
else
return mthca_cmd_poll(dev, in_param, &out_param, 0,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
}
/* Invoke a command with no output parameter */
@@ -400,11 +488,10 @@ static int mthca_cmd(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
return mthca_cmd_box(dev, in_param, 0, in_modifier,
- op_modifier, op, timeout, status);
+ op_modifier, op, timeout);
}
/*
@@ -418,17 +505,48 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
- if (dev->cmd.use_events)
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, out_param, 1,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
else
return mthca_cmd_poll(dev, in_param, out_param, 1,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
+}
+
+int mthca_cmd_init(struct mthca_dev *dev)
+{
+ mutex_init(&dev->cmd.hcr_mutex);
+ sema_init(&dev->cmd.poll_sem, 1);
+ dev->cmd.flags = 0;
+
+ dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+ if (!dev->hcr) {
+ mthca_err(dev, "Couldn't map command register.");
+ return -ENOMEM;
+ }
+
+ dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+ MTHCA_MAILBOX_SIZE,
+ MTHCA_MAILBOX_SIZE, 0);
+ if (!dev->cmd.pool) {
+ iounmap(dev->hcr);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mthca_cmd_cleanup(struct mthca_dev *dev)
+{
+ pci_pool_destroy(dev->cmd.pool);
+ iounmap(dev->hcr);
+ if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
+ iounmap(dev->cmd.dbell_map);
}
/*
@@ -448,10 +566,6 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
for (i = 0; i < dev->cmd.max_cmds; ++i) {
dev->cmd.context[i].token = i;
dev->cmd.context[i].next = i + 1;
- init_timer(&dev->cmd.context[i].timer);
- dev->cmd.context[i].timer.data =
- (unsigned long) &dev->cmd.context[i];
- dev->cmd.context[i].timer.function = event_timeout;
}
dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
@@ -466,7 +580,8 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
; /* nothing */
--dev->cmd.token_mask;
- dev->cmd.use_events = 1;
+ dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
+
down(&dev->cmd.poll_sem);
return 0;
@@ -479,7 +594,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
{
int i;
- dev->cmd.use_events = 0;
+ dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
for (i = 0; i < dev->cmd.max_cmds; ++i)
down(&dev->cmd.event_sem);
@@ -489,14 +604,41 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
up(&dev->cmd.poll_sem);
}
-int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ gfp_t gfp_mask)
+{
+ struct mthca_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof *mailbox, gfp_mask);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
+{
+ if (!mailbox)
+ return;
+
+ pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
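
[Editor's note] Every pci_alloc_consistent()/pci_free_consistent() pair below is converted to this mailbox pattern; shown once here as an illustrative sketch rather than new code from the patch:

	struct mthca_mailbox *mailbox;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	/* ... fill mailbox->buf, pass mailbox->dma to mthca_cmd_box() ... */

	mthca_free_mailbox(dev, mailbox);
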
+
+int mthca_SYS_EN(struct mthca_dev *dev)
{
u64 out;
int ret;
- ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status);
+ ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D);
- if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR)
+ if (ret == -ENOMEM)
mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, "
"sladdr=%d, SPD source=%s\n",
(int) (out >> 6) & 0xf, (int) (out >> 4) & 3,
@@ -505,28 +647,28 @@ int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
return ret;
}
-int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
+int mthca_SYS_DIS(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C);
}
static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
- u64 virt, u8 *status)
+ u64 virt)
{
- u32 *inbox;
- dma_addr_t indma;
+ struct mthca_mailbox *mailbox;
struct mthca_icm_iter iter;
+ __be64 *pages;
int lg;
int nent = 0;
int i;
int err = 0;
int ts = 0, tc = 0;
- inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
-
- memset(inbox, 0, PAGE_SIZE);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
+ pages = mailbox->buf;
for (mthca_icm_first(icm, &iter);
!mthca_icm_last(&iter);
@@ -537,30 +679,30 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
* address or size and use that as our log2 size.
*/
lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
- if (lg < 12) {
- mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n",
+ if (lg < MTHCA_ICM_PAGE_SHIFT) {
+ mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
+ MTHCA_ICM_PAGE_SIZE,
(unsigned long long) mthca_icm_addr(&iter),
mthca_icm_size(&iter));
err = -EINVAL;
goto out;
}
- for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
+ for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) {
if (virt != -1) {
- *((__be64 *) (inbox + nent * 4)) =
- cpu_to_be64(virt);
+ pages[nent * 2] = cpu_to_be64(virt);
virt += 1 << lg;
}
- *((__be64 *) (inbox + nent * 4 + 2)) =
- cpu_to_be64((mthca_icm_addr(&iter) +
- (i << lg)) | (lg - 12));
+ pages[nent * 2 + 1] =
+ cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) |
+ (lg - MTHCA_ICM_PAGE_SHIFT));
ts += 1 << (lg - 10);
++tc;
- if (nent == PAGE_SIZE / 16) {
- err = mthca_cmd(dev, indma, nent, 0, op,
- CMD_TIME_CLASS_B, status);
- if (err || *status)
+ if (++nent == MTHCA_MAILBOX_SIZE / 16) {
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
+ CMD_TIME_CLASS_B);
+ if (err)
goto out;
nent = 0;
}
@@ -568,8 +710,8 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
}
if (nent)
- err = mthca_cmd(dev, indma, nent, 0, op,
- CMD_TIME_CLASS_B, status);
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
+ CMD_TIME_CLASS_B);
switch (op) {
case CMD_MAP_FA:
@@ -585,31 +727,60 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
}
out:
- pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm)
{
- return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
+ return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1);
}
-int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
+int mthca_UNMAP_FA(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B);
}
-int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
+int mthca_RUN_FW(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A);
}
-int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
+static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
{
+ phys_addr_t addr;
+ u16 max_off = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i)
+ max_off = max(max_off, dev->cmd.dbell_offsets[i]);
+
+ if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
+ mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
+ "length 0x%x crosses a page boundary\n",
+ (unsigned long long) base, max_off);
+ return;
+ }
+
+ addr = pci_resource_start(dev->pdev, 2) +
+ ((pci_resource_len(dev->pdev, 2) - 1) & base);
+ dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
+ if (!dev->cmd.dbell_map)
+ return;
+
+ dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
+ mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
+}
+
+int mthca_QUERY_FW(struct mthca_dev *dev)
+{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
+ u64 base;
+ u32 tmp;
int err = 0;
u8 lg;
+ int i;
#define QUERY_FW_OUT_SIZE 0x100
#define QUERY_FW_VER_OFFSET 0x00
@@ -617,6 +788,10 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
#define QUERY_FW_ERR_START_OFFSET 0x30
#define QUERY_FW_ERR_SIZE_OFFSET 0x38
+#define QUERY_FW_CMD_DB_EN_OFFSET 0x10
+#define QUERY_FW_CMD_DB_OFFSET 0x50
+#define QUERY_FW_CMD_DB_BASE 0x60
+
#define QUERY_FW_START_OFFSET 0x20
#define QUERY_FW_END_OFFSET 0x28
@@ -625,20 +800,20 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
- outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
- if (!outbox) {
- return -ENOMEM;
- }
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
+ CMD_TIME_CLASS_A);
if (err)
goto out;
MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET);
/*
- * FW subminor version is at more signifant bits than minor
+ * FW subminor version is at more significant bits than minor
* version, so swap here.
*/
dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) |
@@ -651,6 +826,24 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
mthca_dbg(dev, "FW version %012llx, max commands %d\n",
(unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+ MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
+ MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
+
+ mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
+ (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
+
+ MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
+ if (tmp & 0x1) {
+ mthca_dbg(dev, "FW supports commands through doorbells\n");
+
+ MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
+ for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
+ MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
+ QUERY_FW_CMD_DB_OFFSET + (i << 1));
+
+ mthca_setup_cmd_doorbells(dev, base);
+ }
+
if (mthca_is_memfree(dev)) {
MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
@@ -659,12 +852,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
/*
- * Arbel page size is always 4 KB; round up number of
- * system pages needed.
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
*/
dev->fw.arbel.fw_pages =
- (dev->fw.arbel.fw_pages + (1 << (PAGE_SHIFT - 12)) - 1) >>
- (PAGE_SHIFT - 12);
+ ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
(unsigned long long) dev->fw.arbel.clr_int_base,
@@ -681,15 +874,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
}
out:
- pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
+int mthca_ENABLE_LAM(struct mthca_dev *dev)
{
+ struct mthca_mailbox *mailbox;
u8 info;
u32 *outbox;
- dma_addr_t outdma;
int err = 0;
#define ENABLE_LAM_OUT_SIZE 0x100
@@ -700,19 +893,17 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
#define ENABLE_LAM_INFO_ECC_MASK 0x3
- outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
- CMD_TIME_CLASS_C, status);
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
+ CMD_TIME_CLASS_C);
if (err)
goto out;
- if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE)
- goto out;
-
MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET);
MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET);
MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET);
@@ -733,20 +924,20 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
(unsigned long long) dev->ddr_end);
out:
- pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
+int mthca_DISABLE_LAM(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C);
}
-int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
+int mthca_QUERY_DDR(struct mthca_dev *dev)
{
+ struct mthca_mailbox *mailbox;
u8 info;
u32 *outbox;
- dma_addr_t outdma;
int err = 0;
#define QUERY_DDR_OUT_SIZE 0x100
@@ -757,12 +948,13 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
#define QUERY_DDR_INFO_ECC_MASK 0x3
- outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
+ CMD_TIME_CLASS_A);
if (err)
goto out;
@@ -787,17 +979,18 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
(unsigned long long) dev->ddr_end);
out:
- pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
- struct mthca_dev_lim *dev_lim, u8 *status)
+ struct mthca_dev_lim *dev_lim)
{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
u8 field;
u16 size;
+ u16 stat_rate;
int err;
#define QUERY_DEV_LIM_OUT_SIZE 0x100
@@ -828,6 +1021,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36
#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37
#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b
+#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET 0x3c
#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f
#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44
#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48
@@ -860,20 +1054,17 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
- outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
+ CMD_TIME_CLASS_A);
if (err)
goto out;
- MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
- dev_lim->max_srq_sz = 1 << field;
- MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
- dev_lim->max_qp_sz = 1 << field;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
dev_lim->reserved_qps = 1 << (field & 0xf);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -899,7 +1090,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
dev_lim->max_eqs = 1 << (field & 0x7);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
- dev_lim->reserved_mtts = 1 << (field >> 4);
+ if (mthca_is_memfree(dev))
+ dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
+ dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size;
+ else
+ dev_lim->reserved_mtts = 1 << (field >> 4);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
dev_lim->max_mrw_sz = 1 << field;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
@@ -922,6 +1117,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
dev_lim->num_ports = field & 0xf;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
dev_lim->max_gids = 1 << (field & 0xf);
+ MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
+ dev_lim->stat_rate_support = stat_rate;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
dev_lim->max_pkeys = 1 << (field & 0xf);
MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
@@ -969,26 +1166,17 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET);
dev_lim->uar_scratch_entry_sz = size;
- mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
- dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
- mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
- dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
- mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
- dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
- mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
- dev_lim->reserved_mrws, dev_lim->reserved_mtts);
- mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
- dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
- mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
- dev_lim->max_pds, dev_lim->reserved_mgms);
-
- mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
-
if (mthca_is_memfree(dev)) {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+ dev_lim->max_srq_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+ dev_lim->max_qp_sz = 1 << field;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
dev_lim->hca.arbel.resize_srq = field & 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
+ dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
dev_lim->mpt_entry_sz = size;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
@@ -1014,21 +1202,72 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
mthca_dbg(dev, "Max ICM size %lld MB\n",
(unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
} else {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+ dev_lim->max_srq_sz = (1 << field) - 1;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+ dev_lim->max_qp_sz = (1 << field) - 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
}
+ mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
+ dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
+ mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
+ dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz);
+ mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
+ dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
+ mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
+ dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
+ mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
+ dev_lim->reserved_mrws, dev_lim->reserved_mtts);
+ mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
+ mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_mgms);
+ mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+ dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
+
+ mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
+
out:
- pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
+static void get_board_id(void *vsd, char *board_id)
+{
+ int i;
+
+#define VSD_OFFSET_SIG1 0x00
+#define VSD_OFFSET_SIG2 0xde
+#define VSD_OFFSET_MLX_BOARD_ID 0xd0
+#define VSD_OFFSET_TS_BOARD_ID 0x20
+
+#define VSD_SIGNATURE_TOPSPIN 0x5ad
+
+ memset(board_id, 0, MTHCA_BOARD_ID_LEN);
+
+ if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
+ be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
+ strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
+ } else {
+ /*
+ * The board ID is a string but the firmware byte
+ * swaps each 4-byte word before passing it back to
+ * us. Therefore we need to swab it before printing.
+ */
+ for (i = 0; i < 4; ++i)
+ ((u32 *) board_id)[i] =
+ swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4));
+ }
+}
+
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
- struct mthca_adapter *adapter, u8 *status)
+ struct mthca_adapter *adapter)
{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
int err;
#define QUERY_ADAPTER_OUT_SIZE 0x100
@@ -1036,37 +1275,47 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
+#define QUERY_ADAPTER_VSD_OFFSET 0x20
- outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
+ CMD_TIME_CLASS_A);
if (err)
goto out;
- MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
- MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
- MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
- MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+ if (!mthca_is_memfree(dev)) {
+ MTHCA_GET(adapter->vendor_id, outbox,
+ QUERY_ADAPTER_VENDOR_ID_OFFSET);
+ MTHCA_GET(adapter->device_id, outbox,
+ QUERY_ADAPTER_DEVICE_ID_OFFSET);
+ MTHCA_GET(adapter->revision_id, outbox,
+ QUERY_ADAPTER_REVISION_ID_OFFSET);
+ }
+ MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+
+ get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
+ adapter->board_id);
out:
- pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_INIT_HCA(struct mthca_dev *dev,
- struct mthca_init_hca_param *param,
- u8 *status)
+ struct mthca_init_hca_param *param)
{
- u32 *inbox;
- dma_addr_t indma;
+ struct mthca_mailbox *mailbox;
+ __be32 *inbox;
int err;
#define INIT_HCA_IN_SIZE 0x200
-#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_FLAGS1_OFFSET 0x00c
+#define INIT_HCA_FLAGS2_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1102,21 +1351,29 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
- inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, INIT_HCA_IN_SIZE);
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
+
#if defined(__LITTLE_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
#elif defined(__BIG_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
#else
#error Host endianness not defined
#endif
/* Check port for UD address vector: */
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
+
+ /* Enable IPoIB checksumming if we can: */
+ if (dev->device_cap_flags & IB_DEVICE_UD_IP_CSUM)
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(7 << 3);
/* We leave wqe_quota, responder_exu, etc as 0 (default) */
@@ -1167,19 +1424,19 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
}
- err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
- HZ, status);
+ err = mthca_cmd(dev, mailbox->dma, 0, 0,
+ CMD_INIT_HCA, CMD_TIME_CLASS_D);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
- int port, u8 *status)
+ int port)
{
+ struct mthca_mailbox *mailbox;
u32 *inbox;
- dma_addr_t indma;
int err;
u32 flags;
@@ -1188,10 +1445,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
#define INIT_IB_FLAG_SIG (1 << 18)
#define INIT_IB_FLAG_NG (1 << 17)
#define INIT_IB_FLAG_G0 (1 << 16)
-#define INIT_IB_FLAG_1X (1 << 8)
-#define INIT_IB_FLAG_4X (1 << 9)
-#define INIT_IB_FLAG_12X (1 << 11)
#define INIT_IB_VL_SHIFT 4
+#define INIT_IB_PORT_WIDTH_SHIFT 8
#define INIT_IB_MTU_SHIFT 12
#define INIT_IB_MAX_GID_OFFSET 0x06
#define INIT_IB_MAX_PKEY_OFFSET 0x0a
@@ -1199,19 +1454,19 @@ int mthca_INIT_IB(struct mthca_dev *dev,
#define INIT_IB_NODE_GUID_OFFSET 0x18
#define INIT_IB_SI_GUID_OFFSET 0x20
- inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, INIT_IB_IN_SIZE);
flags = 0;
- flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0;
- flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0;
flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0;
flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0;
flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0;
flags |= param->vl_cap << INIT_IB_VL_SHIFT;
+ flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT;
flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
@@ -1221,28 +1476,28 @@ int mthca_INIT_IB(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
- err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
+ CMD_TIME_CLASS_A);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port)
{
- return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
+ return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, CMD_TIME_CLASS_A);
}
-int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic)
{
- return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
+ return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, CMD_TIME_CLASS_C);
}
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
- int port, u8 *status)
+ int port)
{
+ struct mthca_mailbox *mailbox;
u32 *inbox;
- dma_addr_t indma;
int err;
u32 flags = 0;
@@ -1253,9 +1508,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
#define SET_IB_CAP_MASK_OFFSET 0x04
#define SET_IB_SI_GUID_OFFSET 0x08
- inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, SET_IB_IN_SIZE);
@@ -1266,34 +1522,36 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
- err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
- CMD_TIME_CLASS_B, status);
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
+ CMD_TIME_CLASS_B);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt)
{
- return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
+ return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt);
}
-int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt)
{
- u64 *inbox;
- dma_addr_t indma;
+ struct mthca_mailbox *mailbox;
+ __be64 *inbox;
int err;
- inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
inbox[0] = cpu_to_be64(virt);
inbox[1] = cpu_to_be64(dma_addr);
- err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+ err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
+ CMD_TIME_CLASS_B);
- pci_free_consistent(dev->pdev, 16, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
if (!err)
mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
@@ -1302,314 +1560,279 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status
return err;
}
-int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count)
{
mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
page_count, (unsigned long long) virt);
- return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, virt, page_count, 0,
+ CMD_UNMAP_ICM, CMD_TIME_CLASS_B);
}
-int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm)
{
- return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
+ return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1);
}
-int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B);
}
-int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
- u8 *status)
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages)
{
- int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
- CMD_TIME_CLASS_A, status);
+ int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0,
+ 0, CMD_SET_ICM_SIZE, CMD_TIME_CLASS_A);
- if (ret || status)
+ if (ret)
return ret;
/*
- * Arbel page size is always 4 KB; round up number of system
- * pages needed.
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
*/
- *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
+ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
return 0;
}
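To make the rounding concrete, a worked example assuming 4 KB ICM pages (MTHCA_ICM_PAGE_SHIFT == 12, matching the value the old code hard-coded):

	/*
	 * On a host with 16 KB pages (PAGE_SHIFT == 14), if the firmware
	 * reports that 5 ICM pages of auxiliary space are needed:
	 *   ALIGN(5, 16384 / 4096) = ALIGN(5, 4) = 8 ICM pages
	 *   8 >> (14 - 12)         = 2 system pages
	 */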
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
- int mpt_index, u8 *status)
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mpt_entry,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
- CMD_TIME_CLASS_B, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
+ CMD_TIME_CLASS_B);
}
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
- int mpt_index, u8 *status)
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index)
{
- dma_addr_t outdma = 0;
- int err;
-
- if (mpt_entry) {
- outdma = pci_map_single(dev->pdev, mpt_entry,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
- }
-
- err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
- CMD_HW2SW_MPT,
- CMD_TIME_CLASS_B, status);
-
- if (mpt_entry)
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+ !mailbox, CMD_HW2SW_MPT,
+ CMD_TIME_CLASS_B);
}
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
- int num_mtt, u8 *status)
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int num_mtt)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mtt_entry,
- (num_mtt + 2) * 8,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
- CMD_TIME_CLASS_B, status);
-
- pci_unmap_single(dev->pdev, indma,
- (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
+ CMD_TIME_CLASS_B);
}
-int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
+int mthca_SYNC_TPT(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B);
}
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
- int eq_num, u8 *status)
+ int eq_num)
{
mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
unmap ? "Clearing" : "Setting",
(unsigned long long) event_mask, eq_num);
return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num,
- 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
+ 0, CMD_MAP_EQ, CMD_TIME_CLASS_B);
}
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
- int eq_num, u8 *status)
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, eq_context,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
+ CMD_TIME_CLASS_A);
}
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
- int eq_num, u8 *status)
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, eq_context,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
+ return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
+ CMD_HW2SW_EQ,
+ CMD_TIME_CLASS_A);
+}
- err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
- CMD_HW2SW_EQ,
- CMD_TIME_CLASS_A, status);
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num)
+{
+ return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
+ CMD_TIME_CLASS_A);
+}
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
+ CMD_HW2SW_CQ,
+ CMD_TIME_CLASS_A);
}
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
- int cq_num, u8 *status)
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size)
{
- dma_addr_t indma;
+ struct mthca_mailbox *mailbox;
+ __be32 *inbox;
int err;
- indma = pci_map_single(dev->pdev, cq_context,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
+#define RESIZE_CQ_IN_SIZE 0x40
+#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c
+#define RESIZE_CQ_LKEY_OFFSET 0x1c
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, RESIZE_CQ_IN_SIZE);
+ /*
+ * Leave start address fields zeroed out -- mthca assumes that
+ * MRs for CQs always start at virtual address 0.
+ */
+ MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
+ MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
- err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
- CMD_TIME_CLASS_A, status);
+ err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
+ CMD_TIME_CLASS_B);
- pci_unmap_single(dev->pdev, indma,
- MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
- int cq_num, u8 *status)
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num)
{
- dma_addr_t outdma = 0;
- int err;
+ return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ,
+ CMD_TIME_CLASS_A);
+}
- outdma = pci_map_single(dev->pdev, cq_context,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0,
+ CMD_HW2SW_SRQ,
+ CMD_TIME_CLASS_A);
+}
- err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
- CMD_HW2SW_CQ,
- CMD_TIME_CLASS_A, status);
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
+ CMD_QUERY_SRQ, CMD_TIME_CLASS_A);
+}
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit)
+{
+ return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
+ CMD_TIME_CLASS_B);
}
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, void *qp_context, u32 optmask,
- u8 *status)
-{
- static const u16 op[] = {
- [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE,
- [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE,
- [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE,
- [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE,
- [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE,
- [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE,
- [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE,
- [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE,
- [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE,
- [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE,
- [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask)
+{
+ static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_RST2INIT_QPEE,
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_INIT2INIT_QPEE,
+ [IB_QPS_RTR] = CMD_INIT2RTR_QPEE,
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTR2RTS_QPEE,
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTS2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_RTS2SQD_QPEE,
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQD2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_SQD2SQD_QPEE,
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE,
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ }
};
- u8 op_mod = 0;
- dma_addr_t indma;
+ u8 op_mod = 0;
+ int my_mailbox = 0;
int err;
- if (trans < 0 || trans >= ARRAY_SIZE(op))
- return -EINVAL;
-
- if (trans == MTHCA_TRANS_ANY2RST) {
- indma = 0;
+ if (op[cur][next] == CMD_ERR2RST_QPEE) {
op_mod = 3; /* don't write outbox, any->reset */
/* For debugging */
- qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
- &indma);
- op_mod = 2; /* write outbox, any->reset */
- } else {
- indma = pci_map_single(dev->pdev, qp_context,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
+ if (!mailbox) {
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (!IS_ERR(mailbox)) {
+ my_mailbox = 1;
+ op_mod = 2; /* write outbox, any->reset */
+ } else
+ mailbox = NULL;
+ }
- if (0) {
+ err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ (!!is_ee << 24) | num, op_mod,
+ op[cur][next], CMD_TIME_CLASS_C);
+
+ if (0 && mailbox) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" opt param mask: %08x\n", be32_to_cpup(qp_context));
+ printk(" %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
- printk(" [%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ printk("[%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
printk("\n");
}
}
- }
-
- if (trans == MTHCA_TRANS_ANY2RST) {
- err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
- op_mod, op[trans], CMD_TIME_CLASS_C, status);
+ if (my_mailbox)
+ mthca_free_mailbox(dev, mailbox);
+ } else {
if (0) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" %08x\n", be32_to_cpup(qp_context));
+ printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
- printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ printk(" [%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
printk("\n");
}
}
- } else
- err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
- op_mod, op[trans], CMD_TIME_CLASS_C, status);
+ err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
+ op_mod, op[cur][next], CMD_TIME_CLASS_C);
+ }
- if (trans != MTHCA_TRANS_ANY2RST)
- pci_unmap_single(dev->pdev, indma,
- MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
- else
- pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
- qp_context, indma);
return err;
}
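A minimal sketch of how callers drive the transition table above; qp_num, mailbox and optmask are placeholders, and only two of the legal transitions are shown:

	/* INIT -> RTR looks up CMD_INIT2RTR_QPEE and posts the QP context */
	err = mthca_MODIFY_QP(dev, IB_QPS_INIT, IB_QPS_RTR, qp_num, 0,
			      mailbox, optmask);

	/*
	 * Any state -> RESET maps to CMD_ERR2RST_QPEE; a NULL mailbox is
	 * allowed there, in which case op_mod 3 tells the firmware not to
	 * write an outbox at all.
	 */
	err = mthca_MODIFY_QP(dev, IB_QPS_RTS, IB_QPS_RESET, qp_num, 0,
			      NULL, 0);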
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- void *qp_context, u8 *status)
+ struct mthca_mailbox *mailbox)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, qp_context,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
- CMD_QUERY_QPEE,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
+ CMD_QUERY_QPEE, CMD_TIME_CLASS_A);
}
-int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
- u8 *status)
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn)
{
u8 op_mod;
@@ -1623,7 +1846,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
case IB_QPT_RAW_IPV6:
op_mod = 2;
break;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
op_mod = 3;
break;
default:
@@ -1631,33 +1854,40 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
}
return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
- int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
- void *in_mad, void *response_mad, u8 *status)
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad)
{
- void *box;
- dma_addr_t dma;
+ struct mthca_mailbox *inmailbox, *outmailbox;
+ void *inbox;
int err;
u32 in_modifier = port;
u8 op_modifier = 0;
#define MAD_IFC_BOX_SIZE 0x400
#define MAD_IFC_MY_QPN_OFFSET 0x100
-#define MAD_IFC_RQPN_OFFSET 0x104
-#define MAD_IFC_SL_OFFSET 0x108
-#define MAD_IFC_G_PATH_OFFSET 0x109
-#define MAD_IFC_RLID_OFFSET 0x10a
-#define MAD_IFC_PKEY_OFFSET 0x10e
+#define MAD_IFC_RQPN_OFFSET 0x108
+#define MAD_IFC_SL_OFFSET 0x10c
+#define MAD_IFC_G_PATH_OFFSET 0x10d
+#define MAD_IFC_RLID_OFFSET 0x10e
+#define MAD_IFC_PKEY_OFFSET 0x112
#define MAD_IFC_GRH_OFFSET 0x140
- box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
- if (!box)
- return -ENOMEM;
+ inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+ inbox = inmailbox->buf;
- memcpy(box, in_mad, 256);
+ outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(outmailbox)) {
+ mthca_free_mailbox(dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ memcpy(inbox, in_mad, 256);
/*
* Key check traps can't be generated unless we have in_wc to
@@ -1671,101 +1901,69 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
if (in_wc) {
u8 val;
- memset(box + 256, 0, 256);
+ memset(inbox + 256, 0, 256);
- MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
- MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
val = in_wc->sl << 4;
- MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET);
+ MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET);
val = in_wc->dlid_path_bits |
(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
- MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET);
+ MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET);
- MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET);
- MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+ MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
if (in_grh)
- memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+ memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
- op_modifier |= 0x10;
+ op_modifier |= 0x4;
in_modifier |= in_wc->slid << 16;
}
- err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
- CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
+ err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
+ in_modifier, op_modifier,
+ CMD_MAD_IFC, CMD_TIME_CLASS_C);
- if (!err && !*status)
- memcpy(response_mad, box + 512, 256);
+ if (!err)
+ memcpy(response_mad, outmailbox->buf, 256);
- pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
+ mthca_free_mailbox(dev, inmailbox);
+ mthca_free_mailbox(dev, outmailbox);
return err;
}
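For orientation, the layout of the MAD_IFC input mailbox as built above (offsets taken from the #defines in this function):

	/*
	 * 0x000  the 256-byte request MAD
	 * 0x100  work-completion info (QPNs, SL, path bits, remote LID,
	 *        P_Key index) -- filled in only when in_wc is supplied
	 * 0x140  the 40-byte GRH -- copied in only when in_grh is supplied
	 *
	 * The 256-byte response MAD comes back in a second, separate mailbox.
	 */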
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status)
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, mgm,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, index, 0,
- CMD_READ_MGM,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
+ CMD_READ_MGM, CMD_TIME_CLASS_A);
}
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status)
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mgm,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
+ CMD_TIME_CLASS_A);
}
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
- u8 *status)
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash)
{
- dma_addr_t indma;
u64 imm;
int err;
- indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
+ err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
+ CMD_TIME_CLASS_A);
- err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
- CMD_TIME_CLASS_A, status);
*hash = imm;
-
- pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
return err;
}
-int mthca_NOP(struct mthca_dev *dev, u8 *status)
+int mthca_NOP(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
+ return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100));
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index adf039b3c54..f952244c54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,17 +30,14 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_CMD_H
#define MTHCA_CMD_H
-#include <ib_verbs.h>
+#include <rdma/ib_verbs.h>
-#define MTHCA_CMD_MAILBOX_ALIGN 16UL
-#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+#define MTHCA_MAILBOX_SIZE 4096
enum {
/* command completed successfully: */
@@ -73,9 +72,9 @@ enum {
MTHCA_CMD_STAT_REG_BOUND = 0x21,
/* HCA local attached memory not present: */
MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
- /* Bad management packet (silently discarded): */
+ /* Bad management packet (silently discarded): */
MTHCA_CMD_STAT_BAD_PKT = 0x30,
- /* More outstanding CQEs in CQ than new CQ size: */
+ /* More outstanding CQEs in CQ than new CQ size: */
MTHCA_CMD_STAT_BAD_SIZE = 0x40
};
@@ -102,6 +101,7 @@ enum {
DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
DEV_LIM_FLAG_SRQ = 1 << 6,
+ DEV_LIM_FLAG_IPOIB_CSUM = 1 << 7,
DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
DEV_LIM_FLAG_MW = 1 << 16,
@@ -112,6 +112,11 @@ enum {
DEV_LIM_FLAG_UD_MULTI = 1 << 21,
};
+struct mthca_mailbox {
+ dma_addr_t dma;
+ void *buf;
+};
+
struct mthca_dev_lim {
int max_srq_sz;
int max_qp_sz;
@@ -140,6 +145,7 @@ struct mthca_dev_lim {
int max_vl;
int num_ports;
int max_gids;
+ u16 stat_rate_support;
int max_pkeys;
u32 flags;
int reserved_uars;
@@ -179,10 +185,11 @@ struct mthca_dev_lim {
};
struct mthca_adapter {
- u32 vendor_id;
- u32 device_id;
- u32 revision_id;
- u8 inta_pin;
+ u32 vendor_id;
+ u32 device_id;
+ u32 revision_id;
+ char board_id[MTHCA_BOARD_ID_LEN];
+ u8 inta_pin;
};
struct mthca_init_hca_param {
@@ -214,8 +221,7 @@ struct mthca_init_hca_param {
};
struct mthca_init_ib_param {
- int enable_1x;
- int enable_4x;
+ int port_width;
int vl_cap;
int mtu_cap;
u16 gid_cap;
@@ -235,76 +241,85 @@ struct mthca_set_ib_param {
u32 cap_mask;
};
+int mthca_cmd_init(struct mthca_dev *dev);
+void mthca_cmd_cleanup(struct mthca_dev *dev);
int mthca_cmd_use_events(struct mthca_dev *dev);
void mthca_cmd_use_polling(struct mthca_dev *dev);
void mthca_cmd_event(struct mthca_dev *dev, u16 token,
u8 status, u64 out_param);
-int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
-int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
-int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
-int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
-int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
-int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
-int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
-int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
-int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ gfp_t gfp_mask);
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
+
+int mthca_SYS_EN(struct mthca_dev *dev);
+int mthca_SYS_DIS(struct mthca_dev *dev);
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm);
+int mthca_UNMAP_FA(struct mthca_dev *dev);
+int mthca_RUN_FW(struct mthca_dev *dev);
+int mthca_QUERY_FW(struct mthca_dev *dev);
+int mthca_ENABLE_LAM(struct mthca_dev *dev);
+int mthca_DISABLE_LAM(struct mthca_dev *dev);
+int mthca_QUERY_DDR(struct mthca_dev *dev);
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
- struct mthca_dev_lim *dev_lim, u8 *status);
+ struct mthca_dev_lim *dev_lim);
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
- struct mthca_adapter *adapter, u8 *status);
+ struct mthca_adapter *adapter);
int mthca_INIT_HCA(struct mthca_dev *dev,
- struct mthca_init_hca_param *param,
- u8 *status);
+ struct mthca_init_hca_param *param);
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
- int port, u8 *status);
-int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
-int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+ int port);
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port);
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic);
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
- int port, u8 *status);
-int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
-int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
-int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
-int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
-int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
-int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
- u8 *status);
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
- int mpt_index, u8 *status);
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
- int mpt_index, u8 *status);
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
- int num_mtt, u8 *status);
-int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
+ int port);
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt);
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt);
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count);
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm);
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev);
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages);
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index);
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index);
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int num_mtt);
+int mthca_SYNC_TPT(struct mthca_dev *dev);
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
- int eq_num, u8 *status);
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
- int eq_num, u8 *status);
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
- int eq_num, u8 *status);
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
- int cq_num, u8 *status);
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
- int cq_num, u8 *status);
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, void *qp_context, u32 optmask,
- u8 *status);
+ int eq_num);
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num);
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num);
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num);
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num);
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size);
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num);
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num);
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox);
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit);
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- void *qp_context, u8 *status);
-int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
- u8 *status);
+ struct mthca_mailbox *mailbox);
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn);
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
- int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
- void *in_mad, void *response_mad, u8 *status);
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status);
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status);
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
- u8 *status);
-int mthca_NOP(struct mthca_dev *dev, u8 *status);
-
-#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad);
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox);
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash);
+int mthca_NOP(struct mthca_dev *dev);
#endif /* MTHCA_CMD_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index b4bfbbfe2c3..155bc66395b 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,15 +29,11 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
-#include <asm/page.h>
-
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
#define MTHCA_ECR_BASE 0x80700
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 2bf347b84c3..40ba8333815 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,5 +1,9 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,14 +32,15 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
*/
-#include <linux/init.h>
+#include <linux/gfp.h>
#include <linux/hardirq.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
-#include <ib_pack.h>
+#include <rdma/ib_pack.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -49,25 +54,29 @@ enum {
MTHCA_CQ_ENTRY_SIZE = 0x20
};
+enum {
+ MTHCA_ATOMIC_BYTE_LEN = 8
+};
+
/*
* Must be packed because start is 64 bits but only aligned to 32 bits.
*/
struct mthca_cq_context {
- u32 flags;
- u64 start;
- u32 logsize_usrpage;
- u32 error_eqn; /* Tavor only */
- u32 comp_eqn;
- u32 pd;
- u32 lkey;
- u32 last_notified_index;
- u32 solicit_producer_index;
- u32 consumer_index;
- u32 producer_index;
- u32 cqn;
- u32 ci_db; /* Arbel only */
- u32 state_db; /* Arbel only */
- u32 reserved;
+ __be32 flags;
+ __be64 start;
+ __be32 logsize_usrpage;
+ __be32 error_eqn; /* Tavor only */
+ __be32 comp_eqn;
+ __be32 pd;
+ __be32 lkey;
+ __be32 last_notified_index;
+ __be32 solicit_producer_index;
+ __be32 consumer_index;
+ __be32 producer_index;
+ __be32 cqn;
+ __be32 ci_db; /* Arbel only */
+ __be32 state_db; /* Arbel only */
+ u32 reserved;
} __attribute__((packed));
#define MTHCA_CQ_STATUS_OK ( 0 << 28)
@@ -106,31 +115,32 @@ enum {
};
struct mthca_cqe {
- u32 my_qpn;
- u32 my_ee;
- u32 rqpn;
- u16 sl_g_mlpath;
- u16 rlid;
- u32 imm_etype_pkey_eec;
- u32 byte_cnt;
- u32 wqe;
- u8 opcode;
- u8 is_send;
- u8 reserved;
- u8 owner;
+ __be32 my_qpn;
+ __be32 my_ee;
+ __be32 rqpn;
+ u8 sl_ipok;
+ u8 g_mlpath;
+ __be16 rlid;
+ __be32 imm_etype_pkey_eec;
+ __be32 byte_cnt;
+ __be32 wqe;
+ u8 opcode;
+ u8 is_send;
+ u8 reserved;
+ u8 owner;
};
struct mthca_err_cqe {
- u32 my_qpn;
- u32 reserved1[3];
- u8 syndrome;
- u8 reserved2;
- u16 db_cnt;
- u32 reserved3;
- u32 wqe;
- u8 opcode;
- u8 reserved4[2];
- u8 owner;
+ __be32 my_qpn;
+ u32 reserved1[3];
+ u8 syndrome;
+ u8 vendor_err;
+ __be16 db_cnt;
+ u32 reserved2;
+ __be32 wqe;
+ u8 opcode;
+ u8 reserved3[2];
+ u8 owner;
};
#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
@@ -146,24 +156,29 @@ struct mthca_err_cqe {
#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
-static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
+ int entry)
{
- if (cq->is_direct)
- return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
+ if (buf->is_direct)
+ return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
else
- return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
}
-static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
+static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+{
+ return get_cqe_from_buf(&cq->buf, entry);
+}
+
+static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
{
- struct mthca_cqe *cqe = get_cqe(cq, i);
return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
}
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
- return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
+ return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
}
static inline void set_cqe_hw(struct mthca_cqe *cqe)
@@ -171,6 +186,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
}
+static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
+{
+ __be32 *cqe = cqe_ptr;
+
+ (void) cqe; /* avoid warning if mthca_dbg compiled away... */
+ mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
+ be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
+ be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
+}
+
/*
* incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
* should be correct before calling update_cons_index().
@@ -178,22 +204,22 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
int incr)
{
- u32 doorbell[2];
-
if (mthca_is_memfree(dev)) {
*cq->set_ci_db = cpu_to_be32(cq->cons_index);
wmb();
} else {
- doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
- doorbell[1] = cpu_to_be32(incr - 1);
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
dev->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ /*
+ * Make sure doorbells don't leak out of CQ spinlock
+ * and reach the HCA out of order:
+ */
+ mmiowb();
}
}
-void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn)
{
struct mthca_cq *cq;
@@ -209,21 +235,52 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type)
{
struct mthca_cq *cq;
- struct mthca_cqe *cqe;
- int prod_index;
- int nfreed = 0;
+ struct ib_event event;
+
+ spin_lock(&dev->cq_table.lock);
- spin_lock_irq(&dev->cq_table.lock);
cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
if (cq)
- atomic_inc(&cq->refcount);
- spin_unlock_irq(&dev->cq_table.lock);
+ ++cq->refcount;
+
+ spin_unlock(&dev->cq_table.lock);
- if (!cq)
+ if (!cq) {
+ mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn);
return;
+ }
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.cq = &cq->ibcq;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event, cq->ibcq.cq_context);
+
+ spin_lock(&dev->cq_table.lock);
+ if (!--cq->refcount)
+ wake_up(&cq->wait);
+ spin_unlock(&dev->cq_table.lock);
+}
+
+static inline int is_recv_cqe(struct mthca_cqe *cqe)
+{
+ if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+ MTHCA_ERROR_CQE_OPCODE_MASK)
+ return !(cqe->opcode & 0x01);
+ else
+ return !(cqe->is_send & 0x80);
+}
+
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+ struct mthca_srq *srq)
+{
+ struct mthca_cqe *cqe;
+ u32 prod_index;
+ int i, nfreed = 0;
spin_lock_irq(&cq->lock);
@@ -235,66 +292,107 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn)
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->cons_index;
- cqe_sw(cq, prod_index & cq->ibcq.cqe);
+ cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
++prod_index)
if (prod_index == cq->cons_index + cq->ibcq.cqe)
break;
if (0)
mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
- qpn, cqn, cq->cons_index, prod_index);
+ qpn, cq->cqn, cq->cons_index, prod_index);
/*
* Now sweep backwards through the CQ, removing CQ entries
* that match our QP by copying older entries on top of them.
*/
- while (prod_index > cq->cons_index) {
- cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
- if (cqe->my_qpn == cpu_to_be32(qpn))
+ while ((int) --prod_index - (int) cq->cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ if (cqe->my_qpn == cpu_to_be32(qpn)) {
+ if (srq && is_recv_cqe(cqe))
+ mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
++nfreed;
- else if (nfreed)
- memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
- cq->ibcq.cqe),
- cqe,
- MTHCA_CQ_ENTRY_SIZE);
- --prod_index;
+ } else if (nfreed)
+ memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
+ cqe, MTHCA_CQ_ENTRY_SIZE);
}
if (nfreed) {
+ for (i = 0; i < nfreed; ++i)
+ set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
wmb();
cq->cons_index += nfreed;
update_cons_index(dev, cq, nfreed);
}
spin_unlock_irq(&cq->lock);
- if (atomic_dec_and_test(&cq->refcount))
- wake_up(&cq->wait);
}
-static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
- struct mthca_qp *qp, int wqe_index, int is_send,
- struct mthca_err_cqe *cqe,
- struct ib_wc *entry, int *free_cqe)
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
{
- int err;
- int dbd;
- u32 new_wqe;
+ int i;
- if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
- int j;
+ /*
+ * In Tavor mode, the hardware keeps the consumer and producer
+ * indices mod the CQ size. Since we might be making the CQ
+ * bigger, we need to deal with the case where the producer
+ * index wrapped around before the CQ was resized.
+ */
+ if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
+ cq->ibcq.cqe < cq->resize_buf->cqe) {
+ cq->cons_index &= cq->ibcq.cqe;
+ if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
+ cq->cons_index -= cq->ibcq.cqe + 1;
+ }
- mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
- cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
- be32_to_cpu(cqe->wqe));
+ for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
+ memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+ i & cq->resize_buf->cqe),
+ get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
+}
+
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
+{
+ int ret;
+ int i;
+
+ ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
+ MTHCA_MAX_DIRECT_CQ_SIZE,
+ &buf->queue, &buf->is_direct,
+ &dev->driver_pd, 1, &buf->mr);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe_from_buf(buf, i));
- for (j = 0; j < 8; ++j)
- printk(KERN_DEBUG " [%2x] %08x\n",
- j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+ return 0;
+}
+
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
+{
+ mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
+ buf->is_direct, &buf->mr);
+}
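Note the asymmetric calling convention of the two helpers above, visible in mthca_init_cq below: allocation takes the number of entries, while free takes ibcq.cqe, which is one less:

	err = mthca_alloc_cq_buf(dev, &cq->buf, nent);	/* nent CQEs, all set to HW ownership */
	...
	mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);	/* cqe == nent - 1 */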
+
+static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
+ struct mthca_qp *qp, int wqe_index, int is_send,
+ struct mthca_err_cqe *cqe,
+ struct ib_wc *entry, int *free_cqe)
+{
+ int dbd;
+ __be32 new_wqe;
+
+ if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
+ mthca_dbg(dev, "local QP operation err "
+ "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
+ be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
+ cq->cqn, cq->cons_index);
+ dump_cqe(dev, cqe);
}
/*
- * For completions in error, only work request ID, status (and
- * freed resource count for RD) have to be set.
+ * For completions in error, only work request ID, status, vendor error
+ * (and freed resource count for RD) have to be set.
*/
switch (cqe->syndrome) {
case SYNDROME_LOCAL_LENGTH_ERR:
@@ -356,9 +454,16 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
break;
}
- err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
- if (err)
- return err;
+ entry->vendor_err = cqe->vendor_err;
+
+ /*
+ * Mem-free HCAs always generate one CQE per WQE, even in the
+ * error case, so we don't have to check the doorbell count, etc.
+ */
+ if (mthca_is_memfree(dev))
+ return;
+
+ mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
/*
* If we're at the end of the WQE chain, or we've used up our
@@ -366,24 +471,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
* the next poll operation.
*/
if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
- return 0;
+ return;
- cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
+ be16_add_cpu(&cqe->db_cnt, -dbd);
cqe->wqe = new_wqe;
cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
*free_cqe = 0;
-
- return 0;
-}
-
-static void dump_cqe(struct mthca_cqe *cqe)
-{
- int j;
-
- for (j = 0; j < 8; ++j)
- printk(KERN_DEBUG " [%2x] %08x\n",
- j * 4, be32_to_cpu(((u32 *) cqe)[j]));
}
static inline int mthca_poll_one(struct mthca_dev *dev,
@@ -399,6 +493,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
int is_send;
int free_cqe = 1;
int err = 0;
+ u16 checksum;
cqe = next_cqe_sw(cq);
if (!cqe)
@@ -414,8 +509,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
be32_to_cpu(cqe->wqe));
-
- dump_cqe(cqe);
+ dump_cqe(dev, cqe);
}
is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
@@ -439,7 +533,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
}
}
- entry->qp_num = (*cur_qp)->qpn;
+ entry->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
@@ -447,28 +541,41 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
>> wq->wqe_shift);
entry->wr_id = (*cur_qp)->wrid[wqe_index +
(*cur_qp)->rq.max];
+ } else if ((*cur_qp)->ibqp.srq) {
+ struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
+ u32 wqe = be32_to_cpu(cqe->wqe);
+ wq = NULL;
+ wqe_index = wqe >> srq->wqe_shift;
+ entry->wr_id = srq->wrid[wqe_index];
+ mthca_free_srq_wqe(srq, wqe);
} else {
+ s32 wqe;
wq = &(*cur_qp)->rq;
- wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift;
+ wqe = be32_to_cpu(cqe->wqe);
+ wqe_index = wqe >> wq->wqe_shift;
+ /*
+ * WQE addr == base - 1 might be reported in receive completion
+ * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+ * Arbel FW 5.1.400. This bug should be fixed in later FW revs.
+ */
+ if (unlikely(wqe_index < 0))
+ wqe_index = wq->max - 1;
entry->wr_id = (*cur_qp)->wrid[wqe_index];
}
- if (wq->last_comp < wqe_index)
- wq->tail += wqe_index - wq->last_comp;
- else
- wq->tail += wqe_index + wq->max - wq->last_comp;
-
- wq->last_comp = wqe_index;
+ if (wq) {
+ if (wq->last_comp < wqe_index)
+ wq->tail += wqe_index - wq->last_comp;
+ else
+ wq->tail += wqe_index + wq->max - wq->last_comp;
- if (0)
- mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n",
- is_send ? "Send" : "Receive",
- (*cur_qp)->qpn, wqe_index, wq->max);
+ wq->last_comp = wqe_index;
+ }
if (is_error) {
- err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
- (struct mthca_err_cqe *) cqe,
- entry, &free_cqe);
+ handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
+ (struct mthca_err_cqe *) cqe,
+ entry, &free_cqe);
goto out;
}
@@ -495,11 +602,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
break;
case MTHCA_OPCODE_ATOMIC_CS:
entry->opcode = IB_WC_COMP_SWAP;
- entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
case MTHCA_OPCODE_ATOMIC_FA:
entry->opcode = IB_WC_FETCH_ADD;
- entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
case MTHCA_OPCODE_BIND_MW:
entry->opcode = IB_WC_BIND_MW;
@@ -514,13 +621,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
- entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV;
break;
case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
- entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
break;
default:
@@ -529,12 +636,15 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
break;
}
entry->slid = be16_to_cpu(cqe->rlid);
- entry->sl = be16_to_cpu(cqe->sl_g_mlpath) >> 12;
+ entry->sl = cqe->sl_ipok >> 4;
entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
- entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f;
+ entry->dlid_path_bits = cqe->g_mlpath & 0x7f;
entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
- entry->wc_flags |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ?
- IB_WC_GRH : 0;
+ entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
+ checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
+ ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
+ entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ?
+ IB_WC_IP_CSUM_OK : 0;
}
entry->status = IB_WC_SUCCESS;
@@ -562,11 +672,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
spin_lock_irqsave(&cq->lock, flags);
- for (npolled = 0; npolled < num_entries; ++npolled) {
+ npolled = 0;
+repoll:
+ while (npolled < num_entries) {
err = mthca_poll_one(dev, cq, &qp,
&freed, entry + npolled);
if (err)
break;
+ ++npolled;
}
if (freed) {
@@ -574,43 +687,73 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
update_cons_index(dev, cq, freed);
}
+ /*
+ * If a CQ resize is in progress and we discovered that the
+ * old buffer is empty, then peek in the new buffer, and if
+ * it's not empty, switch to the new buffer and continue
+ * polling there.
+ */
+ if (unlikely(err == -EAGAIN && cq->resize_buf &&
+ cq->resize_buf->state == CQ_RESIZE_READY)) {
+ /*
+ * In Tavor mode, the hardware keeps the producer
+ * index modulo the CQ size. Since we might be making
+ * the CQ bigger, we need to mask our consumer index
+ * using the size of the old CQ buffer before looking
+ * in the new CQ buffer.
+ */
+ if (!mthca_is_memfree(dev))
+ cq->cons_index &= cq->ibcq.cqe;
+
+ if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
+ cq->cons_index & cq->resize_buf->cqe))) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+
+ cq->resize_buf->buf = tbuf;
+ cq->resize_buf->cqe = tcqe;
+ cq->resize_buf->state = CQ_RESIZE_SWAPPED;
+
+ goto repoll;
+ }
+ }
+
spin_unlock_irqrestore(&cq->lock, flags);
return err == 0 || err == -EAGAIN ? npolled : err;
}
-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
{
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
- MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
- MTHCA_TAVOR_CQ_DB_REQ_NOT) |
- to_mcq(cq)->cqn);
- doorbell[1] = 0xffffffff;
+ u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+ MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+ to_mcq(cq)->cqn;
- mthca_write64(doorbell,
- to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+ mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
return 0;
}
-int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mthca_cq *cq = to_mcq(ibcq);
- u32 doorbell[2];
- u32 sn;
- u32 ci;
+ __be32 db_rec[2];
+ u32 dbhi;
+ u32 sn = cq->arm_sn & 3;
- sn = cq->arm_sn & 3;
- ci = cpu_to_be32(cq->cons_index);
+ db_rec[0] = cpu_to_be32(cq->cons_index);
+ db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+ ((flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ? 1 : 2));
- doorbell[0] = ci;
- doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
- (notify == IB_CQ_SOLICITED ? 1 : 2));
-
- mthca_write_db_rec(doorbell, cq->arm_db);
+ mthca_write_db_rec(db_rec, cq->arm_db);
/*
* Make sure that the doorbell record in host memory is
@@ -618,197 +761,85 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
*/
wmb();
- doorbell[0] = cpu_to_be32((sn << 28) |
- (notify == IB_CQ_SOLICITED ?
- MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
- MTHCA_ARBEL_CQ_DB_REQ_NOT) |
- cq->cqn);
- doorbell[1] = ci;
+ dbhi = (sn << 28) |
+ ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+ MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
- mthca_write64(doorbell,
+ mthca_write64(dbhi, cq->cons_index,
to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
return 0;
}
-static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
-{
- int i;
- int size;
-
- if (cq->is_direct)
- pci_free_consistent(dev->pdev,
- (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
- cq->queue.direct.buf,
- pci_unmap_addr(&cq->queue.direct,
- mapping));
- else {
- size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
- for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
- if (cq->queue.page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- cq->queue.page_list[i].buf,
- pci_unmap_addr(&cq->queue.page_list[i],
- mapping));
-
- kfree(cq->queue.page_list);
- }
-}
-
-static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
- struct mthca_cq *cq)
-{
- int err = -ENOMEM;
- int npages, shift;
- u64 *dma_list = NULL;
- dma_addr_t t;
- int i;
-
- if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) {
- cq->is_direct = 1;
- npages = 1;
- shift = get_order(size) + PAGE_SHIFT;
-
- cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
- size, &t);
- if (!cq->queue.direct.buf)
- return -ENOMEM;
-
- pci_unmap_addr_set(&cq->queue.direct, mapping, t);
-
- memset(cq->queue.direct.buf, 0, size);
-
- while (t & ((1 << shift) - 1)) {
- --shift;
- npages *= 2;
- }
-
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
- if (!dma_list)
- goto err_free;
-
- for (i = 0; i < npages; ++i)
- dma_list[i] = t + i * (1 << shift);
- } else {
- cq->is_direct = 0;
- npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
- shift = PAGE_SHIFT;
-
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
- if (!dma_list)
- return -ENOMEM;
-
- cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list,
- GFP_KERNEL);
- if (!cq->queue.page_list)
- goto err_out;
-
- for (i = 0; i < npages; ++i)
- cq->queue.page_list[i].buf = NULL;
-
- for (i = 0; i < npages; ++i) {
- cq->queue.page_list[i].buf =
- pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
- if (!cq->queue.page_list[i].buf)
- goto err_free;
-
- dma_list[i] = t;
- pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t);
-
- memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE);
- }
- }
-
- err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
- dma_list, shift, npages,
- 0, size,
- MTHCA_MPT_FLAG_LOCAL_WRITE |
- MTHCA_MPT_FLAG_LOCAL_READ,
- &cq->mr);
- if (err)
- goto err_free;
-
- kfree(dma_list);
-
- return 0;
-
-err_free:
- mthca_free_cq_buf(dev, cq);
-
-err_out:
- kfree(dma_list);
-
- return err;
-}
-
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
- int size = nent * MTHCA_CQ_ENTRY_SIZE;
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
- u8 status;
- int i;
-
- might_sleep();
- cq->ibcq.cqe = nent - 1;
+ cq->ibcq.cqe = nent - 1;
+ cq->is_kernel = !ctx;
cq->cqn = mthca_alloc(&dev->cq_table.alloc);
if (cq->cqn == -1)
return -ENOMEM;
if (mthca_is_memfree(dev)) {
- cq->arm_sn = 1;
-
err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
if (err)
goto err_out;
- err = -ENOMEM;
+ if (cq->is_kernel) {
+ cq->arm_sn = 1;
- cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
- cq->cqn, &cq->set_ci_db);
- if (cq->set_ci_db_index < 0)
- goto err_out_icm;
+ err = -ENOMEM;
- cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
- cq->cqn, &cq->arm_db);
- if (cq->arm_db_index < 0)
- goto err_out_ci;
- }
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
- mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
- goto err_out_mailbox;
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
+ }
- cq_context = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ goto err_out_arm;
- err = mthca_alloc_cq_buf(dev, size, cq);
- if (err)
- goto err_out_mailbox;
+ cq_context = mailbox->buf;
- for (i = 0; i < nent; ++i)
- set_cqe_hw(get_cqe(cq, i));
+ if (cq->is_kernel) {
+ err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
+ if (err)
+ goto err_out_mailbox;
+ }
spin_lock_init(&cq->lock);
- atomic_set(&cq->refcount, 1);
+ cq->refcount = 1;
init_waitqueue_head(&cq->wait);
+ mutex_init(&cq->mutex);
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
MTHCA_CQ_STATE_DISARMED |
MTHCA_CQ_FLAG_TR);
- cq_context->start = cpu_to_be64(0);
- cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
- dev->driver_uar.index);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (ctx)
+ cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+ else
+ cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
- cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
- cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
+ cq_context->pd = cpu_to_be32(pdn);
+ cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
if (mthca_is_memfree(dev)) {
@@ -816,19 +847,12 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context->state_db = cpu_to_be32(cq->arm_db_index);
}
- err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
+ err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn);
if (err) {
mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
goto err_out_free_mr;
}
- if (status) {
- mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_free_mr;
- }
-
spin_lock_irq(&dev->cq_table.lock);
if (mthca_array_set(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1),
@@ -840,22 +864,23 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq->cons_index = 0;
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return 0;
err_out_free_mr:
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
+ if (cq->is_kernel)
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_out_mailbox:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
- if (mthca_is_memfree(dev))
+err_out_arm:
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
err_out_ci:
- if (mthca_is_memfree(dev))
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
err_out_icm:
@@ -867,35 +892,40 @@ err_out:
return err;
}
+static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
+{
+ int c;
+
+ spin_lock_irq(&dev->cq_table.lock);
+ c = cq->refcount;
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ return c;
+}
+
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq)
{
- void *mailbox;
+ struct mthca_mailbox *mailbox;
int err;
- u8 status;
-
- might_sleep();
- mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox) {
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free CQ.\n");
return;
}
- err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
+ err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn);
if (err)
mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
- status);
if (0) {
- u32 *ctx = MAILBOX_ALIGN(mailbox);
+ __be32 *ctx = mailbox->buf;
int j;
printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
- cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+ cq->cqn, cq->cons_index,
+ cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
@@ -903,6 +933,7 @@ void mthca_free_cq(struct mthca_dev *dev,
spin_lock_irq(&dev->cq_table.lock);
mthca_array_clear(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1));
+ --cq->refcount;
spin_unlock_irq(&dev->cq_table.lock);
if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -910,23 +941,22 @@ void mthca_free_cq(struct mthca_dev *dev,
else
synchronize_irq(dev->pdev->irq);
- atomic_dec(&cq->refcount);
- wait_event(cq->wait, !atomic_read(&cq->refcount));
+ wait_event(cq->wait, !get_cq_refcount(dev, cq));
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
-
- if (mthca_is_memfree(dev)) {
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
- mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+ if (cq->is_kernel) {
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ }
}
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
mthca_free(&dev->cq_table.alloc, cq->cqn);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
}
-int __devinit mthca_init_cq_table(struct mthca_dev *dev)
+int mthca_init_cq_table(struct mthca_dev *dev)
{
int err;
@@ -947,7 +977,7 @@ int __devinit mthca_init_cq_table(struct mthca_dev *dev)
return err;
}
-void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev)
+void mthca_cleanup_cq_table(struct mthca_dev *dev)
{
mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
mthca_alloc_cleanup(&dev->cq_table.alloc);
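
The mthca_cq.c changes above replace the atomic_t CQ reference count with a plain int that is read under the cq_table lock, and the destroy path sleeps in wait_event() until the count drops to zero. Below is a minimal pthreads analogue of that teardown pattern, written in user space purely for illustration; none of the names come from the driver.

/* Minimal pthreads analogue (user space, not driver code) of the pattern
 * above: the refcount is read under the lock that protects the CQ table,
 * and the destroyer sleeps until every outstanding reference is gone.
 * All names are illustrative. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ cq_table.lock */
static pthread_cond_t  cq_wait    = PTHREAD_COND_INITIALIZER;  /* ~ cq->wait */
static int cq_refcount = 2;        /* one ref for the owner, one for the handler */

static void put_cq_ref(void)
{
	pthread_mutex_lock(&table_lock);
	if (--cq_refcount == 0)
		pthread_cond_broadcast(&cq_wait);  /* ~ wake_up(&cq->wait) */
	pthread_mutex_unlock(&table_lock);
}

static void *event_handler(void *arg)
{
	(void) arg;
	usleep(1000);      /* pretend to run a completion handler */
	put_cq_ref();      /* drop the reference the handler held */
	return NULL;
}

int main(void)
{
	pthread_t thr;

	pthread_create(&thr, NULL, event_handler, NULL);

	put_cq_ref();                      /* ~ "--cq->refcount" in mthca_free_cq() */
	pthread_mutex_lock(&table_lock);   /* ~ wait_event(cq->wait, !get_cq_refcount()) */
	while (cq_refcount != 0)
		pthread_cond_wait(&cq_wait, &table_lock);
	pthread_mutex_unlock(&table_lock);

	printf("no references left, CQ resources can be freed\n");
	pthread_join(thr, NULL);
	return 0;
}
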
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e3d79e267dc..7e6a6d64ad4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,5 +1,9 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,8 +32,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_DEV_H
@@ -39,25 +41,28 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
-#include <asm/semaphore.h>
+#include <linux/timer.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
#include "mthca_provider.h"
#include "mthca_doorbell.h"
#define DRV_NAME "ib_mthca"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "0.06-pre"
-#define DRV_RELDATE "November 8, 2004"
+#define DRV_VERSION "1.0"
+#define DRV_RELDATE "April 4, 2008"
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
MTHCA_FLAG_SRQ = 1 << 2,
- MTHCA_FLAG_MSI = 1 << 3,
- MTHCA_FLAG_MSI_X = 1 << 4,
- MTHCA_FLAG_NO_LAM = 1 << 5,
- MTHCA_FLAG_FMR = 1 << 6,
- MTHCA_FLAG_MEMFREE = 1 << 7,
- MTHCA_FLAG_PCIE = 1 << 8
+ MTHCA_FLAG_MSI_X = 1 << 3,
+ MTHCA_FLAG_NO_LAM = 1 << 4,
+ MTHCA_FLAG_FMR = 1 << 5,
+ MTHCA_FLAG_MEMFREE = 1 << 6,
+ MTHCA_FLAG_PCIE = 1 << 7,
+ MTHCA_FLAG_SINAI_OPT = 1 << 8
};
enum {
@@ -65,16 +70,22 @@ enum {
};
enum {
+ MTHCA_BOARD_ID_LEN = 64
+};
+
+enum {
MTHCA_EQ_CONTEXT_SIZE = 0x40,
MTHCA_CQ_CONTEXT_SIZE = 0x40,
MTHCA_QP_CONTEXT_SIZE = 0x200,
MTHCA_RDB_ENTRY_SIZE = 0x20,
MTHCA_AV_SIZE = 0x20,
- MTHCA_MGM_ENTRY_SIZE = 0x40,
+ MTHCA_MGM_ENTRY_SIZE = 0x100,
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
MTHCA_MTT_SEG_SIZE = 0x40,
+
+ MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
enum {
@@ -97,9 +108,18 @@ enum {
MTHCA_OPCODE_INVALID = 0xff
};
+enum {
+ MTHCA_CMD_USE_EVENTS = 1 << 0,
+ MTHCA_CMD_POST_DOORBELLS = 1 << 1
+};
+
+enum {
+ MTHCA_CMD_NUM_DBELL_DWORDS = 8
+};
+
struct mthca_cmd {
- int use_events;
- struct semaphore hcr_sem;
+ struct pci_pool *pool;
+ struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
@@ -107,6 +127,9 @@ struct mthca_cmd {
int free_head;
struct mthca_cmd_context *context;
u16 token_mask;
+ u32 flags;
+ void __iomem *dbell_map;
+ u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
};
struct mthca_limits {
@@ -119,17 +142,24 @@ struct mthca_limits {
int num_uars;
int max_sg;
int num_qps;
+ int max_wqes;
+ int max_desc_sz;
+ int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
+ int max_srq_wqes;
+ int max_srq_sge;
int reserved_srqs;
int num_eecs;
int reserved_eecs;
int num_cqs;
+ int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
+ int mtt_seg_size;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
@@ -139,6 +169,10 @@ struct mthca_limits {
int reserved_mcgs;
int num_pds;
int reserved_pds;
+ u32 page_size_cap;
+ u32 flags;
+ u16 stat_rate_support;
+ u8 port_width_cap;
};
struct mthca_alloc {
@@ -169,6 +203,7 @@ struct mthca_pd_table {
struct mthca_buddy {
unsigned long **bits;
+ int *num_free;
int max_order;
spinlock_t lock;
};
@@ -208,6 +243,13 @@ struct mthca_cq_table {
struct mthca_icm_table *table;
};
+struct mthca_srq_table {
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array srq;
+ struct mthca_icm_table *table;
+};
+
struct mthca_qp_table {
struct mthca_alloc alloc;
u32 rdb_base;
@@ -229,11 +271,21 @@ struct mthca_av_table {
};
struct mthca_mcg_table {
- struct semaphore sem;
+ struct mutex mutex;
struct mthca_alloc alloc;
struct mthca_icm_table *table;
};
+struct mthca_catas_err {
+ u64 addr;
+ u32 __iomem *map;
+ u32 size;
+ struct timer_list timer;
+ struct list_head list;
+};
+
+extern struct mutex mthca_device_mutex;
+
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
@@ -243,6 +295,7 @@ struct mthca_dev {
unsigned long device_cap_flags;
u32 rev_id;
+ char board_id[MTHCA_BOARD_ID_LEN];
/* firmware info */
u64 fw_ver;
@@ -265,7 +318,7 @@ struct mthca_dev {
u64 ddr_end;
MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
- struct semaphore cap_mask_mutex;
+ struct mutex cap_mask_mutex;
void __iomem *hcr;
void __iomem *kar;
@@ -288,10 +341,13 @@ struct mthca_dev {
struct mthca_mr_table mr_table;
struct mthca_eq_table eq_table;
struct mthca_cq_table cq_table;
+ struct mthca_srq_table srq_table;
struct mthca_qp_table qp_table;
struct mthca_av_table av_table;
struct mthca_mcg_table mcg_table;
+ struct mthca_catas_err catas_err;
+
struct mthca_uar driver_uar;
struct mthca_db_table *db_tab;
struct mthca_pd driver_pd;
@@ -300,10 +356,25 @@ struct mthca_dev {
struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
spinlock_t sm_lock;
+ u8 rate[MTHCA_MAX_PORTS];
+ bool active;
};
-#define mthca_dbg(mdev, format, arg...) \
- dev_dbg(&mdev->pdev->dev, format, ## arg)
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+extern int mthca_debug_level;
+
+#define mthca_dbg(mdev, format, arg...) \
+ do { \
+ if (mthca_debug_level) \
+ dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
+ } while (0)
+
+#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
#define mthca_err(mdev, format, arg...) \
dev_err(&mdev->pdev->dev, format, ## arg)
#define mthca_info(mdev, format, arg...) \
@@ -318,24 +389,23 @@ extern void __buggy_use_of_MTHCA_PUT(void);
do { \
void *__p = (char *) (source) + (offset); \
switch (sizeof (dest)) { \
- case 1: (dest) = *(u8 *) __p; break; \
- case 2: (dest) = be16_to_cpup(__p); break; \
- case 4: (dest) = be32_to_cpup(__p); break; \
- case 8: (dest) = be64_to_cpup(__p); break; \
- default: __buggy_use_of_MTHCA_GET(); \
+ case 1: (dest) = *(u8 *) __p; break; \
+ case 2: (dest) = be16_to_cpup(__p); break; \
+ case 4: (dest) = be32_to_cpup(__p); break; \
+ case 8: (dest) = be64_to_cpup(__p); break; \
+ default: __buggy_use_of_MTHCA_GET(); \
} \
} while (0)
#define MTHCA_PUT(dest, source, offset) \
do { \
- __typeof__(source) *__p = \
- (__typeof__(source) *) ((char *) (dest) + (offset)); \
+ void *__d = ((char *) (dest) + (offset)); \
switch (sizeof(source)) { \
- case 1: *__p = (source); break; \
- case 2: *__p = cpu_to_be16(source); break; \
- case 4: *__p = cpu_to_be32(source); break; \
- case 8: *__p = cpu_to_be64(source); break; \
- default: __buggy_use_of_MTHCA_PUT(); \
+ case 1: *(u8 *) __d = (source); break; \
+ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
+ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
+ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
+ default: __buggy_use_of_MTHCA_PUT(); \
} \
} while (0)
@@ -351,12 +421,18 @@ int mthca_array_set(struct mthca_array *array, int index, void *value);
void mthca_array_clear(struct mthca_array *array, int index);
int mthca_array_init(struct mthca_array *array, int nent);
void mthca_array_cleanup(struct mthca_array *array, int nent);
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+ union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+ int hca_write, struct mthca_mr *mr);
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+ int is_direct, struct mthca_mr *mr);
int mthca_init_uar_table(struct mthca_dev *dev);
int mthca_init_pd_table(struct mthca_dev *dev);
int mthca_init_mr_table(struct mthca_dev *dev);
int mthca_init_eq_table(struct mthca_dev *dev);
int mthca_init_cq_table(struct mthca_dev *dev);
+int mthca_init_srq_table(struct mthca_dev *dev);
int mthca_init_qp_table(struct mthca_dev *dev);
int mthca_init_av_table(struct mthca_dev *dev);
int mthca_init_mcg_table(struct mthca_dev *dev);
@@ -366,6 +442,7 @@ void mthca_cleanup_pd_table(struct mthca_dev *dev);
void mthca_cleanup_mr_table(struct mthca_dev *dev);
void mthca_cleanup_eq_table(struct mthca_dev *dev);
void mthca_cleanup_cq_table(struct mthca_dev *dev);
+void mthca_cleanup_srq_table(struct mthca_dev *dev);
void mthca_cleanup_qp_table(struct mthca_dev *dev);
void mthca_cleanup_av_table(struct mthca_dev *dev);
void mthca_cleanup_mcg_table(struct mthca_dev *dev);
@@ -373,12 +450,26 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev);
int mthca_register_device(struct mthca_dev *dev);
void mthca_unregister_device(struct mthca_dev *dev);
+void mthca_start_catas_poll(struct mthca_dev *dev);
+void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
+
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len);
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
@@ -402,18 +493,43 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev);
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry);
-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
-int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
-void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type);
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+ struct mthca_srq *srq);
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
+
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct ib_srq_attr *attr, struct mthca_srq *srq);
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int mthca_max_srq_sge(struct mthca_dev *dev);
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+ enum ib_event_type event_type);
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata);
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -422,20 +538,22 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
- int index, int *dbd, u32 *new_wqe);
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe);
int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp);
@@ -447,6 +565,10 @@ int mthca_create_ah(struct mthca_dev *dev,
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
+int mthca_ah_grh_present(struct mthca_ah *ah);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
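
The reworked MTHCA_GET/MTHCA_PUT macros above read and write big-endian fields of 1, 2, 4 or 8 bytes at fixed byte offsets inside a command mailbox, dispatching on sizeof. A rough user-space sketch of the same idea follows (4-byte case only); memcpy() replaces the kernel's pointer casts to stay alignment-safe, htobe32()/be32toh() stand in for cpu_to_be32()/be32_to_cpup(), and the offset and value are invented for the example.

/* User-space sketch (not driver code) of big-endian field packing at fixed
 * byte offsets inside a raw mailbox buffer. */
#include <endian.h>   /* htobe32()/be32toh(), Linux-specific */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PUT32(buf, off, val)                                      \
	do {                                                      \
		uint32_t __v = htobe32(val);                      \
		memcpy((unsigned char *) (buf) + (off), &__v, 4); \
	} while (0)

#define GET32(dest, buf, off)                                     \
	do {                                                      \
		uint32_t __v;                                     \
		memcpy(&__v, (unsigned char *) (buf) + (off), 4); \
		(dest) = be32toh(__v);                            \
	} while (0)

int main(void)
{
	unsigned char mailbox[0x40] = { 0 };
	uint32_t cqn;

	PUT32(mailbox, 0x08, 0xabcdef);   /* hypothetical "CQN" field offset */
	GET32(cqn, mailbox, 0x08);

	printf("read back 0x%06x; bytes at 0x08: %02x %02x %02x %02x\n",
	       cqn, mailbox[8], mailbox[9], mailbox[10], mailbox[11]);
	return 0;
}
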
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index 821039a4904..14f51ef97d7 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/types.h>
@@ -56,13 +56,13 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
__raw_writeq((__force u64) val, dest);
}
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
- __raw_writeq(*(u64 *) val, dest);
+ __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
}
-static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
*(u64 *) db = *(u64 *) val;
}
@@ -85,18 +85,21 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
__raw_writel(((__force u32 *) &val)[1], dest + 4);
}
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
unsigned long flags;
+ hi = (__force u32) cpu_to_be32(hi);
+ lo = (__force u32) cpu_to_be32(lo);
+
spin_lock_irqsave(doorbell_lock, flags);
- __raw_writel(val[0], dest);
- __raw_writel(val[1], dest + 4);
+ __raw_writel(hi, dest);
+ __raw_writel(lo, dest + 4);
spin_unlock_irqrestore(doorbell_lock, flags);
}
-static inline void mthca_write_db_rec(u32 val[2], u32 *db)
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
db[0] = val[0];
wmb();
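
The new mthca_write64() above takes the doorbell as separate hi/lo words: on 64-bit systems it issues one big-endian 64-bit store of (hi << 32 | lo), while the 32-bit fallback writes two big-endian 32-bit words under the doorbell lock. The stand-alone check below (user space, not driver code) shows the two layouts are byte-for-byte identical, which is why the converted callers can pass hi/lo unchanged on either path.

/* Verifies that htobe64((u64) hi << 32 | lo) stored once matches
 * htobe32(hi) at dest followed by htobe32(lo) at dest + 4. */
#include <endian.h>   /* htobe32()/htobe64(), Linux-specific */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t hi = 0x12345678, lo = 0x9abcdef0;
	unsigned char a[8], b[8];
	uint64_t v64 = htobe64((uint64_t) hi << 32 | lo);
	uint32_t vhi = htobe32(hi), vlo = htobe32(lo);

	memcpy(a, &v64, 8);       /* single 64-bit doorbell write        */
	memcpy(b, &vhi, 4);       /* two 32-bit writes: hi at dest ...   */
	memcpy(b + 4, &vlo, 4);   /* ... then lo at dest + 4             */

	printf("layouts %s\n", memcmp(a, b, 8) ? "differ" : "match");
	return 0;
}
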
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index f46d615d396..69020173899 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,14 +29,12 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
+#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -44,6 +43,7 @@
enum {
MTHCA_NUM_ASYNC_EQE = 0x80,
MTHCA_NUM_CMD_EQE = 0x80,
+ MTHCA_NUM_SPARE_EQE = 0x80,
MTHCA_EQ_ENTRY_SIZE = 0x20
};
@@ -51,18 +51,18 @@ enum {
* Must be packed because start is 64 bits but only aligned to 32 bits.
*/
struct mthca_eq_context {
- u32 flags;
- u64 start;
- u32 logsize_usrpage;
- u32 tavor_pd; /* reserved for Arbel */
- u8 reserved1[3];
- u8 intr;
- u32 arbel_pd; /* lost_count for Tavor */
- u32 lkey;
- u32 reserved2[2];
- u32 consumer_index;
- u32 producer_index;
- u32 reserved3[4];
+ __be32 flags;
+ __be64 start;
+ __be32 logsize_usrpage;
+ __be32 tavor_pd; /* reserved for Arbel */
+ u8 reserved1[3];
+ u8 intr;
+ __be32 arbel_pd; /* lost_count for Tavor */
+ __be32 lkey;
+ u32 reserved2[2];
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved3[4];
} __attribute__((packed));
#define MTHCA_EQ_STATUS_OK ( 0 << 28)
@@ -82,7 +82,8 @@ enum {
MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
MTHCA_EVENT_TYPE_COMM_EST = 0x02,
MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
- MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
@@ -109,8 +110,9 @@ enum {
(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
-#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
- (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
#define MTHCA_EQ_DB_INC_CI (1 << 24)
@@ -127,28 +129,31 @@ struct mthca_eqe {
union {
u32 raw[6];
struct {
- u32 cqn;
+ __be32 cqn;
} __attribute__((packed)) comp;
struct {
- u16 reserved1;
- u16 token;
- u32 reserved2;
- u8 reserved3[3];
- u8 status;
- u64 out_param;
+ u16 reserved1;
+ __be16 token;
+ u32 reserved2;
+ u8 reserved3[3];
+ u8 status;
+ __be64 out_param;
} __attribute__((packed)) cmd;
struct {
- u32 qpn;
+ __be32 qpn;
} __attribute__((packed)) qp;
struct {
- u32 cqn;
- u32 reserved1;
- u8 reserved2[3];
- u8 syndrome;
+ __be32 srqn;
+ } __attribute__((packed)) srq;
+ struct {
+ __be32 cqn;
+ u32 reserved1;
+ u8 reserved2[3];
+ u8 syndrome;
} __attribute__((packed)) cq_err;
struct {
- u32 reserved1[2];
- u32 port;
+ u32 reserved1[2];
+ __be32 port;
} __attribute__((packed)) port_change;
} event;
u8 reserved3[3];
@@ -167,11 +172,6 @@ static inline u64 async_mask(struct mthca_dev *dev)
static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
- doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
-
/*
* This barrier makes sure that all updates to ownership bits
* done by set_eqe_hw() hit memory before the consumer index
@@ -181,7 +181,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
* having set_eqe_hw() overwrite the owner field.
*/
wmb();
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -190,8 +190,8 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
{
/* See comment in tavor_set_eq_ci() above. */
wmb();
- __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base +
- eq->eqn * 8);
+ __raw_writel((__force u32) cpu_to_be32(ci),
+ dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
/* We still want ordering, just not swabbing, so add a barrier */
mb();
}
@@ -206,12 +206,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
{
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
- doorbell[1] = 0;
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -224,12 +219,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
if (!mthca_is_memfree(dev)) {
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
- doorbell[1] = cpu_to_be32(cqn);
-
- mthca_write64(doorbell,
+ mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
@@ -241,9 +231,9 @@ static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
}
-static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
+static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
{
- struct mthca_eqe* eqe;
+ struct mthca_eqe *eqe;
eqe = get_eqe(eq, eq->cons_index);
return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
}
@@ -271,11 +261,10 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
{
struct mthca_eqe *eqe;
int disarm_cqn;
- int eqes_found = 0;
+ int eqes_found = 0;
+ int set_ci = 0;
while ((eqe = next_eqe_sw(eq))) {
- int set_ci = 0;
-
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -286,7 +275,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
case MTHCA_EVENT_TYPE_COMP:
disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
disarm_cq(dev, eq->eqn, disarm_cqn);
- mthca_cq_event(dev, disarm_cqn);
+ mthca_cq_completion(dev, disarm_cqn);
break;
case MTHCA_EVENT_TYPE_PATH_MIG:
@@ -304,6 +293,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
IB_EVENT_SQ_DRAINED);
break;
+ case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_LAST_WQE_REACHED);
+ break;
+
+ case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+ mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+ IB_EVENT_SRQ_LIMIT_REACHED);
+ break;
+
case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_FATAL);
@@ -329,12 +328,6 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
be16_to_cpu(eqe->event.cmd.token),
eqe->event.cmd.status,
be64_to_cpu(eqe->event.cmd.out_param));
- /*
- * cmd_event() may add more commands.
- * The card will think the queue has overflowed if
- * we don't tell it we've been processing events.
- */
- set_ci = 1;
break;
case MTHCA_EVENT_TYPE_PORT_CHANGE:
@@ -348,6 +341,8 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
+ mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
+ IB_EVENT_CQ_ERR);
break;
case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
@@ -362,13 +357,21 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
eqe->type, eqe->subtype, eq->eqn);
break;
- };
+ }
set_eqe_hw(eqe);
++eq->cons_index;
eqes_found = 1;
+ ++set_ci;
- if (unlikely(set_ci)) {
+ /*
+ * The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MTHCA_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
+ if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
/*
* Conditional on hca_type is OK here because
* this is a rare case, not the fast path.
@@ -385,7 +388,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
return eqes_found;
}
-static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
u32 ecr;
@@ -395,24 +398,24 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
- if (ecr) {
- writel(ecr, dev->eq_regs.tavor.ecr_base +
- MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+ if (!ecr)
+ return IRQ_NONE;
+
+ writel(ecr, dev->eq_regs.tavor.ecr_base +
+ MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
- for (i = 0; i < MTHCA_NUM_EQ; ++i)
- if (ecr & dev->eq_table.eq[i].eqn_mask &&
- mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (ecr & dev->eq_table.eq[i].eqn_mask) {
+ if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
- tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
- }
- }
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+ }
- return IRQ_RETVAL(ecr);
+ return IRQ_HANDLED;
}
-static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
- struct pt_regs *regs)
+static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
{
struct mthca_eq *eq = eq_ptr;
struct mthca_dev *dev = eq->dev;
@@ -425,7 +428,7 @@ static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
return IRQ_HANDLED;
}
-static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
int work = 0;
@@ -446,8 +449,7 @@ static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs
return IRQ_RETVAL(work);
}
-static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
- struct pt_regs *regs)
+static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
{
struct mthca_eq *eq = eq_ptr;
struct mthca_dev *dev = eq->dev;
@@ -460,27 +462,22 @@ static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
return IRQ_HANDLED;
}
-static int __devinit mthca_create_eq(struct mthca_dev *dev,
- int nent,
- u8 intr,
- struct mthca_eq *eq)
+static int mthca_create_eq(struct mthca_dev *dev,
+ int nent,
+ u8 intr,
+ struct mthca_eq *eq)
{
- int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
- PAGE_SIZE;
+ int npages;
u64 *dma_list = NULL;
dma_addr_t t;
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_eq_context *eq_context;
int err = -ENOMEM;
int i;
- u8 status;
- /* Make sure EQ size is aligned to a power of 2 size. */
- for (i = 1; i < nent; i <<= 1)
- ; /* nothing */
- nent = i;
-
- eq->dev = dev;
+ eq->dev = dev;
+ eq->nent = roundup_pow_of_two(max(nent, 2));
+ npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -494,30 +491,29 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
if (!dma_list)
goto err_out_free;
- mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
goto err_out_free;
- eq_context = MAILBOX_ALIGN(mailbox);
+ eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
- eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
- PAGE_SIZE, &t);
+ eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+ PAGE_SIZE, &t, GFP_KERNEL);
if (!eq->page_list[i].buf)
- goto err_out_free;
+ goto err_out_free_pages;
dma_list[i] = t;
- pci_unmap_addr_set(&eq->page_list[i], mapping, t);
+ dma_unmap_addr_set(&eq->page_list[i], mapping, t);
- memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+ clear_page(eq->page_list[i].buf);
}
- for (i = 0; i < nent; ++i)
+ for (i = 0; i < eq->nent; ++i)
set_eqe_hw(get_eqe(eq, i));
eq->eqn = mthca_alloc(&dev->eq_table.alloc);
if (eq->eqn == -1)
- goto err_out_free;
+ goto err_out_free_pages;
err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
dma_list, PAGE_SHIFT, npages,
@@ -528,8 +524,6 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
if (err)
goto err_out_free_eq;
- eq->nent = nent;
-
memset(eq_context, 0, sizeof *eq_context);
eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK |
MTHCA_EQ_OWNER_HW |
@@ -538,7 +532,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
if (mthca_is_memfree(dev))
eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
- eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
if (mthca_is_memfree(dev)) {
eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
} else {
@@ -548,20 +542,14 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
eq_context->intr = intr;
eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
- err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
+ err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn);
if (err) {
- mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
- goto err_out_free_mr;
- }
- if (status) {
- mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
- status);
- err = -EINVAL;
+ mthca_warn(dev, "SW2HW_EQ returned %d\n", err);
goto err_out_free_mr;
}
kfree(dma_list);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
eq->eqn_mask = swab32(1 << eq->eqn);
eq->cons_index = 0;
@@ -569,7 +557,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
dev->eq_table.arm_mask |= eq->eqn_mask;
mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
- eq->eqn, nent);
+ eq->eqn, eq->nent);
return err;
@@ -579,17 +567,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
err_out_free_eq:
mthca_free(&dev->eq_table.alloc, eq->eqn);
- err_out_free:
+ err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- eq->page_list[i].buf,
- pci_unmap_addr(&eq->page_list[i],
- mapping));
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ dma_unmap_addr(&eq->page_list[i],
+ mapping));
+
+ mthca_free_mailbox(dev, mailbox);
+ err_out_free:
kfree(eq->page_list);
kfree(dma_list);
- kfree(mailbox);
err_out:
return err;
@@ -598,25 +588,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
static void mthca_free_eq(struct mthca_dev *dev,
struct mthca_eq *eq)
{
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
int err;
- u8 status;
int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
PAGE_SIZE;
int i;
- mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
return;
- err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
- eq->eqn, &status);
+ err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn);
if (err)
- mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
- if (status)
- mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
- status);
+ mthca_warn(dev, "HW2SW_EQ returned %d\n", err);
dev->eq_table.arm_mask &= ~eq->eqn_mask;
@@ -625,7 +609,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
+ printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
printk("\n");
}
@@ -635,10 +619,10 @@ static void mthca_free_eq(struct mthca_dev *dev,
for (i = 0; i < npages; ++i)
pci_free_consistent(dev->pdev, PAGE_SIZE,
eq->page_list[i].buf,
- pci_unmap_addr(&eq->page_list[i], mapping));
+ dma_unmap_addr(&eq->page_list[i], mapping));
kfree(eq->page_list);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
}
static void mthca_free_irqs(struct mthca_dev *dev)
@@ -648,44 +632,28 @@ static void mthca_free_irqs(struct mthca_dev *dev)
if (dev->eq_table.have_irq)
free_irq(dev->pdev->irq, dev);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
- if (dev->eq_table.eq[i].have_irq)
+ if (dev->eq_table.eq[i].have_irq) {
free_irq(dev->eq_table.eq[i].msi_x_vector,
dev->eq_table.eq + i);
+ dev->eq_table.eq[i].have_irq = 0;
+ }
}
-static int __devinit mthca_map_reg(struct mthca_dev *dev,
- unsigned long offset, unsigned long size,
- void __iomem **map)
+static int mthca_map_reg(struct mthca_dev *dev,
+ unsigned long offset, unsigned long size,
+ void __iomem **map)
{
- unsigned long base = pci_resource_start(dev->pdev, 0);
-
- if (!request_mem_region(base + offset, size, DRV_NAME))
- return -EBUSY;
+ phys_addr_t base = pci_resource_start(dev->pdev, 0);
*map = ioremap(base + offset, size);
- if (!*map) {
- release_mem_region(base + offset, size);
+ if (!*map)
return -ENOMEM;
- }
return 0;
}
-static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
- unsigned long size, void __iomem *map)
+static int mthca_map_eq_regs(struct mthca_dev *dev)
{
- unsigned long base = pci_resource_start(dev->pdev, 0);
-
- release_mem_region(base + offset, size);
- iounmap(map);
-}
-
-static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
-{
- unsigned long mthca_base;
-
- mthca_base = pci_resource_start(dev->pdev, 0);
-
if (mthca_is_memfree(dev)) {
/*
* We assume that the EQ arm and EQ set CI registers
@@ -709,11 +677,8 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_arm_base) + 4, 4,
&dev->eq_regs.arbel.eq_arm)) {
- mthca_err(dev, "Couldn't map interrupt clear register, "
- "aborting.\n");
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
+ iounmap(dev->clr_base);
return -ENOMEM;
}
@@ -721,14 +686,9 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
dev->fw.arbel.eq_set_ci_base,
MTHCA_EQ_SET_CI_SIZE,
&dev->eq_regs.arbel.eq_set_ci_base)) {
- mthca_err(dev, "Couldn't map interrupt clear register, "
- "aborting.\n");
- mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_arm_base) + 4, 4,
- dev->eq_regs.arbel.eq_arm);
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
return -ENOMEM;
}
} else {
@@ -744,8 +704,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
&dev->eq_regs.tavor.ecr_base)) {
mthca_err(dev, "Couldn't map ecr register, "
"aborting.\n");
- mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->clr_base);
return -ENOMEM;
}
}
@@ -754,32 +713,21 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
}
-static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
+static void mthca_unmap_eq_regs(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev)) {
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_set_ci_base,
- MTHCA_EQ_SET_CI_SIZE,
- dev->eq_regs.arbel.eq_set_ci_base);
- mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_arm_base) + 4, 4,
- dev->eq_regs.arbel.eq_arm);
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->eq_regs.arbel.eq_set_ci_base);
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
} else {
- mthca_unmap_reg(dev, MTHCA_ECR_BASE,
- MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
- dev->eq_regs.tavor.ecr_base);
- mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->eq_regs.tavor.ecr_base);
+ iounmap(dev->clr_base);
}
}
-int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
{
int ret;
- u8 status;
/*
* We assume that mapping one page is enough for the whole EQ
@@ -793,14 +741,12 @@ int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
return -ENOMEM;
dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+ if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
__free_page(dev->eq_table.icm_page);
return -ENOMEM;
}
- ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
- if (!ret && status)
- ret = -EINVAL;
+ ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
if (ret) {
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
@@ -810,20 +756,17 @@ int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
return ret;
}
-void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
+void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
- u8 status;
-
- mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
-int __devinit mthca_init_eq_table(struct mthca_dev *dev)
+int mthca_init_eq_table(struct mthca_dev *dev)
{
int err;
- u8 status;
u8 intr;
int i;
@@ -838,34 +781,32 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
if (err)
goto err_out_free;
- if (dev->mthca_flags & MTHCA_FLAG_MSI ||
- dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
dev->eq_table.clr_mask = 0;
} else {
dev->eq_table.clr_mask =
swab32(1 << (dev->eq_table.inta_pin & 31));
dev->eq_table.clr_int = dev->clr_base +
- (dev->eq_table.inta_pin < 31 ? 4 : 0);
+ (dev->eq_table.inta_pin < 32 ? 4 : 0);
}
dev->eq_table.arm_mask = 0;
- intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ?
- 128 : dev->eq_table.inta_pin;
+ intr = dev->eq_table.inta_pin;
- err = mthca_create_eq(dev, dev->limits.num_cqs,
+ err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
&dev->eq_table.eq[MTHCA_EQ_COMP]);
if (err)
goto err_out_unmap;
- err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
+ err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
&dev->eq_table.eq[MTHCA_EQ_ASYNC]);
if (err)
goto err_out_comp;
- err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE,
+ err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
&dev->eq_table.eq[MTHCA_EQ_CMD]);
if (err)
@@ -873,51 +814,52 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
static const char *eq_name[] = {
- [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
- [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
- [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
+ [MTHCA_EQ_COMP] = DRV_NAME "-comp",
+ [MTHCA_EQ_ASYNC] = DRV_NAME "-async",
+ [MTHCA_EQ_CMD] = DRV_NAME "-cmd"
};
for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+ snprintf(dev->eq_table.eq[i].irq_name,
+ IB_DEVICE_NAME_MAX,
+ "%s@pci:%s", eq_name[i],
+ pci_name(dev->pdev));
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
- 0, eq_name[i], dev->eq_table.eq + i);
+ 0, dev->eq_table.eq[i].irq_name,
+ dev->eq_table.eq + i);
if (err)
goto err_out_cmd;
dev->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
+ DRV_NAME "@pci:%s", pci_name(dev->pdev));
err = request_irq(dev->pdev->irq,
mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
- SA_SHIRQ, DRV_NAME, dev);
+ IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
if (err)
goto err_out_cmd;
dev->eq_table.have_irq = 1;
}
err = mthca_MAP_EQ(dev, async_mask(dev),
- 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
- if (status)
- mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
- dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
- 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
- if (status)
- mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
- dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
- for (i = 0; i < MTHCA_EQ_CMD; ++i)
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_is_memfree(dev))
arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
else
@@ -943,17 +885,16 @@ err_out_free:
return err;
}
-void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev)
+void mthca_cleanup_eq_table(struct mthca_dev *dev)
{
- u8 status;
int i;
mthca_free_irqs(dev);
mthca_MAP_EQ(dev, async_mask(dev),
- 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
- 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
mthca_free_eq(dev, &dev->eq_table.eq[i]);
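
The mthca_eq.c hunks above size each EQ with MTHCA_NUM_SPARE_EQE extra entries and update the consumer index whenever that many events have been drained, instead of only after command events. A small user-space loop illustrating that consumer-index policy is sketched below; the constants and the doorbell helper are invented for the example.

/* Illustrative event-draining loop (not driver code): ring the "set CI"
 * doorbell every SPARE_EQE consumed entries, and once more at the end. */
#include <stdio.h>

#define NUM_EQE    16   /* ring size, power of two */
#define SPARE_EQE   4   /* mirror of MTHCA_NUM_SPARE_EQE */

static unsigned int cons_index;

static void set_eq_ci(unsigned int ci)
{
	/* stand-in for the EQ set-CI doorbell write */
	printf("doorbell: consumer index now %u\n", ci & (NUM_EQE - 1));
}

int main(void)
{
	int pending = 10;   /* pretend 10 event queue entries are ready */
	int set_ci = 0;

	while (pending-- > 0) {
		/* ... process one event queue entry here ... */
		++cons_index;

		/*
		 * The ring has SPARE_EQE extra entries, so the hardware only
		 * sees an overflow if we go longer than that without telling
		 * it how far we have consumed.
		 */
		if (++set_ci >= SPARE_EQE) {
			set_eq_ci(cons_index);
			set_ci = 0;
		}
	}

	set_eq_ci(cons_index);   /* final update after the loop */
	return 0;
}
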
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 7df22364201..b6f7f457fc5 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,13 +30,14 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <ib_verbs.h>
-#include <ib_mad.h>
-#include <ib_smi.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -44,10 +47,29 @@ enum {
MTHCA_VENDOR_CLASS2 = 0xa
};
-struct mthca_trap_mad {
- struct ib_mad *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
-};
+static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
+{
+ struct ib_port_attr *tprops = NULL;
+ int ret;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ return -ENOMEM;
+
+ ret = ib_query_port(&dev->ib_dev, port_num, tprops);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
+ ret, dev->ib_dev.name, port_num);
+ goto out;
+ }
+
+ dev->rate[port_num - 1] = tprops->active_speed *
+ ib_width_enum_to_int(tprops->active_width);
+
+out:
+ kfree(tprops);
+ return ret;
+}
static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
@@ -82,7 +104,8 @@ static void update_sm_ah(struct mthca_dev *dev,
*/
static void smp_snoop(struct ib_device *ibdev,
u8 port_num,
- struct ib_mad *mad)
+ struct ib_mad *mad,
+ u16 prev_lid)
{
struct ib_event event;
@@ -90,14 +113,27 @@ static void smp_snoop(struct ib_device *ibdev,
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+ struct ib_port_info *pinfo =
+ (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ u16 lid = be16_to_cpu(pinfo->lid);
+
+ mthca_update_rate(to_mdev(ibdev), port_num);
update_sm_ah(to_mdev(ibdev), port_num,
- be16_to_cpup((__be16 *) (mad->data + 58)),
- (*(u8 *) (mad->data + 76)) & 0xf);
+ be16_to_cpu(pinfo->sm_lid),
+ pinfo->neighbormtu_mastersmsl & 0xf);
event.device = ibdev;
- event.event = IB_EVENT_LID_CHANGE;
event.element.port_num = port_num;
- ib_dispatch_event(&event);
+
+ if (pinfo->clientrereg_resv_subnetto & 0x80) {
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ ib_dispatch_event(&event);
+ }
+
+ if (prev_lid != lid) {
+ event.event = IB_EVENT_LID_CHANGE;
+ ib_dispatch_event(&event);
+ }
}
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
@@ -109,54 +145,34 @@ static void smp_snoop(struct ib_device *ibdev,
}
}
+static void node_desc_override(struct ib_device *dev,
+ struct ib_mad *mad)
+{
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+ mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+ mutex_lock(&to_mdev(dev)->cap_mask_mutex);
+ memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+ mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
+ }
+}
+
static void forward_trap(struct mthca_dev *dev,
u8 port_num,
struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
- struct mthca_trap_mad *tmad;
- struct ib_sge gather_list;
- struct ib_send_wr *bad_wr, wr = {
- .opcode = IB_WR_SEND,
- .sg_list = &gather_list,
- .num_sge = 1,
- .send_flags = IB_SEND_SIGNALED,
- .wr = {
- .ud = {
- .remote_qpn = qpn,
- .remote_qkey = qpn ? IB_QP1_QKEY : 0,
- .timeout_ms = 0
- }
- }
- };
+ struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
unsigned long flags;
if (agent) {
- tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
- if (!tmad)
- return;
-
- tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
- if (!tmad->mad) {
- kfree(tmad);
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
return;
- }
-
- memcpy(tmad->mad, mad, sizeof *mad);
-
- wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
- wr.wr_id = (unsigned long) tmad;
-
- gather_list.addr = dma_map_single(agent->device->dma_device,
- tmad->mad,
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- gather_list.length = sizeof *tmad->mad;
- gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
- pci_unmap_addr_set(tmad, mapping, gather_list.addr);
-
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
@@ -164,21 +180,15 @@ static void forward_trap(struct mthca_dev *dev,
* it's OK for our devices).
*/
spin_lock_irqsave(&dev->sm_lock, flags);
- wr.wr.ud.ah = dev->sm_ah[port_num - 1];
- if (wr.wr.ud.ah)
- ret = ib_post_send_mad(agent, &wr, &bad_wr);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
spin_unlock_irqrestore(&dev->sm_lock, flags);
- if (ret) {
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
- }
+ if (ret)
+ ib_free_send_mad(send_buf);
}
}
@@ -191,8 +201,9 @@ int mthca_process_mad(struct ib_device *ibdev,
struct ib_mad *out_mad)
{
int err;
- u8 status;
- u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE;
+ u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+ u16 prev_lid = 0;
+ struct ib_port_attr pattr;
/* Forward locally generated traps to the SM */
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
@@ -230,26 +241,28 @@ int mthca_process_mad(struct ib_device *ibdev,
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
+ if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ !ib_query_port(ibdev, port_num, &pattr))
+ prev_lid = pattr.lid;
err = mthca_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad,
- &status);
- if (err) {
- mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
- return IB_MAD_RESULT_FAILURE;
- }
- if (status == MTHCA_CMD_STAT_BAD_PKT)
+ port_num, in_wc, in_grh, in_mad, out_mad);
+ if (err == -EBADMSG)
return IB_MAD_RESULT_SUCCESS;
- if (status) {
- mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
- status);
+ else if (err) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC returned %d\n", err);
return IB_MAD_RESULT_FAILURE;
}
- if (!out_mad->mad_hdr.status)
- smp_snoop(ibdev, port_num, in_mad);
+ if (!out_mad->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
+ node_desc_override(ibdev, out_mad);
+ }
/* set return bit in status of directed route responses */
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
@@ -265,21 +278,14 @@ int mthca_process_mad(struct ib_device *ibdev,
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct mthca_trap_mad *tmad =
- (void *) (unsigned long) mad_send_wc->wr_id;
-
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
int mthca_create_agents(struct mthca_dev *dev)
{
struct ib_mad_agent *agent;
int p, q;
+ int ret;
spin_lock_init(&dev->sm_lock);
@@ -289,11 +295,23 @@ int mthca_create_agents(struct mthca_dev *dev)
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
NULL, NULL);
- if (IS_ERR(agent))
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
goto err;
+ }
dev->send_agent[p][q] = agent;
}
+
+ for (p = 1; p <= dev->limits.num_ports; ++p) {
+ ret = mthca_update_rate(dev, p);
+ if (ret) {
+ mthca_err(dev, "Failed to obtain port %d rate, "
+ "aborting.\n", p);
+ goto err;
+ }
+ }
+
return 0;
err:
@@ -302,7 +320,7 @@ err:
if (dev->send_agent[p][q])
ib_unregister_mad_agent(dev->send_agent[p][q]);
- return PTR_ERR(agent);
+ return ret;
}
void mthca_free_agents(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index d40590356df..ded76c101dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,95 +30,129 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
*/
-#include <linux/config.h>
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
+#include <linux/gfp.h>
#include "mthca_dev.h"
#include "mthca_config_reg.h"
#include "mthca_cmd.h"
#include "mthca_profile.h"
#include "mthca_memfree.h"
+#include "mthca_wqe.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+
+int mthca_debug_level = 0;
+module_param_named(debug_level, mthca_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
#ifdef CONFIG_PCI_MSI
-static int msi_x = 0;
+static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
-static int msi = 0;
-module_param(msi, int, 0444);
-MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
-
#else /* CONFIG_PCI_MSI */
#define msi_x (0)
-#define msi (0)
#endif /* CONFIG_PCI_MSI */
-static const char mthca_version[] __devinitdata =
- "ib_mthca: Mellanox InfiniBand HCA driver v"
- DRV_VERSION " (" DRV_RELDATE ")\n";
-
-static struct mthca_profile default_profile = {
- .num_qp = 1 << 16,
- .rdb_per_qp = 4,
- .num_cq = 1 << 16,
- .num_mcg = 1 << 13,
- .num_mpt = 1 << 17,
- .num_mtt = 1 << 20,
- .num_udav = 1 << 15, /* Tavor only */
- .fmr_reserved_mtts = 1 << 18, /* Tavor only */
- .uarc_size = 1 << 18, /* Arbel only */
+static int tune_pci = 0;
+module_param(tune_pci, int, 0444);
+MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
+
+DEFINE_MUTEX(mthca_device_mutex);
+
+#define MTHCA_DEFAULT_NUM_QP (1 << 16)
+#define MTHCA_DEFAULT_RDB_PER_QP (1 << 2)
+#define MTHCA_DEFAULT_NUM_CQ (1 << 16)
+#define MTHCA_DEFAULT_NUM_MCG (1 << 13)
+#define MTHCA_DEFAULT_NUM_MPT (1 << 17)
+#define MTHCA_DEFAULT_NUM_MTT (1 << 20)
+#define MTHCA_DEFAULT_NUM_UDAV (1 << 15)
+#define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18)
+#define MTHCA_DEFAULT_NUM_UARC_SIZE (1 << 18)
+
+static struct mthca_profile hca_profile = {
+ .num_qp = MTHCA_DEFAULT_NUM_QP,
+ .rdb_per_qp = MTHCA_DEFAULT_RDB_PER_QP,
+ .num_cq = MTHCA_DEFAULT_NUM_CQ,
+ .num_mcg = MTHCA_DEFAULT_NUM_MCG,
+ .num_mpt = MTHCA_DEFAULT_NUM_MPT,
+ .num_mtt = MTHCA_DEFAULT_NUM_MTT,
+ .num_udav = MTHCA_DEFAULT_NUM_UDAV, /* Tavor only */
+ .fmr_reserved_mtts = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */
+ .uarc_size = MTHCA_DEFAULT_NUM_UARC_SIZE, /* Arbel only */
};
-static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
+module_param_named(num_qp, hca_profile.num_qp, int, 0444);
+MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA");
+
+module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444);
+MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP");
+
+module_param_named(num_cq, hca_profile.num_cq, int, 0444);
+MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA");
+
+module_param_named(num_mcg, hca_profile.num_mcg, int, 0444);
+MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA");
+
+module_param_named(num_mpt, hca_profile.num_mpt, int, 0444);
+MODULE_PARM_DESC(num_mpt,
+ "maximum number of memory protection table entries per HCA");
+
+module_param_named(num_mtt, hca_profile.num_mtt, int, 0444);
+MODULE_PARM_DESC(num_mtt,
+ "maximum number of memory translation table segments per HCA");
+
+module_param_named(num_udav, hca_profile.num_udav, int, 0444);
+MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA");
+
+module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444);
+MODULE_PARM_DESC(fmr_reserved_mtts,
+ "number of memory translation table segments reserved for FMR");
+
+static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
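A minimal illustrative sketch (not part of the patch) of how this parameter maps to the per-segment MTT size computed later in mthca_dev_lim(); the 64-byte figure below is an assumption for the example, the driver's real default comes from MTHCA_MTT_SEG_SIZE.

/* Sketch only: each MTT entry is 8 bytes, so a segment holding
 * (1 << log_mtts_per_seg) entries occupies that many * 8 bytes. */
static inline int example_mtt_seg_size(int log_mtts_per_seg)
{
	return (1 << log_mtts_per_seg) * 8;	/* bytes per MTT segment */
}
/* e.g. with an assumed 64-byte segment size the default is
 * log_mtts_per_seg = 3: 8 entries * 8 bytes = 64 bytes. */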
+
+static char mthca_version[] =
+ DRV_NAME ": Mellanox InfiniBand HCA driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static int mthca_tune_pci(struct mthca_dev *mdev)
{
- int cap;
- u16 val;
+ if (!tune_pci)
+ return 0;
/* First try to max out Read Byte Count */
- cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
- if (cap) {
- if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
- mthca_err(mdev, "Couldn't read PCI-X command register, "
- "aborting.\n");
- return -ENODEV;
- }
- val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
- if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
- mthca_err(mdev, "Couldn't write PCI-X command register, "
- "aborting.\n");
+ if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) {
+ if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) {
+ mthca_err(mdev, "Couldn't set PCI-X max read count, "
+ "aborting.\n");
return -ENODEV;
}
} else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
- cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
- if (cap) {
- if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
- mthca_err(mdev, "Couldn't read PCI Express device control "
- "register, aborting.\n");
- return -ENODEV;
- }
- val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
- if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
- mthca_err(mdev, "Couldn't write PCI Express device control "
- "register, aborting.\n");
+ if (pci_is_pcie(mdev->pdev)) {
+ if (pcie_set_readrq(mdev->pdev, 4096)) {
+ mthca_err(mdev, "Couldn't write PCI Express read request, "
+ "aborting.\n");
return -ENODEV;
}
} else if (mdev->mthca_flags & MTHCA_FLAG_PCIE)
@@ -126,21 +162,17 @@ static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
return 0;
}
-static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
+static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
{
int err;
- u8 status;
- err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
+ mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8;
+ err = mthca_QUERY_DEV_LIM(mdev, dev_lim);
if (err) {
- mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_DEV_LIM command returned %d"
+ ", aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
if (dev_lim->min_page_sz > PAGE_SIZE) {
mthca_err(mdev, "HCA minimum page size of %d bigger than "
"kernel PAGE_SIZE of %ld, aborting.\n",
@@ -154,22 +186,67 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
return -ENODEV;
}
+ if (dev_lim->uar_size > pci_resource_len(mdev->pdev, 2)) {
+ mthca_err(mdev, "HCA reported UAR size of 0x%x bigger than "
+ "PCI resource 2 size of 0x%llx, aborting.\n",
+ dev_lim->uar_size,
+ (unsigned long long)pci_resource_len(mdev->pdev, 2));
+ return -ENODEV;
+ }
+
mdev->limits.num_ports = dev_lim->num_ports;
mdev->limits.vl_cap = dev_lim->max_vl;
mdev->limits.mtu_cap = dev_lim->max_mtu;
mdev->limits.gid_table_len = dev_lim->max_gids;
mdev->limits.pkey_table_len = dev_lim->max_pkeys;
mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
- mdev->limits.max_sg = dev_lim->max_sg;
+ /*
+ * Need to allow for worst case send WQE overhead and check
+ * whether max_desc_sz imposes a lower limit than max_sg; UD
+ * send has the biggest overhead.
+ */
+ mdev->limits.max_sg = min_t(int, dev_lim->max_sg,
+ (dev_lim->max_desc_sz -
+ sizeof (struct mthca_next_seg) -
+ (mthca_is_memfree(mdev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg))) /
+ sizeof (struct mthca_data_seg));
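A hedged sketch of the limit computed above (not part of the patch; the parameters are placeholders for the sizeof() values used by the driver): a send descriptor of max_desc_sz bytes must hold the control segment plus the UD address segment, which is the largest per-WQE overhead, and only then data segments.

/* Illustration of the min() above: how many scatter/gather entries
 * still fit in one descriptor after the fixed UD send overhead. */
static int example_ud_send_sge_limit(int max_sg, int max_desc_sz,
				     int next_seg_sz, int ud_seg_sz,
				     int data_seg_sz)
{
	int fits = (max_desc_sz - next_seg_sz - ud_seg_sz) / data_seg_sz;

	return min(max_sg, fits);
}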
+ mdev->limits.max_wqes = dev_lim->max_qp_sz;
+ mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
+ mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev);
+ /*
+ * Subtract 1 from the limit because we need to allocate a
+ * spare CQE so the HCA HW can tell the difference between an
+ * empty CQ and a full CQ.
+ */
+ mdev->limits.max_cqes = dev_lim->max_cq_sz - 1;
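A one-line illustration of the subtraction above (editorial sketch, not driver code): with a ring of max_cq_sz CQEs, a full ring and an empty ring would both have producer index equal to consumer index, so one CQE is kept permanently unused.

static inline int example_usable_cqes(int max_cq_sz)
{
	return max_cq_sz - 1;	/* spare CQE disambiguates full vs. empty */
}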
mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
mdev->limits.reserved_uars = dev_lim->reserved_uars;
mdev->limits.reserved_pds = dev_lim->reserved_pds;
+ mdev->limits.port_width_cap = dev_lim->max_port_width;
+ mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1);
+ mdev->limits.flags = dev_lim->flags;
+ /*
+ * For old FW that doesn't return static rate support, use a
+ * value of 0x3 (only static rate values of 0 or 1 are handled),
+ * except on Sinai, where even old FW can handle static rate
+ * values of 2 and 3.
+ */
+ if (dev_lim->stat_rate_support)
+ mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
+ else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ mdev->limits.stat_rate_support = 0xf;
+ else
+ mdev->limits.stat_rate_support = 0x3;
/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
May be doable since hardware supports it for SRQ.
@@ -201,185 +278,134 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
mdev->mthca_flags |= MTHCA_FLAG_SRQ;
+ if (mthca_is_memfree(mdev))
+ if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM)
+ mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
return 0;
}
-static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
+static int mthca_init_tavor(struct mthca_dev *mdev)
{
- u8 status;
+ s64 size;
int err;
struct mthca_dev_lim dev_lim;
struct mthca_profile profile;
struct mthca_init_hca_param init_hca;
- struct mthca_adapter adapter;
- err = mthca_SYS_EN(mdev, &status);
+ err = mthca_SYS_EN(mdev);
if (err) {
- mthca_err(mdev, "SYS_EN command failed, aborting.\n");
+ mthca_err(mdev, "SYS_EN command returned %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "SYS_EN returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
- err = mthca_QUERY_FW(mdev, &status);
+ err = mthca_QUERY_FW(mdev);
if (err) {
- mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_FW command returned %d,"
+ " aborting.\n", err);
goto err_disable;
}
- if (status) {
- mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
- goto err_disable;
- }
- err = mthca_QUERY_DDR(mdev, &status);
+ err = mthca_QUERY_DDR(mdev);
if (err) {
- mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
- goto err_disable;
- }
- if (status) {
- mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "QUERY_DDR command returned %d, aborting.\n", err);
goto err_disable;
}
err = mthca_dev_lim(mdev, &dev_lim);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command returned %d, aborting.\n", err);
+ goto err_disable;
+ }
- profile = default_profile;
+ profile = hca_profile;
profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
profile.uarc_size = 0;
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ profile.num_srq = dev_lim.max_srqs;
- err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
- if (err < 0)
- goto err_disable;
-
- err = mthca_INIT_HCA(mdev, &init_hca, &status);
- if (err) {
- mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
- goto err_disable;
- }
- if (status) {
- mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if (size < 0) {
+ err = size;
goto err_disable;
}
- err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ err = mthca_INIT_HCA(mdev, &init_hca);
if (err) {
- mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_close;
- }
- if (status) {
- mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
- goto err_close;
+ mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err);
+ goto err_disable;
}
- mdev->eq_table.inta_pin = adapter.inta_pin;
- mdev->rev_id = adapter.revision_id;
-
return 0;
-err_close:
- mthca_CLOSE_HCA(mdev, 0, &status);
-
err_disable:
- mthca_SYS_DIS(mdev, &status);
+ mthca_SYS_DIS(mdev);
return err;
}
-static int __devinit mthca_load_fw(struct mthca_dev *mdev)
+static int mthca_load_fw(struct mthca_dev *mdev)
{
- u8 status;
int err;
/* FIXME: use HCA-attached memory for FW if present */
mdev->fw.arbel.fw_icm =
mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
- GFP_HIGHUSER | __GFP_NOWARN);
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
if (!mdev->fw.arbel.fw_icm) {
mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
return -ENOMEM;
}
- err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
+ err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm);
if (err) {
- mthca_err(mdev, "MAP_FA command failed, aborting.\n");
+ mthca_err(mdev, "MAP_FA command returned %d, aborting.\n", err);
goto err_free;
}
- if (status) {
- mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
- goto err_free;
- }
- err = mthca_RUN_FW(mdev, &status);
+ err = mthca_RUN_FW(mdev);
if (err) {
- mthca_err(mdev, "RUN_FW command failed, aborting.\n");
- goto err_unmap_fa;
- }
- if (status) {
- mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "RUN_FW command returned %d, aborting.\n", err);
goto err_unmap_fa;
}
return 0;
err_unmap_fa:
- mthca_UNMAP_FA(mdev, &status);
+ mthca_UNMAP_FA(mdev);
err_free:
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
return err;
}
-static int __devinit mthca_init_icm(struct mthca_dev *mdev,
- struct mthca_dev_lim *dev_lim,
- struct mthca_init_hca_param *init_hca,
- u64 icm_size)
+static int mthca_init_icm(struct mthca_dev *mdev,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca,
+ u64 icm_size)
{
u64 aux_pages;
- u8 status;
int err;
- err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
+ err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages);
if (err) {
- mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
+ mthca_err(mdev, "SET_ICM_SIZE command returned %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
(unsigned long long) icm_size >> 10,
(unsigned long long) aux_pages << 2);
mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
- GFP_HIGHUSER | __GFP_NOWARN);
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
if (!mdev->fw.arbel.aux_icm) {
mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
return -ENOMEM;
}
- err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
+ err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm);
if (err) {
- mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
- goto err_free_aux;
- }
- if (status) {
- mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "MAP_ICM_AUX returned %d, aborting.\n", err);
goto err_free_aux;
}
@@ -389,10 +415,15 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
goto err_unmap_aux;
}
+ /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
+ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
+ dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
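A sketch of the rounding done just above (illustrative only; cache_line stands in for dma_get_cache_alignment()): the reserved MTT count is bumped so the reserved region ends on a DMA cache-line boundary, keeping CPU-written MTT segments off cache lines the HCA may still DMA into.

static int example_reserved_mtt_round_up(int reserved_mtts,
					 int mtt_seg_size, int cache_line)
{
	size_t bytes = ALIGN((size_t) reserved_mtts * mtt_seg_size, cache_line);

	return bytes / mtt_seg_size;	/* rounded segment count */
}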
+
mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
- MTHCA_MTT_SEG_SIZE,
+ mdev->limits.mtt_seg_size,
mdev->limits.num_mtt_segs,
- mdev->limits.reserved_mtts, 1);
+ mdev->limits.reserved_mtts,
+ 1, 0);
if (!mdev->mr_table.mtt_table) {
mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
err = -ENOMEM;
@@ -402,7 +433,8 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
dev_lim->mpt_entry_sz,
mdev->limits.num_mpts,
- mdev->limits.reserved_mrws, 1);
+ mdev->limits.reserved_mrws,
+ 1, 1);
if (!mdev->mr_table.mpt_table) {
mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
err = -ENOMEM;
@@ -412,7 +444,8 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
dev_lim->qpc_entry_sz,
mdev->limits.num_qps,
- mdev->limits.reserved_qps, 0);
+ mdev->limits.reserved_qps,
+ 0, 0);
if (!mdev->qp_table.qp_table) {
mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
err = -ENOMEM;
@@ -422,7 +455,8 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
dev_lim->eqpc_entry_sz,
mdev->limits.num_qps,
- mdev->limits.reserved_qps, 0);
+ mdev->limits.reserved_qps,
+ 0, 0);
if (!mdev->qp_table.eqp_table) {
mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
err = -ENOMEM;
@@ -432,7 +466,7 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
MTHCA_RDB_ENTRY_SIZE,
mdev->limits.num_qps <<
- mdev->qp_table.rdb_shift,
+ mdev->qp_table.rdb_shift, 0,
0, 0);
if (!mdev->qp_table.rdb_table) {
mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
@@ -441,15 +475,31 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
}
mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
- dev_lim->cqc_entry_sz,
- mdev->limits.num_cqs,
- mdev->limits.reserved_cqs, 0);
+ dev_lim->cqc_entry_sz,
+ mdev->limits.num_cqs,
+ mdev->limits.reserved_cqs,
+ 0, 0);
if (!mdev->cq_table.table) {
mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
err = -ENOMEM;
goto err_unmap_rdb;
}
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ) {
+ mdev->srq_table.table =
+ mthca_alloc_icm_table(mdev, init_hca->srqc_base,
+ dev_lim->srq_entry_sz,
+ mdev->limits.num_srqs,
+ mdev->limits.reserved_srqs,
+ 0, 0);
+ if (!mdev->srq_table.table) {
+ mthca_err(mdev, "Failed to map SRQ context memory, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_cq;
+ }
+ }
+
/*
* It's not strictly required, but for simplicity just map the
* whole multicast group table now. The table isn't very big
@@ -461,15 +511,19 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev,
mdev->limits.num_amgms,
mdev->limits.num_mgms +
mdev->limits.num_amgms,
- 0);
+ 0, 0);
if (!mdev->mcg_table.table) {
mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
err = -ENOMEM;
- goto err_unmap_cq;
+ goto err_unmap_srq;
}
return 0;
+err_unmap_srq:
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ mthca_free_icm_table(mdev, mdev->srq_table.table);
+
err_unmap_cq:
mthca_free_icm_table(mdev, mdev->cq_table.table);
@@ -492,67 +546,75 @@ err_unmap_eq:
mthca_unmap_eq_icm(mdev);
err_unmap_aux:
- mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_UNMAP_ICM_AUX(mdev);
err_free_aux:
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
return err;
}
-static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
+static void mthca_free_icms(struct mthca_dev *mdev)
+{
+
+ mthca_free_icm_table(mdev, mdev->mcg_table.table);
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ mthca_free_icm_table(mdev, mdev->srq_table.table);
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
+}
+
+static int mthca_init_arbel(struct mthca_dev *mdev)
{
struct mthca_dev_lim dev_lim;
struct mthca_profile profile;
struct mthca_init_hca_param init_hca;
- struct mthca_adapter adapter;
- u64 icm_size;
- u8 status;
+ s64 icm_size;
int err;
- err = mthca_QUERY_FW(mdev, &status);
+ err = mthca_QUERY_FW(mdev);
if (err) {
- mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_FW command failed %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
- err = mthca_ENABLE_LAM(mdev, &status);
- if (err) {
- mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
- return err;
- }
- if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
+ err = mthca_ENABLE_LAM(mdev);
+ if (err == -EAGAIN) {
mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
- } else if (status) {
- mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
+ } else if (err) {
+ mthca_err(mdev, "ENABLE_LAM returned %d, aborting.\n", err);
+ return err;
}
err = mthca_load_fw(mdev);
if (err) {
- mthca_err(mdev, "Failed to start FW, aborting.\n");
+ mthca_err(mdev, "Loading FW returned %d, aborting.\n", err);
goto err_disable;
}
err = mthca_dev_lim(mdev, &dev_lim);
if (err) {
- mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_DEV_LIM returned %d, aborting.\n", err);
goto err_stop_fw;
}
- profile = default_profile;
+ profile = hca_profile;
profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
profile.num_udav = 0;
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ profile.num_srq = dev_lim.max_srqs;
icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
- if ((int) icm_size < 0) {
+ if (icm_size < 0) {
err = icm_size;
goto err_stop_fw;
}
@@ -561,70 +623,78 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
if (err)
goto err_stop_fw;
- err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ err = mthca_INIT_HCA(mdev, &init_hca);
if (err) {
- mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
- goto err_free_icm;
- }
- if (status) {
- mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err);
goto err_free_icm;
}
- err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
- if (err) {
- mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_free_icm;
- }
- if (status) {
- mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
- goto err_free_icm;
- }
-
- mdev->eq_table.inta_pin = adapter.inta_pin;
- mdev->rev_id = adapter.revision_id;
-
return 0;
err_free_icm:
- mthca_free_icm_table(mdev, mdev->cq_table.table);
- mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
- mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
- mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
- mthca_unmap_eq_icm(mdev);
-
- mthca_UNMAP_ICM_AUX(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icms(mdev);
err_stop_fw:
- mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ mthca_UNMAP_FA(mdev);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
err_disable:
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
- mthca_DISABLE_LAM(mdev, &status);
+ mthca_DISABLE_LAM(mdev);
return err;
}
-static int __devinit mthca_init_hca(struct mthca_dev *mdev)
+static void mthca_close_hca(struct mthca_dev *mdev)
{
+ mthca_CLOSE_HCA(mdev, 0);
+
+ if (mthca_is_memfree(mdev)) {
+ mthca_free_icms(mdev);
+
+ mthca_UNMAP_FA(mdev);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
+
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev);
+ } else
+ mthca_SYS_DIS(mdev);
+}
+
+static int mthca_init_hca(struct mthca_dev *mdev)
+{
+ int err;
+ struct mthca_adapter adapter;
+
if (mthca_is_memfree(mdev))
- return mthca_init_arbel(mdev);
+ err = mthca_init_arbel(mdev);
else
- return mthca_init_tavor(mdev);
+ err = mthca_init_tavor(mdev);
+
+ if (err)
+ return err;
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command returned %d, aborting.\n", err);
+ goto err_close;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ if (!mthca_is_memfree(mdev))
+ mdev->rev_id = adapter.revision_id;
+ memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id);
+
+ return 0;
+
+err_close:
+ mthca_close_hca(mdev);
+ return err;
}
-static int __devinit mthca_setup_hca(struct mthca_dev *dev)
+static int mthca_setup_hca(struct mthca_dev *dev)
{
int err;
- u8 status;
MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
@@ -642,7 +712,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_uar_table_free;
}
- dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!dev->kar) {
mthca_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
@@ -664,7 +734,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_pd_table_free;
}
- err = mthca_pd_alloc(dev, &dev->driver_pd);
+ err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
if (err) {
mthca_err(dev, "Failed to create driver PD, "
"aborting.\n");
@@ -685,16 +755,19 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_eq_table_free;
}
- err = mthca_NOP(dev, &status);
- if (err || status) {
- mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n",
- dev->mthca_flags & MTHCA_FLAG_MSI_X ?
- dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
- dev->pdev->irq);
- if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
- mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
- else
+ err = mthca_NOP(dev);
+ if (err) {
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ mthca_warn(dev, "NOP command failed to generate interrupt "
+ "(IRQ %d).\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector);
+ mthca_warn(dev, "Trying again with MSI-X disabled.\n");
+ } else {
+ mthca_err(dev, "NOP command failed to generate interrupt "
+ "(IRQ %d), aborting.\n",
+ dev->pdev->irq);
mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+ }
goto err_cmd_poll;
}
@@ -708,11 +781,18 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_cmd_poll;
}
+ err = mthca_init_srq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "shared receive queue table, aborting.\n");
+ goto err_cq_table_free;
+ }
+
err = mthca_init_qp_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"queue pair table, aborting.\n");
- goto err_cq_table_free;
+ goto err_srq_table_free;
}
err = mthca_init_av_table(dev);
@@ -737,6 +817,9 @@ err_av_table_free:
err_qp_table_free:
mthca_cleanup_qp_table(dev);
+err_srq_table_free:
+ mthca_cleanup_srq_table(dev);
+
err_cq_table_free:
mthca_cleanup_cq_table(dev);
@@ -766,60 +849,7 @@ err_uar_table_free:
return err;
}
-static int __devinit mthca_request_regions(struct pci_dev *pdev,
- int ddr_hidden)
-{
- int err;
-
- /*
- * We can't just use pci_request_regions() because the MSI-X
- * table is right in the middle of the first BAR. If we did
- * pci_request_region and grab all of the first BAR, then
- * setting up MSI-X would fail, since the PCI core wants to do
- * request_mem_region on the MSI-X vector table.
- *
- * So just request what we need right now, and request any
- * other regions we need when setting up EQs.
- */
- if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE, DRV_NAME))
- return -EBUSY;
-
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err)
- goto err_bar2_failed;
-
- if (!ddr_hidden) {
- err = pci_request_region(pdev, 4, DRV_NAME);
- if (err)
- goto err_bar4_failed;
- }
-
- return 0;
-
-err_bar4_failed:
- pci_release_region(pdev, 2);
-
-err_bar2_failed:
- release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE);
-
- return err;
-}
-
-static void mthca_release_regions(struct pci_dev *pdev,
- int ddr_hidden)
-{
- if (!ddr_hidden)
- pci_release_region(pdev, 4);
-
- pci_release_region(pdev, 2);
-
- release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE);
-}
-
-static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
+static int mthca_enable_msi_x(struct mthca_dev *mdev)
{
struct msix_entry entries[3];
int err;
@@ -828,13 +858,9 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
entries[1].entry = 1;
entries[2].entry = 2;
- err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err) {
- if (err > 0)
- mthca_info(mdev, "Only %d MSI-X vectors available, "
- "not using MSI-X\n", err);
+ err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err)
return err;
- }
mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
@@ -843,33 +869,6 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
return 0;
}
-static void mthca_close_hca(struct mthca_dev *mdev)
-{
- u8 status;
-
- mthca_CLOSE_HCA(mdev, 0, &status);
-
- if (mthca_is_memfree(mdev)) {
- mthca_free_icm_table(mdev, mdev->cq_table.table);
- mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
- mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
- mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
- mthca_unmap_eq_icm(mdev);
-
- mthca_UNMAP_ICM_AUX(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
-
- mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
-
- if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
- mthca_DISABLE_LAM(mdev, &status);
- } else
- mthca_SYS_DIS(mdev, &status);
-}
-
/* Types of supported HCA */
enum {
TAVOR, /* MT23108 */
@@ -883,36 +882,29 @@ enum {
static struct {
u64 latest_fw;
- int is_memfree;
- int is_pcie;
+ u32 flags;
} mthca_hca_table[] = {
- [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 },
- [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 },
- [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 },
- [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
+ .flags = 0 },
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
+ .flags = MTHCA_FLAG_PCIE },
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE },
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE |
+ MTHCA_FLAG_SINAI_OPT }
};
-static int __devinit mthca_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
{
- static int mthca_version_printed = 0;
int ddr_hidden = 0;
int err;
struct mthca_dev *mdev;
- if (!mthca_version_printed) {
- printk(KERN_INFO "%s", mthca_version);
- ++mthca_version_printed;
- }
-
- printk(KERN_INFO PFX "Initializing %s (%s)\n",
- pci_pretty_name(pdev), pci_name(pdev));
-
- if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
- printk(KERN_ERR PFX "%s (%s) has invalid driver data %lx\n",
- pci_pretty_name(pdev), pci_name(pdev), id->driver_data);
- return -ENODEV;
- }
+ printk(KERN_INFO PFX "Initializing %s\n",
+ pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
@@ -927,20 +919,19 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
*/
if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
pci_resource_len(pdev, 0) != 1 << 20) {
- dev_err(&pdev->dev, "Missing DCS, aborting.");
+ dev_err(&pdev->dev, "Missing DCS, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
- if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
- pci_resource_len(pdev, 2) != 1 << 23) {
- dev_err(&pdev->dev, "Missing UAR, aborting.");
+ if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing UAR, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
ddr_hidden = 1;
- err = mthca_request_regions(pdev, ddr_hidden);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Cannot obtain PCI resources, "
"aborting.\n");
@@ -949,20 +940,20 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
- err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
goto err_free_res;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
"consistent PCI DMA mask.\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
@@ -970,6 +961,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
}
}
+ /* We can handle large RDMA requests, so allow larger segments. */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
if (!mdev) {
dev_err(&pdev->dev, "Device struct alloc failed, "
@@ -980,12 +974,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
mdev->pdev = pdev;
+ mdev->mthca_flags = mthca_hca_table[hca_type].flags;
if (ddr_hidden)
mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
- if (mthca_hca_table[id->driver_data].is_memfree)
- mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
- if (mthca_hca_table[id->driver_data].is_pcie)
- mdev->mthca_flags |= MTHCA_FLAG_PCIE;
/*
* Now reset the HCA before we touch the PCI capabilities or
@@ -998,43 +989,41 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
goto err_free_dev;
}
- if (msi_x && !mthca_enable_msi_x(mdev))
- mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
- if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
- !pci_enable_msi(pdev))
- mdev->mthca_flags |= MTHCA_FLAG_MSI;
-
- sema_init(&mdev->cmd.hcr_sem, 1);
- sema_init(&mdev->cmd.poll_sem, 1);
- mdev->cmd.use_events = 0;
-
- mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
- if (!mdev->hcr) {
- mthca_err(mdev, "Couldn't map command register, "
- "aborting.\n");
- err = -ENOMEM;
+ if (mthca_cmd_init(mdev)) {
+ mthca_err(mdev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
err = mthca_tune_pci(mdev);
if (err)
- goto err_iounmap;
+ goto err_cmd;
err = mthca_init_hca(mdev);
if (err)
- goto err_iounmap;
+ goto err_cmd;
- if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
- mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
+ if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
+ mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n",
(int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
(int) (mdev->fw_ver & 0xffff),
- (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
- (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
- (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+ (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+ (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+ (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
}
+ if (msi_x && !mthca_enable_msi_x(mdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
+
err = mthca_setup_hca(mdev);
+ if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
+
+ err = mthca_setup_hca(mdev);
+ }
+
if (err)
goto err_close;
@@ -1047,6 +1036,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
goto err_unregister;
pci_set_drvdata(pdev, mdev);
+ mdev->hca_type = hca_type;
+
+ mdev->active = true;
return 0;
@@ -1057,6 +1049,7 @@ err_cleanup:
mthca_cleanup_mcg_table(mdev);
mthca_cleanup_av_table(mdev);
mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_srq_table(mdev);
mthca_cleanup_cq_table(mdev);
mthca_cmd_use_polling(mdev);
mthca_cleanup_eq_table(mdev);
@@ -1068,21 +1061,19 @@ err_cleanup:
mthca_cleanup_uar_table(mdev);
err_close:
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+
mthca_close_hca(mdev);
-err_iounmap:
- iounmap(mdev->hcr);
+err_cmd:
+ mthca_cmd_cleanup(mdev);
err_free_dev:
- if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
- if (mdev->mthca_flags & MTHCA_FLAG_MSI)
- pci_disable_msi(pdev);
-
ib_dealloc_device(&mdev->ib_dev);
err_free_res:
- mthca_release_regions(pdev, ddr_hidden);
+ pci_release_regions(pdev);
err_disable_pdev:
pci_disable_device(pdev);
@@ -1090,10 +1081,9 @@ err_disable_pdev:
return err;
}
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
{
struct mthca_dev *mdev = pci_get_drvdata(pdev);
- u8 status;
int p;
if (mdev) {
@@ -1101,11 +1091,12 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
mthca_unregister_device(mdev);
for (p = 1; p <= mdev->limits.num_ports; ++p)
- mthca_CLOSE_IB(mdev, p, &status);
+ mthca_CLOSE_IB(mdev, p);
mthca_cleanup_mcg_table(mdev);
mthca_cleanup_av_table(mdev);
mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_srq_table(mdev);
mthca_cleanup_cq_table(mdev);
mthca_cmd_use_polling(mdev);
mthca_cleanup_eq_table(mdev);
@@ -1118,24 +1109,61 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
iounmap(mdev->kar);
mthca_uar_free(mdev, &mdev->driver_uar);
mthca_cleanup_uar_table(mdev);
-
mthca_close_hca(mdev);
-
- iounmap(mdev->hcr);
+ mthca_cmd_cleanup(mdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
pci_disable_msix(pdev);
- if (mdev->mthca_flags & MTHCA_FLAG_MSI)
- pci_disable_msi(pdev);
ib_dealloc_device(&mdev->ib_dev);
- mthca_release_regions(pdev, mdev->mthca_flags &
- MTHCA_FLAG_DDR_HIDDEN);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
}
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+ struct mthca_dev *mdev;
+ int hca_type;
+
+ mdev = pci_get_drvdata(pdev);
+ if (!mdev)
+ return -ENODEV;
+ hca_type = mdev->hca_type;
+ __mthca_remove_one(pdev);
+ return __mthca_init_one(pdev, hca_type);
+}
+
+static int mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ int ret;
+
+ mutex_lock(&mthca_device_mutex);
+
+ printk_once(KERN_INFO "%s", mthca_version);
+
+ if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+ printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+ pci_name(pdev), id->driver_data);
+ mutex_unlock(&mthca_device_mutex);
+ return -ENODEV;
+ }
+
+ ret = __mthca_init_one(pdev, id->driver_data);
+
+ mutex_unlock(&mthca_device_mutex);
+
+ return ret;
+}
+
+static void mthca_remove_one(struct pci_dev *pdev)
+{
+ mutex_lock(&mthca_device_mutex);
+ __mthca_remove_one(pdev);
+ mutex_unlock(&mthca_device_mutex);
+}
+
static struct pci_device_id mthca_pci_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
.driver_data = TAVOR },
@@ -1163,23 +1191,84 @@ static struct pci_device_id mthca_pci_table[] = {
MODULE_DEVICE_TABLE(pci, mthca_pci_table);
static struct pci_driver mthca_driver = {
- .name = "ib_mthca",
+ .name = DRV_NAME,
.id_table = mthca_pci_table,
.probe = mthca_init_one,
- .remove = __devexit_p(mthca_remove_one)
+ .remove = mthca_remove_one,
};
+static void __init __mthca_check_profile_val(const char *name, int *pval,
+ int pval_default)
+{
+ /* value must be positive and power of 2 */
+ int old_pval = *pval;
+
+ if (old_pval <= 0)
+ *pval = pval_default;
+ else
+ *pval = roundup_pow_of_two(old_pval);
+
+ if (old_pval != *pval) {
+ printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n",
+ old_pval, name);
+ printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval);
+ }
+}
+
+#define mthca_check_profile_val(name, default) \
+ __mthca_check_profile_val(#name, &hca_profile.name, default)
+
+static void __init mthca_validate_profile(void)
+{
+ mthca_check_profile_val(num_qp, MTHCA_DEFAULT_NUM_QP);
+ mthca_check_profile_val(rdb_per_qp, MTHCA_DEFAULT_RDB_PER_QP);
+ mthca_check_profile_val(num_cq, MTHCA_DEFAULT_NUM_CQ);
+ mthca_check_profile_val(num_mcg, MTHCA_DEFAULT_NUM_MCG);
+ mthca_check_profile_val(num_mpt, MTHCA_DEFAULT_NUM_MPT);
+ mthca_check_profile_val(num_mtt, MTHCA_DEFAULT_NUM_MTT);
+ mthca_check_profile_val(num_udav, MTHCA_DEFAULT_NUM_UDAV);
+ mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS);
+
+ if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) {
+ printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n",
+ hca_profile.fmr_reserved_mtts);
+ printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n",
+ hca_profile.num_mtt);
+ hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2;
+ printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n",
+ hca_profile.fmr_reserved_mtts);
+ }
+
+ if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
+ printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
+ log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
+ log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+ }
+}
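As a usage illustration of the validation above (values hypothetical, not taken from the patch): a module load such as "modprobe ib_mthca num_qp=100000" supplies a value that is not a power of two, so it is rounded up and a warning naming the corrected value is printed; zero or negative values fall back to the compiled-in default.

/* Sketch of the per-parameter fixup performed above. */
static int example_profile_fixup(int val, int def)
{
	if (val <= 0)
		return def;
	return roundup_pow_of_two(val);	/* e.g. 100000 -> 131072 */
}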
+
static int __init mthca_init(void)
{
int ret;
+ mthca_validate_profile();
+
+ ret = mthca_catas_init();
+ if (ret)
+ return ret;
+
ret = pci_register_driver(&mthca_driver);
- return ret < 0 ? ret : 0;
+ if (ret < 0) {
+ mthca_catas_cleanup();
+ return ret;
+ }
+
+ return 0;
}
static void __exit mthca_cleanup(void)
{
pci_unregister_driver(&mthca_driver);
+ mthca_catas_cleanup();
}
module_init(mthca_init);
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 70a6553a588..6304ae8f4a6 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -28,24 +28,19 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
-enum {
- MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
-};
-
struct mthca_mgm {
- u32 next_gid_index;
- u32 reserved[3];
- u8 gid[16];
- u32 qp[MTHCA_QP_PER_MGM];
+ __be32 next_gid_index;
+ u32 reserved[3];
+ u8 gid[16];
+ __be32 qp[MTHCA_QP_PER_MGM];
};
static const u8 zero_gid[16]; /* automatically initialized to 0 */
@@ -66,49 +61,38 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */
* entry in hash chain and *mgm holds end of hash chain.
*/
static int find_mgm(struct mthca_dev *dev,
- u8 *gid, struct mthca_mgm *mgm,
+ u8 *gid, struct mthca_mailbox *mgm_mailbox,
u16 *hash, int *prev, int *index)
{
- void *mailbox;
+ struct mthca_mailbox *mailbox;
+ struct mthca_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
- u8 status;
- mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
return -ENOMEM;
- mgid = MAILBOX_ALIGN(mailbox);
+ mgid = mailbox->buf;
memcpy(mgid, gid, 16);
- err = mthca_MGID_HASH(dev, mgid, hash, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "MGID_HASH returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_MGID_HASH(dev, mailbox, hash);
+ if (err) {
+ mthca_err(dev, "MGID_HASH failed (%d)\n", err);
goto out;
}
if (0)
- mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
- "%04x:%04x:%04x:%04x is %04x\n",
- be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]),
- be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]),
- be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]),
- be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]),
- *hash);
+ mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
*index = *hash;
*prev = -1;
do {
- err = mthca_READ_MGM(dev, *index, mgm, &status);
- if (err)
+ err = mthca_READ_MGM(dev, *index, mgm_mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed (%d)\n", err);
goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- return -EINVAL;
}
if (!memcmp(mgm->gid, zero_gid, 16)) {
@@ -123,37 +107,35 @@ static int find_mgm(struct mthca_dev *dev,
goto out;
*prev = *index;
- *index = be32_to_cpu(mgm->next_gid_index) >> 5;
+ *index = be32_to_cpu(mgm->next_gid_index) >> 6;
} while (*index);
*index = -1;
out:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
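A short sketch (not part of the patch) of the hash-chain link packing that find_mgm() walks: the index of the next AMGM entry is kept in the upper bits of the big-endian next_gid_index word, so reading and writing the link is a 6-bit shift in each direction, matching the >> 6 and << 6 used in this file.

static inline int example_next_amgm_index(__be32 next_gid_index)
{
	return be32_to_cpu(next_gid_index) >> 6;	/* follow the chain */
}

static inline __be32 example_link_to(int index)
{
	return cpu_to_be32(index << 6);			/* store the link */
}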
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
- void *mailbox;
+ struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int index, prev;
int link = 0;
int i;
int err;
- u8 status;
- mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- mgm = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
- if (down_interruptible(&dev->mcg_table.sem))
- return -EINTR;
+ mutex_lock(&dev->mcg_table.mutex);
- err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
@@ -170,21 +152,22 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_READ_MGM(dev, index, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed (%d)\n", err);
goto out;
}
-
+ memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid->raw, 16);
- mgm->next_gid_index = 0;
}
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
- if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+ mthca_dbg(dev, "QP %06x already a member of MGM\n",
+ ibqp->qp_num);
+ err = 0;
+ goto out;
+ } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
break;
}
@@ -195,76 +178,62 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM failed %d\n", err);
err = -EINVAL;
+ goto out;
}
if (!link)
goto out;
- err = mthca_READ_MGM(dev, prev, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed %d\n", err);
goto out;
}
- mgm->next_gid_index = cpu_to_be32(index << 5);
+ mgm->next_gid_index = cpu_to_be32(index << 6);
- err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+ err = mthca_WRITE_MGM(dev, prev, mailbox);
if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
- }
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
out:
- up(&dev->mcg_table.sem);
- kfree(mailbox);
+ if (err && link && index != -1) {
+ BUG_ON(index < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, index);
+ }
+ mutex_unlock(&dev->mcg_table.mutex);
+
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
- void *mailbox;
+ struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int prev, index;
int i, loc;
int err;
- u8 status;
- mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- mgm = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
- if (down_interruptible(&dev->mcg_table.sem))
- return -EINTR;
+ mutex_lock(&dev->mcg_table.mutex);
- err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index == -1) {
- mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
- "not found\n",
- be16_to_cpu(((u16 *) gid->raw)[0]),
- be16_to_cpu(((u16 *) gid->raw)[1]),
- be16_to_cpu(((u16 *) gid->raw)[2]),
- be16_to_cpu(((u16 *) gid->raw)[3]),
- be16_to_cpu(((u16 *) gid->raw)[4]),
- be16_to_cpu(((u16 *) gid->raw)[5]),
- be16_to_cpu(((u16 *) gid->raw)[6]),
- be16_to_cpu(((u16 *) gid->raw)[7]));
+ mthca_err(dev, "MGID %pI6 not found\n", gid->raw);
err = -EINVAL;
goto out;
}
@@ -285,92 +254,82 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
if (i != 1)
goto out;
- goto out;
-
if (prev == -1) {
/* Remove entry from MGM */
- if (be32_to_cpu(mgm->next_gid_index) >> 5) {
- err = mthca_READ_MGM(dev,
- be32_to_cpu(mgm->next_gid_index) >> 5,
- mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n",
- status);
- err = -EINVAL;
+ int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
+ if (amgm_index_to_free) {
+ err = mthca_READ_MGM(dev, amgm_index_to_free,
+ mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
} else
memset(mgm->gid, 0, 16);
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
+ if (amgm_index_to_free) {
+ BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
+ }
} else {
/* Remove entry from AMGM */
- index = be32_to_cpu(mgm->next_gid_index) >> 5;
- err = mthca_READ_MGM(dev, prev, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ err = mthca_READ_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
- mgm->next_gid_index = cpu_to_be32(index << 5);
+ mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
- err = mthca_WRITE_MGM(dev, prev, mgm, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
+ BUG_ON(index < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, index);
}
out:
- up(&dev->mcg_table.sem);
- kfree(mailbox);
+ mutex_unlock(&dev->mcg_table.mutex);
+
+ mthca_free_mailbox(dev, mailbox);
return err;
}
-int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
+int mthca_init_mcg_table(struct mthca_dev *dev)
{
int err;
+ int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
err = mthca_alloc_init(&dev->mcg_table.alloc,
- dev->limits.num_amgms,
- dev->limits.num_amgms - 1,
- 0);
+ table_size,
+ table_size - 1,
+ dev->limits.num_mgms);
if (err)
return err;
- init_MUTEX(&dev->mcg_table.sem);
+ mutex_init(&dev->mcg_table.mutex);
return 0;
}
-void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev)
+void mthca_cleanup_mcg_table(struct mthca_dev *dev)
{
mthca_alloc_cleanup(&dev->mcg_table.alloc);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 637b30e3559..7d2e42dd692 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,11 +30,14 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
#include "mthca_memfree.h"
#include "mthca_dev.h"
@@ -47,22 +52,51 @@ enum {
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
+struct mthca_user_db_table {
+ struct mutex mutex;
+ struct {
+ u64 uvirt;
+ struct scatterlist mem;
+ int refcount;
+ } page[0];
+};
+
+static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
+{
+ int i;
+
+ if (chunk->nsg > 0)
+ pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < chunk->npages; ++i)
+ __free_pages(sg_page(&chunk->mem[i]),
+ get_order(chunk->mem[i].length));
+}
+
+static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
- struct mthca_icm_chunk *chunk, *tmp;
int i;
+ for (i = 0; i < chunk->npages; ++i) {
+ dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+ lowmem_page_address(sg_page(&chunk->mem[i])),
+ sg_dma_address(&chunk->mem[i]));
+ }
+}
+
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
+{
+ struct mthca_icm_chunk *chunk, *tmp;
+
if (!icm)
return;
list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
- if (chunk->nsg > 0)
- pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
-
- for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
- get_order(chunk->mem[i].length));
+ if (coherent)
+ mthca_free_icm_coherent(dev, chunk);
+ else
+ mthca_free_icm_pages(dev, chunk);
kfree(chunk);
}
@@ -70,12 +104,46 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
kfree(icm);
}
+static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+ struct page *page;
+
+ /*
+ * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+ * cleared, and subtle failures are seen if they aren't.
+ */
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
+ if (!page)
+ return -ENOMEM;
+
+ sg_set_page(mem, page, PAGE_SIZE << order, 0);
+ return 0;
+}
+
+static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+ int order, gfp_t gfp_mask)
+{
+ void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
+ gfp_mask);
+ if (!buf)
+ return -ENOMEM;
+
+ sg_set_buf(mem, buf, PAGE_SIZE << order);
+ BUG_ON(mem->offset);
+ sg_dma_len(mem) = PAGE_SIZE << order;
+ return 0;
+}
+
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- unsigned int gfp_mask)
+ gfp_t gfp_mask, int coherent)
{
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk = NULL;
int cur_order;
+ int ret;
+
+ /* We use sg_set_buf for coherent allocs, which assumes low memory */
+ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
@@ -93,6 +161,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
if (!chunk)
goto fail;
+ sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
@@ -101,21 +170,30 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
while (1 << cur_order > npages)
--cur_order;
- chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
- if (chunk->mem[chunk->npages].page) {
- chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
- chunk->mem[chunk->npages].offset = 0;
+ if (coherent)
+ ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
+ &chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
+ else
+ ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
- if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+ if (!ret) {
+ ++chunk->npages;
+
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
+ }
+ if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
chunk = NULL;
- }
npages -= 1 << cur_order;
} else {
@@ -125,7 +203,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
}
}
- if (chunk) {
+ if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
@@ -137,7 +215,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
return icm;
fail:
- mthca_free_icm(dev, icm);
+ mthca_free_icm(dev, icm, coherent);
return NULL;
}
@@ -145,9 +223,8 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
{
int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
int ret = 0;
- u8 status;
- down(&table->mutex);
+ mutex_lock(&table->mutex);
if (table->icm[i]) {
++table->icm[i]->refcount;
@@ -156,15 +233,15 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
- __GFP_NOWARN);
+ __GFP_NOWARN, table->coherent);
if (!table->icm[i]) {
ret = -ENOMEM;
goto out;
}
- if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- &status) || status) {
- mthca_free_icm(dev, table->icm[i]);
+ if (mthca_MAP_ICM(dev, table->icm[i],
+ table->virt + i * MTHCA_TABLE_CHUNK_SIZE)) {
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
ret = -ENOMEM;
goto out;
@@ -173,30 +250,34 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
++table->icm[i]->refcount;
out:
- up(&table->mutex);
+ mutex_unlock(&table->mutex);
return ret;
}
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
- int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
- u8 status;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return;
- down(&table->mutex);
+ i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+
+ mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
- mthca_free_icm(dev, table->icm[i]);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
}
- up(&table->mutex);
+ mutex_unlock(&table->mutex);
}
-void *mthca_table_find(struct mthca_icm_table *table, int obj)
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
{
- int idx, offset, i;
+ int idx, offset, dma_offset, i;
struct mthca_icm_chunk *chunk;
struct mthca_icm *icm;
struct page *page = NULL;
@@ -204,27 +285,36 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
if (!table->lowmem)
return NULL;
- down(&table->mutex);
+ mutex_lock(&table->mutex);
idx = (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
- offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+ dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
if (!icm)
goto out;
list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) {
- if (chunk->mem[i].length >= offset) {
- page = chunk->mem[i].page;
- break;
+ if (dma_handle && dma_offset >= 0) {
+ if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+ *dma_handle = sg_dma_address(&chunk->mem[i]) +
+ dma_offset;
+ dma_offset -= sg_dma_len(&chunk->mem[i]);
+ }
+ /* DMA mapping can merge pages but not split them,
+ * so if we found the page, dma_handle has already
+ * been assigned to. */
+ if (chunk->mem[i].length > offset) {
+ page = sg_page(&chunk->mem[i]);
+ goto out;
}
offset -= chunk->mem[i].length;
}
}
out:
- up(&table->mutex);
+ mutex_unlock(&table->mutex);
return page ? lowmem_page_address(page) + offset : NULL;
}
@@ -256,6 +346,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
{
int i;
+ if (!mthca_is_memfree(dev))
+ return;
+
for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
mthca_table_put(dev, table, i);
}
@@ -263,14 +356,16 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
- int use_lowmem)
+ int use_lowmem, int use_coherent)
{
struct mthca_icm_table *table;
+ int obj_per_chunk;
int num_icm;
+ unsigned chunk_size;
int i;
- u8 status;
- num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE;
+ obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
+ num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
if (!table)
@@ -281,20 +376,25 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
table->num_obj = nobj;
table->obj_size = obj_size;
table->lowmem = use_lowmem;
- init_MUTEX(&table->mutex);
+ table->coherent = use_coherent;
+ mutex_init(&table->mutex);
for (i = 0; i < num_icm; ++i)
table->icm[i] = NULL;
for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
- table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ chunk_size = MTHCA_TABLE_CHUNK_SIZE;
+ if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
+ chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;
+
+ table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
- __GFP_NOWARN);
+ __GFP_NOWARN, use_coherent);
if (!table->icm[i])
goto err;
- if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
- &status) || status) {
- mthca_free_icm(dev, table->icm[i]);
+ if (mthca_MAP_ICM(dev, table->icm[i],
+ virt + i * MTHCA_TABLE_CHUNK_SIZE)) {
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
goto err;
}
@@ -312,8 +412,8 @@ err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
- mthca_free_icm(dev, table->icm[i]);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
@@ -324,35 +424,155 @@ err:
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
int i;
- u8 status;
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
- mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
- mthca_free_icm(dev, table->icm[i]);
+ mthca_UNMAP_ICM(dev,
+ table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
}
-static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
return dev->uar_table.uarc_base +
- dev->driver_uar.index * dev->uar_table.uarc_size +
- page * 4096;
+ uar->index * dev->uar_table.uarc_size +
+ page * MTHCA_ICM_PAGE_SIZE;
+}
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+ struct page *pages[1];
+ int ret = 0;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ if (index < 0 || index > dev->uar_table.uarc_size / 8)
+ return -EINVAL;
+
+ mutex_lock(&db_tab->mutex);
+
+ i = index / MTHCA_DB_REC_PER_PAGE;
+
+ if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
+ (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+ (uaddr & 4095)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (db_tab->page[i].refcount) {
+ ++db_tab->page[i].refcount;
+ goto out;
+ }
+
+ ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+ pages, NULL);
+ if (ret < 0)
+ goto out;
+
+ sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
+ uaddr & ~PAGE_MASK);
+
+ ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ if (ret < 0) {
+ put_page(pages[0]);
+ goto out;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+ mthca_uarc_virt(dev, uar, i));
+ if (ret) {
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(sg_page(&db_tab->page[i].mem));
+ goto out;
+ }
+
+ db_tab->page[i].uvirt = uaddr;
+ db_tab->page[i].refcount = 1;
+
+out:
+ mutex_unlock(&db_tab->mutex);
+ return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index)
+{
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * To make our bookkeeping simpler, we don't unmap DB
+ * pages until we clean up the whole db table.
+ */
+
+ mutex_lock(&db_tab->mutex);
+
+ --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+ mutex_unlock(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+ struct mthca_user_db_table *db_tab;
+ int npages;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return NULL;
+
+ npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
+ db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ if (!db_tab)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&db_tab->mutex);
+ for (i = 0; i < npages; ++i) {
+ db_tab->page[i].refcount = 0;
+ db_tab->page[i].uvirt = 0;
+ sg_init_table(&db_tab->page[i].mem, 1);
+ }
+
+ return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab)
+{
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
+ if (db_tab->page[i].uvirt) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(sg_page(&db_tab->page[i].mem));
+ }
+ }
+
+ kfree(db_tab);
}
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db)
{
int group;
int start, end, dir;
int i, j;
struct mthca_db_page *page;
int ret = 0;
- u8 status;
- down(&dev->db_tab->mutex);
+ mutex_lock(&dev->db_tab->mutex);
switch (type) {
case MTHCA_DB_TYPE_CQ_ARM:
@@ -385,34 +605,42 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
goto found;
}
+ for (i = start; i != end; i += dir)
+ if (!dev->db_tab->page[i].db_rec) {
+ page = dev->db_tab->page + i;
+ goto alloc;
+ }
+
if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
ret = -ENOMEM;
goto out;
}
+ if (group == 0)
+ ++dev->db_tab->max_group1;
+ else
+ --dev->db_tab->min_group2;
+
page = dev->db_tab->page + end;
- page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
+
+alloc:
+ page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
&page->mapping, GFP_KERNEL);
if (!page->db_rec) {
ret = -ENOMEM;
goto out;
}
- memset(page->db_rec, 0, 4096);
+ memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
- ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
- if (!ret && status)
- ret = -EINVAL;
+ ret = mthca_MAP_ICM_page(dev, page->mapping,
+ mthca_uarc_virt(dev, &dev->driver_uar, i));
if (ret) {
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
goto out;
}
bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
- if (group == 0)
- ++dev->db_tab->max_group1;
- else
- --dev->db_tab->min_group2;
found:
j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
@@ -425,10 +653,10 @@ found:
page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
- *db = (u32 *) &page->db_rec[j];
+ *db = (__be32 *) &page->db_rec[j];
out:
- up(&dev->db_tab->mutex);
+ mutex_unlock(&dev->db_tab->mutex);
return ret;
}
@@ -437,14 +665,13 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
int i, j;
struct mthca_db_page *page;
- u8 status;
i = db_index / MTHCA_DB_REC_PER_PAGE;
j = db_index % MTHCA_DB_REC_PER_PAGE;
page = dev->db_tab->page + i;
- down(&dev->db_tab->mutex);
+ mutex_lock(&dev->db_tab->mutex);
page->db_rec[j] = 0;
if (i >= dev->db_tab->min_group2)
@@ -453,9 +680,9 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
page->db_rec = NULL;
@@ -467,7 +694,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
++dev->db_tab->min_group2;
}
- up(&dev->db_tab->mutex);
+ mutex_unlock(&dev->db_tab->mutex);
}
int mthca_init_db_tab(struct mthca_dev *dev)
@@ -481,9 +708,9 @@ int mthca_init_db_tab(struct mthca_dev *dev)
if (!dev->db_tab)
return -ENOMEM;
- init_MUTEX(&dev->db_tab->mutex);
+ mutex_init(&dev->db_tab->mutex);
- dev->db_tab->npages = dev->uar_table.uarc_size / 4096;
+ dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;
@@ -504,7 +731,6 @@ int mthca_init_db_tab(struct mthca_dev *dev)
void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
int i;
- u8 status;
if (!mthca_is_memfree(dev))
return;
@@ -522,9 +748,9 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
dev->db_tab->page[i].db_rec,
dev->db_tab->page[i].mapping);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index fe7be2a6bc4..da9b8f9b884 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,22 +30,24 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#ifndef MTHCA_MEMFREE_H
#define MTHCA_MEMFREE_H
#include <linux/list.h>
-#include <linux/pci.h>
-
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
#define MTHCA_ICM_CHUNK_LEN \
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))
+enum {
+ MTHCA_ICM_PAGE_SHIFT = 12,
+ MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
+ MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
+};
+
struct mthca_icm_chunk {
struct list_head list;
int npages;
@@ -62,7 +66,8 @@ struct mthca_icm_table {
int num_obj;
int obj_size;
int lowmem;
- struct semaphore mutex;
+ int coherent;
+ struct mutex mutex;
struct mthca_icm *icm[0];
};
@@ -75,17 +80,17 @@ struct mthca_icm_iter {
struct mthca_dev;
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- unsigned int gfp_mask);
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
+ gfp_t gfp_mask, int coherent);
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
- int use_lowmem);
+ int use_lowmem, int use_coherent);
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
-void *mthca_table_find(struct mthca_icm_table *table, int obj);
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
@@ -130,13 +135,9 @@ static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
-enum {
- MTHCA_DB_REC_PER_PAGE = 4096 / 8
-};
-
struct mthca_db_page {
DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
- u64 *db_rec;
+ __be64 *db_rec;
dma_addr_t mapping;
};
@@ -145,10 +146,10 @@ struct mthca_db_table {
int max_group1;
int min_group2;
struct mthca_db_page *page;
- struct semaphore mutex;
+ struct mutex mutex;
};
-enum {
+enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,9 +159,21 @@ enum {
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};
+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab);
+
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db);
void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
#endif /* MTHCA_MEMFREE_H */
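
With MTHCA_ICM_PAGE_SHIFT fixed at 12, an ICM/UAR context page is always 4096 bytes regardless of the kernel's PAGE_SIZE, and since each doorbell record is an 8-byte __be64, MTHCA_DB_REC_PER_PAGE comes out to 4096 / 8 = 512. That constant is what drives the index split in mthca_free_db() in the previous file: for example, db_index 1027 decomposes as page i = 1027 / 512 = 2 and slot j = 1027 % 512 = 3.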
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8960fc2306b..ed9a989e501 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,34 +29,37 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/slab.h>
-#include <linux/init.h>
#include <linux/errno.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
+struct mthca_mtt {
+ struct mthca_buddy *buddy;
+ int order;
+ u32 first_seg;
+};
+
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
struct mthca_mpt_entry {
- u32 flags;
- u32 page_size;
- u32 key;
- u32 pd;
- u64 start;
- u64 length;
- u32 lkey;
- u32 window_count;
- u32 window_count_limit;
- u64 mtt_seg;
- u32 mtt_sz; /* Arbel only */
- u32 reserved[2];
+ __be32 flags;
+ __be32 page_size;
+ __be32 key;
+ __be32 pd;
+ __be64 start;
+ __be64 length;
+ __be32 lkey;
+ __be32 window_count;
+ __be32 window_count_limit;
+ __be64 mtt_seg;
+ __be32 mtt_sz; /* Arbel only */
+ u32 reserved[2];
} __attribute__((packed));
#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
@@ -69,6 +73,8 @@ struct mthca_mpt_entry {
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
+#define SINAI_FMR_KEY_INC 0x1000000
+
/*
* Buddy allocator for MTT segments (currently not very efficient
* since it doesn't keep a free list and just searches linearly
@@ -83,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
spin_lock(&buddy->lock);
- for (o = order; o <= buddy->max_order; ++o) {
- m = 1 << (buddy->max_order - o);
- seg = find_first_bit(buddy->bits[o], m);
- if (seg < m)
- goto found;
- }
+ for (o = order; o <= buddy->max_order; ++o)
+ if (buddy->num_free[o]) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
+ if (seg < m)
+ goto found;
+ }
spin_unlock(&buddy->lock);
return -1;
found:
clear_bit(seg, buddy->bits[o]);
+ --buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
set_bit(seg ^ 1, buddy->bits[o]);
+ ++buddy->num_free[o];
}
spin_unlock(&buddy->lock);
@@ -117,29 +126,31 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
while (test_bit(seg ^ 1, buddy->bits[order])) {
clear_bit(seg ^ 1, buddy->bits[order]);
+ --buddy->num_free[order];
seg >>= 1;
++order;
}
set_bit(seg, buddy->bits[order]);
+ ++buddy->num_free[order];
spin_unlock(&buddy->lock);
}
-static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
+static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
int i, s;
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
+ buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
- if (!buddy->bits)
+ buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
+ GFP_KERNEL);
+ if (!buddy->bits || !buddy->num_free)
goto err_out;
- memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));
-
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
@@ -150,6 +161,7 @@ static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
}
set_bit(0, buddy->bits[buddy->max_order]);
+ buddy->num_free[buddy->max_order] = 1;
return 0;
@@ -157,13 +169,14 @@ err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
+err_out:
kfree(buddy->bits);
+ kfree(buddy->num_free);
-err_out:
return -ENOMEM;
}
-static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
+static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
int i;
@@ -171,10 +184,11 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
kfree(buddy->bits[i]);
kfree(buddy->bits);
+ kfree(buddy->num_free);
}
-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
- struct mthca_buddy *buddy)
+static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+ struct mthca_buddy *buddy)
{
u32 seg = mthca_buddy_alloc(buddy, order);
@@ -191,14 +205,181 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
return seg;
}
-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
- struct mthca_buddy* buddy)
+static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
+ struct mthca_buddy *buddy)
{
- mthca_buddy_free(buddy, seg, order);
+ struct mthca_mtt *mtt;
+ int i;
- if (mthca_is_memfree(dev))
- mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
- seg + (1 << order) - 1);
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->buddy = buddy;
+ mtt->order = 0;
+ for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
+ ++mtt->order;
+
+ mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
+ if (mtt->first_seg == -1) {
+ kfree(mtt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mtt;
+}
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
+{
+ return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
+}
+
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
+{
+ if (!mtt)
+ return;
+
+ mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
+
+ mthca_table_put_range(dev, dev->mr_table.mtt_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+
+ kfree(mtt);
+}
+
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ struct mthca_mailbox *mailbox;
+ __be64 *mtt_entry;
+ int err = 0;
+ int i;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mtt_entry = mailbox->buf;
+
+ while (list_len > 0) {
+ mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+ mtt->first_seg * dev->limits.mtt_seg_size +
+ start_index * 8);
+ mtt_entry[1] = 0;
+ for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
+ mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+
+ /*
+ * If we have an odd number of entries to write, add
+ * one more dummy entry for firmware efficiency.
+ */
+ if (i & 1)
+ mtt_entry[i + 2] = 0;
+
+ err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
+ if (err) {
+ mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+ goto out;
+ }
+
+ list_len -= i;
+ start_index += i;
+ buffer_list += i;
+ }
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
+ /*
+ * Be friendly to WRITE_MTT command
+ * and leave two empty slots for the
+ * index and reserved fields of the
+ * mailbox.
+ */
+ return PAGE_SIZE / sizeof (u64) - 2;
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
+}
+
+static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
+{
+ u64 __iomem *mtts;
+ int i;
+
+ mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
+ start_index * sizeof (u64);
+ for (i = 0; i < list_len; ++i)
+ mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
+ mtts + i);
+}
+
+static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
+{
+ __be64 *mtts;
+ dma_addr_t dma_handle;
+ int i;
+ int s = start_index * sizeof (u64);
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+ /* Require full segments */
+ BUG_ON(s % dev->limits.mtt_seg_size);
+
+ mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+ s / dev->limits.mtt_seg_size, &dma_handle);
+
+ BUG_ON(!mtts);
+
+ dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+ list_len * sizeof (u64), DMA_TO_DEVICE);
+
+ for (i = 0; i < list_len; ++i)
+ mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+
+ dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+ list_len * sizeof (u64), DMA_TO_DEVICE);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ int size = mthca_write_mtt_size(dev);
+ int chunk;
+
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
+ return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
+
+ while (list_len > 0) {
+ chunk = min(size, list_len);
+ if (mthca_is_memfree(dev))
+ mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+ else
+ mthca_tavor_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+
+ list_len -= chunk;
+ start_index += chunk;
+ buffer_list += chunk;
+ }
+
+ return 0;
}
static inline u32 tavor_hw_index_to_key(u32 ind)
@@ -237,96 +418,29 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
return tavor_key_to_hw_index(key);
}
-int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
- u32 access, struct mthca_mr *mr)
+static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
{
- void *mailbox = NULL;
- struct mthca_mpt_entry *mpt_entry;
- u32 key;
- int err;
- u8 status;
-
- might_sleep();
-
- mr->order = -1;
- key = mthca_alloc(&dev->mr_table.mpt_alloc);
- if (key == -1)
- return -ENOMEM;
- mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
-
- if (mthca_is_memfree(dev)) {
- err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
- if (err)
- goto err_out_mpt_free;
- }
-
- mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox) {
- err = -ENOMEM;
- goto err_out_table;
- }
- mpt_entry = MAILBOX_ALIGN(mailbox);
-
- mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
- MTHCA_MPT_FLAG_MIO |
- MTHCA_MPT_FLAG_PHYSICAL |
- MTHCA_MPT_FLAG_REGION |
- access);
- mpt_entry->page_size = 0;
- mpt_entry->key = cpu_to_be32(key);
- mpt_entry->pd = cpu_to_be32(pd);
- mpt_entry->start = 0;
- mpt_entry->length = ~0ULL;
-
- memset(&mpt_entry->lkey, 0,
- sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-
- err = mthca_SW2HW_MPT(dev, mpt_entry,
- key & (dev->limits.num_mpts - 1),
- &status);
- if (err) {
- mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- goto err_out_table;
- } else if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_table;
- }
-
- kfree(mailbox);
- return err;
-
-err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
-
-err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, key);
- kfree(mailbox);
- return err;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ return ((key << 20) & 0x800000) | (key & 0x7fffff);
+ else
+ return key;
}
-int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
- u64 *buffer_list, int buffer_size_shift,
- int list_len, u64 iova, u64 total_size,
- u32 access, struct mthca_mr *mr)
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
- void *mailbox;
- u64 *mtt_entry;
+ struct mthca_mailbox *mailbox;
struct mthca_mpt_entry *mpt_entry;
u32 key;
- int err = -ENOMEM;
- u8 status;
int i;
+ int err;
- might_sleep();
WARN_ON(buffer_size_shift >= 32);
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
if (mthca_is_memfree(dev)) {
@@ -335,126 +449,103 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
goto err_out_mpt_free;
}
- for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
- i < list_len;
- i <<= 1, ++mr->order)
- ; /* nothing */
-
- mr->first_seg = mthca_alloc_mtt(dev, mr->order,
- &dev->mr_table.mtt_buddy);
- if (mr->first_seg == -1)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
goto err_out_table;
-
- /*
- * If list_len is odd, we add one more dummy entry for
- * firmware efficiency.
- */
- mailbox = kmalloc(max(sizeof *mpt_entry,
- (size_t) 8 * (list_len + (list_len & 1) + 2)) +
- MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
- goto err_out_free_mtt;
-
- mtt_entry = MAILBOX_ALIGN(mailbox);
-
- mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * MTHCA_MTT_SEG_SIZE);
- mtt_entry[1] = 0;
- for (i = 0; i < list_len; ++i)
- mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
- MTHCA_MTT_FLAG_PRESENT);
- if (list_len & 1) {
- mtt_entry[i + 2] = 0;
- ++list_len;
}
-
- if (0) {
- mthca_dbg(dev, "Dumping MPT entry\n");
- for (i = 0; i < list_len + 2; ++i)
- printk(KERN_ERR "[%2d] %016llx\n",
- i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
- }
-
- err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
- if (err) {
- mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
- goto err_out_mailbox_free;
- }
- if (status) {
- mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_mailbox_free;
- }
-
- mpt_entry = MAILBOX_ALIGN(mailbox);
+ mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
+ if (!mr->mtt)
+ mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
+
memset(&mpt_entry->lkey, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
- mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * MTHCA_MTT_SEG_SIZE);
+
+ if (mr->mtt)
+ mpt_entry->mtt_seg =
+ cpu_to_be64(dev->mr_table.mtt_base +
+ mr->mtt->first_seg * dev->limits.mtt_seg_size);
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+ printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
- err = mthca_SW2HW_MPT(dev, mpt_entry,
- key & (dev->limits.num_mpts - 1),
- &status);
- if (err)
+ err = mthca_SW2HW_MPT(dev, mailbox,
+ key & (dev->limits.num_mpts - 1));
+ if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- else if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
+ goto err_out_mailbox;
}
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
-err_out_mailbox_free:
- kfree(mailbox);
-
-err_out_free_mtt:
- mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
-/* Free mr or fmr */
-static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
- u32 first_seg, struct mthca_buddy *buddy)
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr)
{
- if (order >= 0)
- mthca_free_mtt(dev, first_seg, order, buddy);
+ mr->mtt = NULL;
+ return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
+}
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table,
- arbel_key_to_hw_index(lkey));
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr)
+{
+ int err;
+
+ mr->mtt = mthca_alloc_mtt(dev, list_len);
+ if (IS_ERR(mr->mtt))
+ return PTR_ERR(mr->mtt);
+
+ err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
+ if (err) {
+ mthca_free_mtt(dev, mr->mtt);
+ return err;
+ }
+
+ err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
+ total_size, access, mr);
+ if (err)
+ mthca_free_mtt(dev, mr->mtt);
+
+ return err;
+}
+
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
+{
+ mthca_table_put(dev, dev->mr_table.mpt_table,
+ key_to_hw_index(dev, lkey));
mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}
@@ -462,39 +553,29 @@ static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
int err;
- u8 status;
-
- might_sleep();
err = mthca_HW2SW_MPT(dev, NULL,
key_to_hw_index(dev, mr->ibmr.lkey) &
- (dev->limits.num_mpts - 1),
- &status);
+ (dev->limits.num_mpts - 1));
if (err)
mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
- status);
- mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
- &dev->mr_table.mtt_buddy);
+ mthca_free_region(dev, mr->ibmr.lkey);
+ mthca_free_mtt(dev, mr->mtt);
}
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *mr)
{
struct mthca_mpt_entry *mpt_entry;
- void *mailbox;
+ struct mthca_mailbox *mailbox;
u64 mtt_seg;
u32 key, idx;
- u8 status;
int list_len = mr->attr.max_pages;
int err = -ENOMEM;
int i;
- might_sleep();
-
- if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
+ if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
return -EINVAL;
/* For Arbel, all MTTs must fit in the same page. */
@@ -507,6 +588,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -516,44 +598,42 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
if (err)
goto err_out_mpt_free;
- mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
+ mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
BUG_ON(!mr->mem.arbel.mpt);
} else
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
- sizeof *(mr->mem.tavor.mpt) * idx;
+ sizeof *(mr->mem.tavor.mpt) * idx;
- for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
- i < list_len;
- i <<= 1, ++mr->order)
- ; /* nothing */
-
- mr->first_seg = mthca_alloc_mtt(dev, mr->order,
- dev->mr_table.fmr_mtt_buddy);
- if (mr->first_seg == -1)
+ mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
goto err_out_table;
+ }
- mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+ mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
- mr->first_seg);
+ mr->mtt->first_seg,
+ &mr->mem.arbel.dma_handle);
BUG_ON(!mr->mem.arbel.mtts);
} else
mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
- mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
goto err_out_free_mtt;
+ }
- mpt_entry = MAILBOX_ALIGN(mailbox);
+ mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
- mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+ mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
memset(&mpt_entry->start, 0,
@@ -565,42 +645,33 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+ printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
- err = mthca_SW2HW_MPT(dev, mpt_entry,
- key & (dev->limits.num_mpts - 1),
- &status);
+ err = mthca_SW2HW_MPT(dev, mailbox,
+ key & (dev->limits.num_mpts - 1));
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox_free;
}
- if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_mailbox_free;
- }
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return 0;
err_out_mailbox_free:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
err_out_free_mtt:
- mthca_free_mtt(dev, mr->first_seg, mr->order,
- dev->mr_table.fmr_mtt_buddy);
+ mthca_free_mtt(dev, mr->mtt);
err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
@@ -609,8 +680,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
if (fmr->maps)
return -EBUSY;
- mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
- dev->mr_table.fmr_mtt_buddy);
+ mthca_free_region(dev, fmr->ibmr.lkey);
+ mthca_free_mtt(dev, fmr->mtt);
+
return 0;
}
@@ -622,7 +694,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
if (list_len > fmr->attr.max_pages)
return -EINVAL;
- page_mask = (1 << fmr->attr.page_size) - 1;
+ page_mask = (1 << fmr->attr.page_shift) - 1;
/* We are getting page lists, so va must be page aligned. */
if (iova & page_mask)
@@ -670,10 +742,10 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
}
mpt_entry.lkey = cpu_to_be32(key);
- mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
mpt_entry.start = cpu_to_be64(iova);
- writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
+ __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
offsetof(struct mthca_mpt_entry, window_count) -
offsetof(struct mthca_mpt_entry, start));
@@ -698,20 +770,29 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
++fmr->maps;
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key += dev->limits.num_mpts;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ key += SINAI_FMR_KEY_INC;
+ else
+ key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
wmb();
+ dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
+
for (i = 0; i < list_len; ++i)
fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
+ dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
+
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
- fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
wmb();
@@ -725,15 +806,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = tavor_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
-
fmr->maps = 0;
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
@@ -741,23 +816,18 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
-
fmr->maps = 0;
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}
-int __devinit mthca_init_mr_table(struct mthca_dev *dev)
+int mthca_init_mr_table(struct mthca_dev *dev)
{
- int err, i;
+ phys_addr_t addr;
+ int mpts, mtts, err, i;
err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
@@ -771,6 +841,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
else
dev->mthca_flags |= MTHCA_FLAG_FMR;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ mthca_dbg(dev, "Memory key throughput optimization activated.\n");
+
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
fls(dev->limits.num_mtt_segs - 1));
@@ -788,10 +861,21 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
err = -EINVAL;
goto err_fmr_mpt;
}
+ mpts = mtts = 1 << i;
+ } else {
+ mtts = dev->limits.num_mtt_segs;
+ mpts = dev->limits.num_mpts;
+ }
+
+ if (!mthca_is_memfree(dev) &&
+ (dev->mthca_flags & MTHCA_FLAG_FMR)) {
+
+ addr = pci_resource_start(dev->pdev, 4) +
+ ((pci_resource_len(dev->pdev, 4) - 1) &
+ dev->mr_table.mpt_base);
dev->mr_table.tavor_fmr.mpt_base =
- ioremap(dev->mr_table.mpt_base,
- (1 << i) * sizeof (struct mthca_mpt_entry));
+ ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
if (!dev->mr_table.tavor_fmr.mpt_base) {
mthca_warn(dev, "MPT ioremap for FMR failed.\n");
@@ -799,26 +883,31 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
goto err_fmr_mpt;
}
+ addr = pci_resource_start(dev->pdev, 4) +
+ ((pci_resource_len(dev->pdev, 4) - 1) &
+ dev->mr_table.mtt_base);
+
dev->mr_table.tavor_fmr.mtt_base =
- ioremap(dev->mr_table.mtt_base,
- (1 << i) * MTHCA_MTT_SEG_SIZE);
+ ioremap(addr, mtts * dev->limits.mtt_seg_size);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mtt;
}
+ }
- err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
+ if (dev->limits.fmr_reserved_mtts) {
+ err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
if (err)
goto err_fmr_mtt_buddy;
/* Prevent regular MRs from using FMR keys */
- err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
+ err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
if (err)
goto err_reserve_fmr;
dev->mr_table.fmr_mtt_buddy =
- &dev->mr_table.tavor_fmr.mtt_buddy;
+ &dev->mr_table.tavor_fmr.mtt_buddy;
} else
dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
@@ -826,7 +915,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
if (dev->limits.reserved_mtts) {
i = fls(dev->limits.reserved_mtts - 1);
- if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
+ if (mthca_alloc_mtt_range(dev, i,
+ dev->mr_table.fmr_mtt_buddy) == -1) {
mthca_warn(dev, "MTT table of order %d is too small.\n",
dev->mr_table.fmr_mtt_buddy->max_order);
err = -ENOMEM;
@@ -858,7 +948,7 @@ err_mtt_buddy:
return err;
}
-void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
+void mthca_cleanup_mr_table(struct mthca_dev *dev)
{
/* XXX check if any MRs are still allocated? */
if (dev->limits.fmr_reserved_mtts)
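
The buddy-allocator hunks earlier in this file add a num_free[] counter per order so that mthca_buddy_alloc() can skip orders with no free blocks instead of scanning their bitmaps. A small, self-contained user-space sketch of the same idea (hypothetical names; the driver's version uses kmalloc'd bitmaps, set_bit()/clear_bit() and a spinlock):

	#include <string.h>

	#define MAX_ORDER 8

	/* bits[o][seg] != 0 means the block of size (1 << o) starting at seg is free. */
	static unsigned char bits[MAX_ORDER + 1][1 << MAX_ORDER];
	static int num_free[MAX_ORDER + 1];

	static void buddy_init(void)
	{
		memset(bits, 0, sizeof bits);
		memset(num_free, 0, sizeof num_free);
		bits[MAX_ORDER][0] = 1;		/* start with one maximal free block */
		num_free[MAX_ORDER] = 1;
	}

	static int buddy_alloc(int order)
	{
		int o, seg;

		for (o = order; o <= MAX_ORDER; ++o) {
			if (!num_free[o])	/* the new shortcut: skip exhausted orders */
				continue;
			for (seg = 0; seg < (1 << (MAX_ORDER - o)); ++seg)
				if (bits[o][seg])
					goto found;
		}
		return -1;

	found:
		bits[o][seg] = 0;
		--num_free[o];
		while (o > order) {		/* split, leaving the buddy half free */
			--o;
			seg <<= 1;
			bits[o][seg ^ 1] = 1;
			++num_free[o];
		}
		return seg << order;		/* index in smallest-block units */
	}

	static void buddy_free(int seg, int order)
	{
		seg >>= order;
		while (order < MAX_ORDER && bits[order][seg ^ 1]) {	/* merge with a free buddy */
			bits[order][seg ^ 1] = 0;
			--num_free[order];
			seg >>= 1;
			++order;
		}
		bits[order][seg] = 1;
		++num_free[order];
	}

For instance, buddy_alloc(2) on a fresh allocator returns 0 and leaves one free buddy at every order from 2 up to MAX_ORDER - 1, exactly the state the per-order counters now expose without touching the bitmaps.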
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index ea66847e4ea..266f14e4740 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,44 +30,43 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include "mthca_dev.h"
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
- int err;
+ int err = 0;
- might_sleep();
+ pd->privileged = privileged;
atomic_set(&pd->sqp_count, 0);
pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
if (pd->pd_num == -1)
return -ENOMEM;
- err = mthca_mr_alloc_notrans(dev, pd->pd_num,
- MTHCA_MPT_FLAG_LOCAL_READ |
- MTHCA_MPT_FLAG_LOCAL_WRITE,
- &pd->ntmr);
- if (err)
- mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ if (privileged) {
+ err = mthca_mr_alloc_notrans(dev, pd->pd_num,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ MTHCA_MPT_FLAG_LOCAL_WRITE,
+ &pd->ntmr);
+ if (err)
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ }
return err;
}
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
- might_sleep();
- mthca_free_mr(dev, &pd->ntmr);
+ if (pd->privileged)
+ mthca_free_mr(dev, &pd->ntmr);
mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
-int __devinit mthca_init_pd_table(struct mthca_dev *dev)
+int mthca_init_pd_table(struct mthca_dev *dev)
{
return mthca_alloc_init(&dev->pd_table.alloc,
dev->limits.num_pds,
@@ -73,7 +74,7 @@ int __devinit mthca_init_pd_table(struct mthca_dev *dev)
dev->limits.reserved_pds);
}
-void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev)
+void mthca_cleanup_pd_table(struct mthca_dev *dev)
{
/* XXX check if any PDs are still allocated? */
mthca_alloc_cleanup(&dev->pd_table.alloc);
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 4fedc32d587..8edb28a9a0e 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,12 +29,12 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include "mthca_profile.h"
@@ -60,7 +61,7 @@ enum {
MTHCA_NUM_PDS = 1 << 15
};
-u64 mthca_make_profile(struct mthca_dev *dev,
+s64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca)
@@ -74,17 +75,15 @@ u64 mthca_make_profile(struct mthca_dev *dev,
};
u64 mem_base, mem_avail;
- u64 total_size = 0;
+ s64 total_size = 0;
struct mthca_resource *profile;
struct mthca_resource tmp;
int i, j;
- profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
+ profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
if (!profile)
return -ENOMEM;
- memset(profile, 0, MTHCA_RES_NUM * sizeof *profile);
-
profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
@@ -95,12 +94,13 @@ u64 mthca_make_profile(struct mthca_dev *dev,
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
- profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE;
+ profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size;
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
profile[MTHCA_RES_QP].num = request->num_qp;
+ profile[MTHCA_RES_SRQ].num = request->num_srq;
profile[MTHCA_RES_EQP].num = request->num_qp;
profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
profile[MTHCA_RES_CQ].num = request->num_cq;
@@ -150,7 +150,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
}
if (total_size > mem_avail) {
mthca_err(dev, "Profile requires 0x%llx bytes; "
- "won't in 0x%llx bytes of context memory.\n",
+ "won't fit in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) mem_avail);
kfree(profile);
@@ -232,7 +232,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
dev->limits.num_mtt_segs = profile[i].num;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
- init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+ init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;
@@ -260,6 +260,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
*/
dev->limits.num_pds = MTHCA_NUM_PDS;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
+ init_hca->log_mpt_sz > 23) {
+ mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
+ init_hca->log_mpt_sz);
+ mthca_warn(dev, "Disabling memory key throughput optimization.\n");
+ dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
+ }
+
/*
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
* systems it may use too much vmalloc space to map all MTT
@@ -267,7 +275,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
* out of the MR pool. They don't use additional memory, but
* we assign them as part of the HCA profile anyway.
*/
- if (mthca_is_memfree(dev))
+ if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
dev->limits.fmr_reserved_mtts = 0;
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
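
mthca_make_profile() now returns s64 rather than u64 so that error codes such as the -ENOMEM above survive the trip back to the caller: stuffed into an unsigned 64-bit value, -ENOMEM reads as 18446744073709551604 and a "< 0" test can never fire. A tiny stand-alone illustration of the trap (hypothetical function, not the driver's code):

	#include <stdio.h>
	#include <errno.h>
	#include <stdint.h>

	/* Pretend profile calculator: returns a size in bytes or -ENOMEM. */
	static int64_t make_profile(int fail)
	{
		return fail ? -ENOMEM : 1 << 20;
	}

	int main(void)
	{
		uint64_t u = make_profile(1);	/* old-style u64 caller */
		int64_t  s = make_profile(1);	/* new-style s64 caller */

		/* The error is indistinguishable from a huge size here ... */
		printf("as u64: %llu\n", (unsigned long long) u);

		/* ... but trivially detectable here. */
		if (s < 0)
			printf("as s64: error %lld\n", (long long) s);
		return 0;
	}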
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
index 17aef335766..62b009cc873 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_PROFILE_H
@@ -41,6 +40,7 @@
struct mthca_profile {
int num_qp;
int rdb_per_qp;
+ int num_srq;
int num_cq;
int num_mcg;
int num_mpt;
@@ -51,7 +51,7 @@ struct mthca_profile {
int fmr_reserved_mtts;
};
-u64 mthca_make_profile(struct mthca_dev *mdev,
+s64 mthca_make_profile(struct mthca_dev *mdev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 159f4e6c312..415f8e1a54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,5 +1,9 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,14 +32,30 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
*/
-#include <ib_smi.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/export.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"
+
+static void init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = 1;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
static int mthca_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
@@ -43,55 +63,65 @@ static int mthca_query_device(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- struct mthca_dev* mdev = to_mdev(ibdev);
+ struct mthca_dev *mdev = to_mdev(ibdev);
- u8 status;
-
- in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof props);
+ memset(props, 0, sizeof *props);
props->fw_ver = mdev->fw_ver;
- memset(in_mad, 0, sizeof *in_mad);
- in_mad->base_version = 1;
- in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- in_mad->class_version = 1;
- in_mad->method = IB_MGMT_METHOD_GET;
- in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(mdev, 1, 1,
- 1, NULL, NULL, in_mad, out_mad,
- &status);
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
props->device_cap_flags = mdev->device_cap_flags;
- props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) &
+ props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30));
- props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32));
+ props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
- memcpy(&props->node_guid, out_mad->data + 12, 8);
props->max_mr_size = ~0ull;
+ props->page_size_cap = mdev->limits.page_size_cap;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
- props->max_qp_wr = 0xffff;
+ props->max_qp_wr = mdev->limits.max_wqes;
props->max_sge = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
- props->max_cqe = 0xffff;
+ props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
- props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+ props->max_srq_wr = mdev->limits.max_srq_wqes;
+ props->max_srq_sge = mdev->limits.max_srq_sge;
props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
+ props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = mdev->limits.pkey_table_len;
+ props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
+ props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+ /*
+ * If Sinai memory key optimization is being used, then only
+ * the 8-bit key portion will change. For other HCAs, the
+ * unused index bits will also be used for FMR remapping.
+ */
+ if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ props->max_map_per_fmr = 255;
+ else
+ props->max_map_per_fmr =
+ (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;
err = 0;
out:
@@ -106,43 +136,42 @@ static int mthca_query_port(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
- in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
- memset(in_mad, 0, sizeof *in_mad);
- in_mad->base_version = 1;
- in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- in_mad->class_version = 1;
- in_mad->method = IB_MGMT_METHOD_GET;
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
- props->lid = be16_to_cpup((u16 *) (out_mad->data + 16));
+ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
- props->sm_lid = be16_to_cpup((u16 *) (out_mad->data + 18));
+ props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
props->sm_sl = out_mad->data[36] & 0xf;
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
- props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20));
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
+ props->max_msg_sz = 0x80000000;
props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
- props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48));
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
out:
kfree(in_mad);
@@ -150,6 +179,23 @@ static int mthca_query_port(struct ib_device *ibdev,
return err;
}
+static int mthca_modify_device(struct ib_device *ibdev,
+ int mask,
+ struct ib_device_modify *props)
+{
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ }
+
+ return 0;
+}
+
static int mthca_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
@@ -157,9 +203,8 @@ static int mthca_modify_port(struct ib_device *ibdev,
struct mthca_set_ib_param set_ib;
struct ib_port_attr attr;
int err;
- u8 status;
- if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
err = mthca_query_port(ibdev, port, &attr);
@@ -172,16 +217,11 @@ static int mthca_modify_port(struct ib_device *ibdev,
set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
+ err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
-
out:
- up(&to_mdev(ibdev)->cap_mask_mutex);
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
return err;
}
@@ -191,32 +231,22 @@ static int mthca_query_pkey(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
- in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
- memset(in_mad, 0, sizeof *in_mad);
- in_mad->base_version = 1;
- in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- in_mad->class_version = 1;
- in_mad->method = IB_MGMT_METHOD_GET;
- in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
- in_mad->attr_mod = cpu_to_be32(index / 32);
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
- *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
+ *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
out:
kfree(in_mad);
@@ -230,52 +260,33 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
- in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
- memset(in_mad, 0, sizeof *in_mad);
- in_mad->base_version = 1;
- in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- in_mad->class_version = 1;
- in_mad->method = IB_MGMT_METHOD_GET;
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
memcpy(gid->raw, out_mad->data + 8, 8);
- memset(in_mad, 0, sizeof *in_mad);
- in_mad->base_version = 1;
- in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- in_mad->class_version = 1;
- in_mad->method = IB_MGMT_METHOD_GET;
- in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
- in_mad->attr_mod = cpu_to_be32(index / 8);
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
- memcpy(gid->raw + 8, out_mad->data + (index % 8) * 16, 8);
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
kfree(in_mad);
@@ -283,7 +294,83 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
return err;
}
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mthca_alloc_ucontext_resp uresp;
+ struct mthca_ucontext *context;
+ int err;
+
+ if (!(to_mdev(ibdev)->active))
+ return ERR_PTR(-EAGAIN);
+
+ memset(&uresp, 0, sizeof uresp);
+
+ uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+ if (mthca_is_memfree(to_mdev(ibdev)))
+ uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+ else
+ uresp.uarc_size = 0;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+ if (IS_ERR(context->db_tab)) {
+ err = PTR_ERR(context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ context->reg_mr_warned = 0;
+
+ return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+{
+ mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab);
+ mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+ kfree(to_mucontext(context));
+
+ return 0;
+}
+
+static int mthca_mmap_uar(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
struct mthca_pd *pd;
int err;
@@ -292,12 +379,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
if (!pd)
return ERR_PTR(-ENOMEM);
- err = mthca_pd_alloc(to_mdev(ibdev), pd);
+ err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
+ mthca_pd_free(to_mdev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
return &pd->ibpd;
}
@@ -336,52 +431,176 @@ static int mthca_ah_destroy(struct ib_ah *ah)
return 0;
}
+static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mthca_create_srq ucmd;
+ struct mthca_ucontext *context = NULL;
+ struct mthca_srq *srq;
+ int err;
+
+ if (init_attr->srq_type != IB_SRQT_BASIC)
+ return ERR_PTR(-ENOSYS);
+
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab, ucmd.db_index,
+ ucmd.db_page);
+
+ if (err)
+ goto err_free;
+
+ srq->mr.ibmr.lkey = ucmd.lkey;
+ srq->db_index = ucmd.db_index;
+ }
+
+ err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
+ &init_attr->attr, srq);
+
+ if (err && pd->uobject)
+ mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab, ucmd.db_index);
+
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
+ mthca_free_srq(to_mdev(pd->device), srq);
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ return &srq->ibsrq;
+
+err_free:
+ kfree(srq);
+
+ return ERR_PTR(err);
+}
+
+static int mthca_destroy_srq(struct ib_srq *srq)
+{
+ struct mthca_ucontext *context;
+
+ if (srq->uobject) {
+ context = to_mucontext(srq->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
+ context->db_tab, to_msrq(srq)->db_index);
+ }
+
+ mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+ kfree(srq);
+
+ return 0;
+}
+
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
switch (init_attr->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
{
+ struct mthca_ucontext *context;
+
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ kfree(qp);
+ return ERR_PTR(-EFAULT);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index, ucmd.sq_db_page);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index, ucmd.rq_db_page);
+ if (err) {
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->mr.ibmr.lkey = ucmd.lkey;
+ qp->sq.db_index = ucmd.sq_db_index;
+ qp->rq.db_index = ucmd.rq_db_index;
+ }
err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- qp);
+ &init_attr->cap, qp);
+
+ if (err && pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index);
+ }
+
qp->ibqp.qp_num = qp->qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ /* Don't allow userspace to create special QPs */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
-
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
- init_attr->sq_sig_type,
+ init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
to_msqp(qp));
break;
@@ -396,42 +615,243 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
return ERR_PTR(err);
}
- init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_send_wr = qp->sq.max;
+ init_attr->cap.max_recv_wr = qp->rq.max;
+ init_attr->cap.max_send_sge = qp->sq.max_gs;
+ init_attr->cap.max_recv_sge = qp->rq.max_gs;
+ init_attr->cap.max_inline_data = qp->max_inline_data;
return &qp->ibqp;
}
static int mthca_destroy_qp(struct ib_qp *qp)
{
+ if (qp->uobject) {
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->sq.db_index);
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->rq.db_index);
+ }
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
kfree(qp);
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
+ struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
+ if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.set_db_index, ucmd.set_db_page);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.arm_db_index, ucmd.arm_db_page);
+ if (err)
+ goto err_unmap_set;
+ }
+
cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_unmap_arm;
+ }
+
+ if (context) {
+ cq->buf.mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
+ }
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
- err = mthca_init_cq(to_mdev(ibdev), nent, cq);
- if (err) {
- kfree(cq);
- cq = ERR_PTR(err);
+ err = mthca_init_cq(to_mdev(ibdev), nent,
+ context ? to_mucontext(context) : NULL,
+ context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ cq);
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ mthca_free_cq(to_mdev(ibdev), cq);
+ err = -EFAULT;
+ goto err_free;
}
+ cq->resize_buf = NULL;
+
return &cq->ibcq;
+
+err_free:
+ kfree(cq);
+
+err_unmap_arm:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.arm_db_index);
+
+err_unmap_set:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.set_db_index);
+
+ return ERR_PTR(err);
+}
+
+static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
+ int entries)
+{
+ int ret;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ if (!cq->resize_buf) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ cq->resize_buf->state = CQ_RESIZE_ALLOC;
+
+ ret = 0;
+
+unlock:
+ spin_unlock_irq(&cq->lock);
+
+ if (ret)
+ return ret;
+
+ ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+ if (ret) {
+ spin_lock_irq(&cq->lock);
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ return ret;
+ }
+
+ cq->resize_buf->cqe = entries - 1;
+
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf->state = CQ_RESIZE_READY;
+ spin_unlock_irq(&cq->lock);
+
+ return 0;
+}
+
+static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibcq->device);
+ struct mthca_cq *cq = to_mcq(ibcq);
+ struct mthca_resize_cq ucmd;
+ u32 lkey;
+ int ret;
+
+ if (entries < 1 || entries > dev->limits.max_cqes)
+ return -EINVAL;
+
+ mutex_lock(&cq->mutex);
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries == ibcq->cqe + 1) {
+ ret = 0;
+ goto out;
+ }
+
+ if (cq->is_kernel) {
+ ret = mthca_alloc_resize_buf(dev, cq, entries);
+ if (ret)
+ goto out;
+ lkey = cq->resize_buf->buf.mr.ibmr.lkey;
+ } else {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ lkey = ucmd.lkey;
+ }
+
+ ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries));
+
+ if (ret) {
+ if (cq->resize_buf) {
+ mthca_free_cq_buf(dev, &cq->resize_buf->buf,
+ cq->resize_buf->cqe);
+ kfree(cq->resize_buf);
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ }
+ goto out;
+ }
+
+ if (cq->is_kernel) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf->state == CQ_RESIZE_READY) {
+ mthca_cq_resize_copy_cqes(cq);
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+ } else {
+ tbuf = cq->resize_buf->buf;
+ tcqe = cq->resize_buf->cqe;
+ }
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+
+ mthca_free_cq_buf(dev, &tbuf, tcqe);
+ } else
+ ibcq->cqe = entries - 1;
+
+out:
+ mutex_unlock(&cq->mutex);
+
+ return ret;
}
static int mthca_destroy_cq(struct ib_cq *cq)
{
+ if (cq->uobject) {
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->arm_db_index);
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->set_ci_db_index);
+ }
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
kfree(cq);
@@ -465,6 +885,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
+ mr->umem = NULL;
+
return &mr->ibmr;
}
@@ -477,45 +899,27 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
struct mthca_mr *mr;
u64 *page_list;
u64 total_size;
- u64 mask;
+ unsigned long mask;
int shift;
int npages;
int err;
int i, j, n;
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
- return ERR_PTR(-EINVAL);
-
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
- return ERR_PTR(-EINVAL);
-
- mask = 0;
+ mask = buffer_list[0].addr ^ *iova_start;
total_size = 0;
for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return ERR_PTR(-EINVAL);
+ if (i != 0)
+ mask |= buffer_list[i].addr;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
}
- /* Find largest page shift we can use to cover buffers */
- for (shift = PAGE_SHIFT; shift < 31; ++shift)
- if (num_phys_buf > 1) {
- if ((1ULL << shift) & mask)
- break;
- } else {
- if (1ULL << shift >=
- buffer_list[0].size +
- (buffer_list[0].addr & ((1ULL << shift) - 1)))
- break;
- }
+ if (mask & ~PAGE_MASK)
+ return ERR_PTR(-EINVAL);
+
+ shift = __ffs(mask | 1 << 31);
buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
buffer_list[0].addr &= ~0ull << shift;
@@ -558,19 +962,126 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
convert_access(acc), mr);
if (err) {
+ kfree(page_list);
kfree(mr);
return ERR_PTR(err);
}
kfree(page_list);
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(pd->device);
+ struct scatterlist *sg;
+ struct mthca_mr *mr;
+ struct mthca_reg_mr ucmd;
+ u64 *pages;
+ int shift, n, len;
+ int i, k, entry;
+ int err = 0;
+ int write_mtt_size;
+
+ if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
+ if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+ mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
+ current->comm);
+ mthca_warn(dev, " Update libmthca to fix this.\n");
+ }
+ ++to_mucontext(pd->uobject->context)->reg_mr_warned;
+ ucmd.mr_attrs = 0;
+ } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
+ ucmd.mr_attrs & MTHCA_MR_DMASYNC);
+
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err;
+ }
+
+ shift = ffs(mr->umem->page_size) - 1;
+ n = mr->umem->nmap;
+
+ mr->mtt = mthca_alloc_mtt(dev, n);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
+ goto err_umem;
+ }
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+
+ i = n = 0;
+
+ write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
+
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+mtt_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_mtt;
+
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+ convert_access(acc), mr);
+
+ if (err)
+ goto err_mtt;
+
return &mr->ibmr;
+
+err_mtt:
+ mthca_free_mtt(dev, mr->mtt);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err:
+ kfree(mr);
+ return ERR_PTR(err);
}
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
+
mthca_free_mr(to_mdev(mr->device), mmr);
+ if (mmr->umem)
+ ib_umem_release(mmr->umem);
kfree(mmr);
+
return 0;
}
@@ -613,7 +1124,6 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
{
struct ib_fmr *fmr;
int err;
- u8 status;
struct mthca_dev *mdev = NULL;
list_for_each_entry(fmr, fmr_list, list) {
@@ -634,31 +1144,33 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
list_for_each_entry(fmr, fmr_list, list)
mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
- err = mthca_SYNC_TPT(mdev, &status);
- if (err)
- return err;
- if (status)
- return -EINVAL;
- return 0;
+ err = mthca_SYNC_TPT(mdev);
+ return err;
}
-static ssize_t show_rev(struct class_device *cdev, char *buf)
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
{
- struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->rev_id);
}
-static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
{
- struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
- return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);
}
-static ssize_t show_hca(struct class_device *cdev, char *buf)
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
{
- struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
switch (dev->pdev->device) {
case PCI_DEVICE_ID_MELLANOX_TAVOR:
return sprintf(buf, "MT23108\n");
@@ -674,43 +1186,142 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
}
}
-static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-static struct class_device_attribute *mthca_class_attributes[] = {
- &class_device_attr_hw_rev,
- &class_device_attr_fw_ver,
- &class_device_attr_hca_type
+static struct device_attribute *mthca_dev_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
};
+static int mthca_init_node_data(struct mthca_dev *dev)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mthca_MAD_IFC(dev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mthca_MAD_IFC(dev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ if (mthca_is_memfree(dev))
+ dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
int i;
+ ret = mthca_init_node_data(dev);
+ if (ret)
+ return ret;
+
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
- dev->ib_dev.node_type = IB_NODE_CA;
+ dev->ib_dev.owner = THIS_MODULE;
+
+ dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
+ dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dma_device = &dev->pdev->dev;
- dev->ib_dev.class_dev.dev = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
+ dev->ib_dev.modify_device = mthca_modify_device;
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
+ dev->ib_dev.mmap = mthca_mmap_uar;
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
+ dev->ib_dev.query_ah = mthca_ah_query;
dev->ib_dev.destroy_ah = mthca_ah_destroy;
+
+ if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
+ dev->ib_dev.create_srq = mthca_create_srq;
+ dev->ib_dev.modify_srq = mthca_modify_srq;
+ dev->ib_dev.query_srq = mthca_query_srq;
+ dev->ib_dev.destroy_srq = mthca_destroy_srq;
+ dev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ if (mthca_is_memfree(dev))
+ dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+ else
+ dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+ }
+
dev->ib_dev.create_qp = mthca_create_qp;
dev->ib_dev.modify_qp = mthca_modify_qp;
+ dev->ib_dev.query_qp = mthca_query_qp;
dev->ib_dev.destroy_qp = mthca_destroy_qp;
dev->ib_dev.create_cq = mthca_create_cq;
+ dev->ib_dev.resize_cq = mthca_resize_cq;
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
@@ -737,25 +1348,28 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.post_recv = mthca_tavor_post_receive;
}
- init_MUTEX(&dev->cap_mask_mutex);
+ mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev);
+ ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
- for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
- ret = class_device_create_file(&dev->ib_dev.class_dev,
- mthca_class_attributes[i]);
+ for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
+ ret = device_create_file(&dev->ib_dev.dev,
+ mthca_dev_attributes[i]);
if (ret) {
ib_unregister_device(&dev->ib_dev);
return ret;
}
}
+ mthca_start_catas_poll(dev);
+
return 0;
}
void mthca_unregister_device(struct mthca_dev *dev)
{
+ mthca_stop_catas_poll(dev);
ib_unregister_device(&dev->ib_dev);
}
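
The rewritten alignment logic in mthca_reg_phys_mr() above folds the old per-buffer checks and the explicit shift-search loop into one bitmask: the iova offset within the first buffer, the start address of every later buffer, and the end address of every buffer but the last are OR'ed together, so the lowest set bit of the mask is the largest page shift that keeps every internal boundary aligned; ORing in 1 << 31 caps the shift at 31 when the mask would otherwise be zero. A standalone worked example of the same computation (addresses are made up for illustration, and __builtin_ctzll stands in for the kernel's __ffs):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* two buffers: [0x200000, +1 MB) and [0x300000, +1 MB), iova at the first */
        uint64_t addr[2] = { 0x200000, 0x300000 };
        uint64_t size[2] = { 0x100000, 0x100000 };
        uint64_t iova = 0x200000;
        uint64_t mask = addr[0] ^ iova;         /* offset of iova within buffer 0 */
        int i, shift;

        for (i = 0; i < 2; ++i) {
                if (i != 0)
                        mask |= addr[i];                /* start of each later buffer */
                if (i != 2 - 1)
                        mask |= addr[i] + size[i];      /* end of each earlier buffer */
        }

        /* the driver additionally rejects mask & ~PAGE_MASK, i.e. anything below PAGE_SIZE */
        shift = __builtin_ctzll(mask | 1ULL << 31);     /* lowest set bit wins */
        printf("largest usable page shift: %d (%llu-byte pages)\n",
               shift, 1ULL << shift);                   /* prints 20: 1 MB pages */
        return 0;
}
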
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 619710f95a8..596acc45569 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,15 +30,13 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_PROVIDER_H
#define MTHCA_PROVIDER_H
-#include <ib_verbs.h>
-#include <ib_pack.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
@@ -46,7 +46,12 @@
struct mthca_buf_list {
void *buf;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+};
+
+union mthca_buf {
+ struct mthca_buf_list direct;
+ struct mthca_buf_list *page_list;
};
struct mthca_uar {
@@ -54,18 +59,28 @@ struct mthca_uar {
int index;
};
+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mthca_uar uar;
+ struct mthca_user_db_table *db_tab;
+ int reg_mr_warned;
+};
+
+struct mthca_mtt;
+
struct mthca_mr {
- struct ib_mr ibmr;
- int order;
- u32 first_seg;
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct mthca_mtt *mtt;
};
struct mthca_fmr {
- struct ib_fmr ibmr;
+ struct ib_fmr ibmr;
struct ib_fmr_attr attr;
- int order;
- u32 first_seg;
- int maps;
+ struct mthca_mtt *mtt;
+ int maps;
union {
struct {
struct mthca_mpt_entry __iomem *mpt;
@@ -74,6 +89,7 @@ struct mthca_fmr {
struct {
struct mthca_mpt_entry *mpt;
__be64 *mtts;
+ dma_addr_t dma_handle;
} arbel;
} mem;
};
@@ -83,6 +99,7 @@ struct mthca_pd {
u32 pd_num;
atomic_t sqp_count;
struct mthca_mr ntmr;
+ int privileged;
};
struct mthca_eq {
@@ -96,6 +113,7 @@ struct mthca_eq {
int nent;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
+ char irq_name[IB_DEVICE_NAME_MAX];
};
struct mthca_av;
@@ -120,14 +138,15 @@ struct mthca_ah {
* We have one global lock that protects dev->cq/qp_table. Each
* struct mthca_cq/qp also has its own lock. An individual qp lock
* may be taken inside of an individual cq lock. Both cqs attached to
- * a qp may be locked, with the send cq locked first. No other
- * nesting should be done.
+ * a qp may be locked, with the cq with the lower cqn locked first.
+ * No other nesting should be done.
*
- * Each struct mthca_cq/qp also has an atomic_t ref count. The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
+ * Each struct mthca_cq/qp also has an ref count, protected by the
+ * corresponding table lock. The pointer from the cq/qp_table to the
+ * struct counts as one reference. This reference also is good for
+ * access through the consumer API, so modifying the CQ/QP etc doesn't
+ * need to take another reference. Access to a QP because of a
+ * completion being polled does not need a reference either.
*
* Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
* destroy function to sleep on.
@@ -143,14 +162,17 @@ struct mthca_ah {
* - decrement ref count; if zero, wake up waiters
*
* To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
+ * - lock cq/qp_table
+ * - remove pointer and decrement ref count
+ * - unlock cq/qp_table lock
* - wait_event until ref count is zero
*
* It is the consumer's responsibility to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
+ * operations (WQE posting or state modification) are pending when a
* QP is destroyed. Also, the consumer must make sure that calls to
- * qp_modify are serialized.
+ * qp_modify are serialized. Similarly, the consumer is responsible
+ * for ensuring that no CQ resize operations are pending when a CQ
+ * is destroyed.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
@@ -160,27 +182,65 @@ struct mthca_ah {
* send queue and one for the receive queue)
*/
+struct mthca_cq_buf {
+ union mthca_buf queue;
+ struct mthca_mr mr;
+ int is_direct;
+};
+
+struct mthca_cq_resize {
+ struct mthca_cq_buf buf;
+ int cqe;
+ enum {
+ CQ_RESIZE_ALLOC,
+ CQ_RESIZE_READY,
+ CQ_RESIZE_SWAPPED
+ } state;
+};
+
struct mthca_cq {
- struct ib_cq ibcq;
- spinlock_t lock;
- atomic_t refcount;
- int cqn;
- u32 cons_index;
- int is_direct;
+ struct ib_cq ibcq;
+ spinlock_t lock;
+ int refcount;
+ int cqn;
+ u32 cons_index;
+ struct mthca_cq_buf buf;
+ struct mthca_cq_resize *resize_buf;
+ int is_kernel;
/* Next fields are Arbel only */
- int set_ci_db_index;
- u32 *set_ci_db;
- int arm_db_index;
- u32 *arm_db;
- int arm_sn;
+ int set_ci_db_index;
+ __be32 *set_ci_db;
+ int arm_db_index;
+ __be32 *arm_db;
+ int arm_sn;
+
+ wait_queue_head_t wait;
+ struct mutex mutex;
+};
- union {
- struct mthca_buf_list direct;
- struct mthca_buf_list *page_list;
- } queue;
- struct mthca_mr mr;
- wait_queue_head_t wait;
+struct mthca_srq {
+ struct ib_srq ibsrq;
+ spinlock_t lock;
+ int refcount;
+ int srqn;
+ int max;
+ int max_gs;
+ int wqe_shift;
+ int first_free;
+ int last_free;
+ u16 counter; /* Arbel only */
+ int db_index; /* Arbel only */
+ __be32 *db; /* Arbel only */
+ void *last;
+
+ int is_direct;
+ u64 *wrid;
+ union mthca_buf queue;
+ struct mthca_mr mr;
+
+ wait_queue_head_t wait;
+ struct mutex mutex;
};
struct mthca_wq {
@@ -195,14 +255,16 @@ struct mthca_wq {
int wqe_shift;
int db_index; /* Arbel only */
- u32 *db;
+ __be32 *db;
};
struct mthca_qp {
struct ib_qp ibqp;
- atomic_t refcount;
+ int refcount;
u32 qpn;
int is_direct;
+ u8 port; /* for SQP and memfree use only */
+ u8 alt_port; /* for memfree use only */
u8 transport;
u8 state;
u8 atomic_rd_en;
@@ -214,19 +276,17 @@ struct mthca_qp {
struct mthca_wq sq;
enum ib_sig_type sq_policy;
int send_wqe_offset;
+ int max_inline_data;
u64 *wrid;
- union {
- struct mthca_buf_list direct;
- struct mthca_buf_list *page_list;
- } queue;
+ union mthca_buf queue;
wait_queue_head_t wait;
+ struct mutex mutex;
};
struct mthca_sqp {
struct mthca_qp qp;
- int port;
int pkey_index;
u32 qkey;
u32 send_psn;
@@ -236,6 +296,11 @@ struct mthca_sqp {
dma_addr_t header_dma;
};
+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
@@ -261,6 +326,11 @@ static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
return container_of(ibcq, struct mthca_cq, ibcq);
}
+static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct mthca_srq, ibsrq);
+}
+
static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mthca_qp, ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index ca73bab11a0..e354b2f04ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,5 +1,8 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -28,25 +31,30 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
*/
-#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
-#include <ib_verbs.h>
-#include <ib_cache.h>
-#include <ib_pack.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pack.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
+#include "mthca_wqe.h"
enum {
MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
MTHCA_ACK_REQ_FREQ = 10,
MTHCA_FLIGHT_LIMIT = 9,
- MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */
+ MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
+ MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
+ MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
};
enum {
@@ -91,63 +99,67 @@ enum {
MTHCA_QP_BIT_RSC = 1 << 3
};
+enum {
+ MTHCA_SEND_DOORBELL_FENCE = 1 << 5
+};
+
struct mthca_qp_path {
- u32 port_pkey;
- u8 rnr_retry;
- u8 g_mylmc;
- u16 rlid;
- u8 ackto;
- u8 mgid_index;
- u8 static_rate;
- u8 hop_limit;
- u32 sl_tclass_flowlabel;
- u8 rgid[16];
+ __be32 port_pkey;
+ u8 rnr_retry;
+ u8 g_mylmc;
+ __be16 rlid;
+ u8 ackto;
+ u8 mgid_index;
+ u8 static_rate;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ u8 rgid[16];
} __attribute__((packed));
struct mthca_qp_context {
- u32 flags;
- u32 tavor_sched_queue; /* Reserved on Arbel */
- u8 mtu_msgmax;
- u8 rq_size_stride; /* Reserved on Tavor */
- u8 sq_size_stride; /* Reserved on Tavor */
- u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */
- u32 usr_page;
- u32 local_qpn;
- u32 remote_qpn;
- u32 reserved1[2];
+ __be32 flags;
+ __be32 tavor_sched_queue; /* Reserved on Arbel */
+ u8 mtu_msgmax;
+ u8 rq_size_stride; /* Reserved on Tavor */
+ u8 sq_size_stride; /* Reserved on Tavor */
+ u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */
+ __be32 usr_page;
+ __be32 local_qpn;
+ __be32 remote_qpn;
+ u32 reserved1[2];
struct mthca_qp_path pri_path;
struct mthca_qp_path alt_path;
- u32 rdd;
- u32 pd;
- u32 wqe_base;
- u32 wqe_lkey;
- u32 params1;
- u32 reserved2;
- u32 next_send_psn;
- u32 cqn_snd;
- u32 snd_wqe_base_l; /* Next send WQE on Tavor */
- u32 snd_db_index; /* (debugging only entries) */
- u32 last_acked_psn;
- u32 ssn;
- u32 params2;
- u32 rnr_nextrecvpsn;
- u32 ra_buff_indx;
- u32 cqn_rcv;
- u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */
- u32 rcv_db_index; /* (debugging only entries) */
- u32 qkey;
- u32 srqn;
- u32 rmsn;
- u16 rq_wqe_counter; /* reserved on Tavor */
- u16 sq_wqe_counter; /* reserved on Tavor */
- u32 reserved3[18];
+ __be32 rdd;
+ __be32 pd;
+ __be32 wqe_base;
+ __be32 wqe_lkey;
+ __be32 params1;
+ __be32 reserved2;
+ __be32 next_send_psn;
+ __be32 cqn_snd;
+ __be32 snd_wqe_base_l; /* Next send WQE on Tavor */
+ __be32 snd_db_index; /* (debugging only entries) */
+ __be32 last_acked_psn;
+ __be32 ssn;
+ __be32 params2;
+ __be32 rnr_nextrecvpsn;
+ __be32 ra_buff_indx;
+ __be32 cqn_rcv;
+ __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */
+ __be32 rcv_db_index; /* (debugging only entries) */
+ __be32 qkey;
+ __be32 srqn;
+ __be32 rmsn;
+ __be16 rq_wqe_counter; /* reserved on Tavor */
+ __be16 sq_wqe_counter; /* reserved on Tavor */
+ u32 reserved3[18];
} __attribute__((packed));
struct mthca_qp_param {
- u32 opt_param_mask;
- u32 reserved1;
+ __be32 opt_param_mask;
+ u32 reserved1;
struct mthca_qp_context context;
- u32 reserved2[62];
+ u32 reserved2[62];
} __attribute__((packed));
enum {
@@ -170,80 +182,6 @@ enum {
MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16
};
-enum {
- MTHCA_NEXT_DBD = 1 << 7,
- MTHCA_NEXT_FENCE = 1 << 6,
- MTHCA_NEXT_CQ_UPDATE = 1 << 3,
- MTHCA_NEXT_EVENT_GEN = 1 << 2,
- MTHCA_NEXT_SOLICIT = 1 << 1,
-
- MTHCA_MLX_VL15 = 1 << 17,
- MTHCA_MLX_SLR = 1 << 16
-};
-
-enum {
- MTHCA_INVAL_LKEY = 0x100
-};
-
-struct mthca_next_seg {
- u32 nda_op; /* [31:6] next WQE [4:0] next opcode */
- u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
- u32 flags; /* [3] CQ [2] Event [1] Solicit */
- u32 imm; /* immediate data */
-};
-
-struct mthca_tavor_ud_seg {
- u32 reserved1;
- u32 lkey;
- u64 av_addr;
- u32 reserved2[4];
- u32 dqpn;
- u32 qkey;
- u32 reserved3[2];
-};
-
-struct mthca_arbel_ud_seg {
- u32 av[8];
- u32 dqpn;
- u32 qkey;
- u32 reserved[2];
-};
-
-struct mthca_bind_seg {
- u32 flags; /* [31] Atomic [30] rem write [29] rem read */
- u32 reserved;
- u32 new_rkey;
- u32 lkey;
- u64 addr;
- u64 length;
-};
-
-struct mthca_raddr_seg {
- u64 raddr;
- u32 rkey;
- u32 reserved;
-};
-
-struct mthca_atomic_seg {
- u64 swap_add;
- u64 compare;
-};
-
-struct mthca_data_seg {
- u32 byte_count;
- u32 lkey;
- u64 addr;
-};
-
-struct mthca_mlx_seg {
- u32 nda_op;
- u32 nds;
- u32 flags; /* [17] VL15 [16] SLR [14:12] static rate
- [11:8] SL [3] C [2] E */
- u16 rlid;
- u16 vcrc;
-};
-
static const u8 mthca_opcode[] = {
[IB_WR_SEND] = MTHCA_OPCODE_SEND,
[IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM,
@@ -288,6 +226,14 @@ static void *get_send_wqe(struct mthca_qp *qp, int n)
(PAGE_SIZE - 1));
}
+static void mthca_wq_reset(struct mthca_wq *wq)
+{
+ wq->next_ind = 0;
+ wq->last_comp = wq->max - 1;
+ wq->head = 0;
+ wq->tail = 0;
+}
+
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type)
{
@@ -297,22 +243,28 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
spin_lock(&dev->qp_table.lock);
qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
if (qp)
- atomic_inc(&qp->refcount);
+ ++qp->refcount;
spin_unlock(&dev->qp_table.lock);
if (!qp) {
- mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ mthca_warn(dev, "Async event %d for bogus QP %08x\n",
+ event_type, qpn);
return;
}
+ if (event_type == IB_EVENT_PATH_MIG)
+ qp->port = qp->alt_port;
+
event.device = &dev->ib_dev;
event.event = event_type;
event.element.qp = &qp->ibqp;
if (qp->ibqp.event_handler)
qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
- if (atomic_dec_and_test(&qp->refcount))
+ spin_lock(&dev->qp_table.lock);
+ if (!--qp->refcount)
wake_up(&qp->wait);
+ spin_unlock(&dev->qp_table.lock);
}
static int to_mthca_state(enum ib_qp_state ib_state)
@@ -343,178 +295,7 @@ static int to_mthca_st(int transport)
}
}
-static const struct {
- int trans;
- u32 req_param[NUM_TRANS];
- u32 opt_param[NUM_TRANS];
-} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_INIT] = {
- .trans = MTHCA_TRANS_RST2INIT,
- .req_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- },
- /* bug-for-bug compatibility with VAPI: */
- .opt_param = {
- [MLX] = IB_QP_PORT
- }
- },
- },
- [IB_QPS_INIT] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_INIT] = {
- .trans = MTHCA_TRANS_INIT2INIT,
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_RTR] = {
- .trans = MTHCA_TRANS_INIT2RTR,
- .req_param = {
- [RC] = (IB_QP_AV |
- IB_QP_PATH_MTU |
- IB_QP_DEST_QPN |
- IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC |
- IB_QP_MIN_RNR_TIMER),
- },
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [RC] = (IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_RTR] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_RTR2RTS,
- .req_param = {
- [UD] = IB_QP_SQ_PSN,
- [RC] = (IB_QP_TIMEOUT |
- IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
- IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC),
- [MLX] = IB_QP_SQ_PSN,
- },
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [RC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_RTS] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_RTS2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [RC] = (IB_QP_ACCESS_FLAGS |
- IB_QP_ALT_PATH |
- IB_QP_PATH_MIG_STATE |
- IB_QP_MIN_RNR_TIMER),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_SQD] = {
- .trans = MTHCA_TRANS_RTS2SQD,
- },
- },
- [IB_QPS_SQD] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_SQD2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [RC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_SQD] = {
- .trans = MTHCA_TRANS_SQD2SQD,
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [RC] = (IB_QP_AV |
- IB_QP_TIMEOUT |
- IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
- IB_QP_MAX_QP_RD_ATOMIC |
- IB_QP_MAX_DEST_RD_ATOMIC |
- IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_SQE] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_SQERR2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [RC] = (IB_QP_CUR_STATE |
- IB_QP_MIN_RNR_TIMER),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_ERR] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
- }
-};
-
-static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
int attr_mask)
{
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -528,91 +309,255 @@ static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
static void init_port(struct mthca_dev *dev, int port)
{
int err;
- u8 status;
struct mthca_init_ib_param param;
memset(&param, 0, sizeof param);
- param.enable_1x = 1;
- param.enable_4x = 1;
- param.vl_cap = dev->limits.vl_cap;
- param.mtu_cap = dev->limits.mtu_cap;
- param.gid_cap = dev->limits.gid_table_len;
- param.pkey_cap = dev->limits.pkey_table_len;
+ param.port_width = dev->limits.port_width_cap;
+ param.vl_cap = dev->limits.vl_cap;
+ param.mtu_cap = dev->limits.mtu_cap;
+ param.gid_cap = dev->limits.gid_table_len;
+ param.pkey_cap = dev->limits.pkey_table_len;
- err = mthca_INIT_IB(dev, &param, port, &status);
+ err = mthca_INIT_IB(dev, &param, port);
if (err)
mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
- if (status)
- mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+ u32 hw_access_flags = 0;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ dest_rd_atomic = attr->max_dest_rd_atomic;
+ else
+ dest_rd_atomic = qp->resp_depth;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ access_flags = attr->qp_access_flags;
+ else
+ access_flags = qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ hw_access_flags |= MTHCA_QP_BIT_RRE;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ hw_access_flags |= MTHCA_QP_BIT_RAE;
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ hw_access_flags |= MTHCA_QP_BIT_RWE;
+
+ return cpu_to_be32(hw_access_flags);
+}
+
+static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
+{
+ switch (mthca_state) {
+ case MTHCA_QP_STATE_RST: return IB_QPS_RESET;
+ case MTHCA_QP_STATE_INIT: return IB_QPS_INIT;
+ case MTHCA_QP_STATE_RTR: return IB_QPS_RTR;
+ case MTHCA_QP_STATE_RTS: return IB_QPS_RTS;
+ case MTHCA_QP_STATE_DRAINING:
+ case MTHCA_QP_STATE_SQD: return IB_QPS_SQD;
+ case MTHCA_QP_STATE_SQE: return IB_QPS_SQE;
+ case MTHCA_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
+{
+ switch (mthca_mig_state) {
+ case 0: return IB_MIG_ARMED;
+ case 1: return IB_MIG_REARM;
+ case 3: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mthca_flags)
+{
+ int ib_flags = 0;
+
+ if (mthca_flags & MTHCA_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mthca_flags & MTHCA_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mthca_flags & MTHCA_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
+ struct mthca_qp_path *path)
+{
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+ ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+
+ if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
+ return;
+
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
+ ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
+ ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
+ path->static_rate & 0xf,
+ ib_ah_attr->port_num);
+ ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ }
+}
+
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
- enum ib_qp_state cur_state, new_state;
- void *mailbox = NULL;
+ int err = 0;
+ struct mthca_mailbox *mailbox = NULL;
struct mthca_qp_param *qp_param;
- struct mthca_qp_context *qp_context;
- u32 req_param, opt_param;
- u8 status;
- int err;
+ struct mthca_qp_context *context;
+ int mthca_state;
- if (attr_mask & IB_QP_CUR_STATE) {
- if (attr->cur_qp_state != IB_QPS_RTR &&
- attr->cur_qp_state != IB_QPS_RTS &&
- attr->cur_qp_state != IB_QPS_SQD &&
- attr->cur_qp_state != IB_QPS_SQE)
- return -EINVAL;
- else
- cur_state = attr->cur_qp_state;
- } else {
- spin_lock_irq(&qp->sq.lock);
- spin_lock(&qp->rq.lock);
- cur_state = qp->state;
- spin_unlock(&qp->rq.lock);
- spin_unlock_irq(&qp->sq.lock);
- }
+ mutex_lock(&qp->mutex);
- if (attr_mask & IB_QP_STATE) {
- if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
- return -EINVAL;
- new_state = attr->qp_state;
- } else
- new_state = cur_state;
-
- if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
- mthca_dbg(dev, "Illegal QP transition "
- "%d->%d\n", cur_state, new_state);
- return -EINVAL;
+ if (qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
}
- req_param = state_table[cur_state][new_state].req_param[qp->transport];
- opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out;
+ }
- if ((req_param & attr_mask) != req_param) {
- mthca_dbg(dev, "QP transition "
- "%d->%d missing req attr 0x%08x\n",
- cur_state, new_state,
- req_param & ~attr_mask);
- return -EINVAL;
+ err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox);
+ if (err) {
+ mthca_warn(dev, "QUERY_QP failed (%d)\n", err);
+ goto out_mailbox;
}
- if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
- mthca_dbg(dev, "QP transition (transport %d) "
- "%d->%d has extra attr 0x%08x\n",
- qp->transport,
- cur_state, new_state,
- attr_mask & ~(req_param | opt_param |
- IB_QP_STATE));
- return -EINVAL;
+ qp_param = mailbox->buf;
+ context = &qp_param->context;
+ mthca_state = be32_to_cpu(context->flags) >> 28;
+
+ qp->state = to_ib_qp_state(mthca_state);
+ qp_attr->qp_state = qp->state;
+ qp_attr->path_mtu = context->mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context->qkey);
+ qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context->params2));
+
+ if (qp->transport == RC || qp->transport == UC) {
+ to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ qp_attr->alt_pkey_index =
+ be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
}
- mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- qp_param = MAILBOX_ALIGN(mailbox);
+ qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+ qp_attr->port_num =
+ (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context->pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5;
+ qp_attr->alt_timeout = context->alt_path.ackto >> 3;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_send_wr = qp->sq.max;
+ qp_attr->cap.max_recv_wr = qp->rq.max;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+ qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = qp->sq_policy;
+
+out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
+ struct mthca_qp_path *path, u8 port)
+{
+ path->g_mylmc = ah->src_path_bits & 0x7f;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ return -1;
+ }
+
+ path->g_mylmc |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->sl_tclass_flowlabel =
+ cpu_to_be32((ah->sl << 28) |
+ (ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ } else
+ path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+
+ return 0;
+}
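Both mthca_path_set() above and to_ib_ah_attr() earlier treat sl_tclass_flowlabel as one packed big-endian word; a minimal sketch of that packing, with the field widths taken from the shifts and masks used in this file (the helper name is illustrative only):

/* [31:28] SL, [27:20] traffic class, [19:0] flow label */
static __be32 pack_sl_tclass_flowlabel(u8 sl, u8 tclass, u32 flow_label)
{
	return cpu_to_be32(((u32) sl << 28) |
			   ((u32) tclass << 20) |
			   (flow_label & 0xfffff));
}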
+
+static int __mthca_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ struct mthca_mailbox *mailbox;
+ struct mthca_qp_param *qp_param;
+ struct mthca_qp_context *qp_context;
+ u32 sqd_event = 0;
+ int err = -EINVAL;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out;
+ }
+ qp_param = mailbox->buf;
qp_context = &qp_param->context;
memset(qp_param, 0, sizeof *qp_param);
@@ -640,19 +585,32 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (qp->transport == MLX || qp->transport == UD)
qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
- else if (attr_mask & IB_QP_PATH_MTU)
+ else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
+ mthca_dbg(dev, "path MTU (%u) is invalid\n",
+ attr->path_mtu);
+ goto out_mailbox;
+ }
qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+ }
if (mthca_is_memfree(dev)) {
- qp_context->rq_size_stride =
- ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
- qp_context->sq_size_stride =
- ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
+ if (qp->rq.max)
+ qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
+ qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
+
+ if (qp->sq.max)
+ qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
+ qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
}
/* leave arbel_sched_queue as 0 */
- qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ if (qp->ibqp.uobject)
+ qp_context->usr_page =
+ cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ else
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
if (attr_mask & IB_QP_DEST_QPN) {
qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -660,7 +618,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (qp->transport == MLX)
qp_context->pri_path.port_pkey |=
- cpu_to_be32(to_msqp(qp)->port << 24);
+ cpu_to_be32(qp->port << 24);
else {
if (attr_mask & IB_QP_PORT) {
qp_context->pri_path.port_pkey |=
@@ -676,37 +634,60 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
if (attr_mask & IB_QP_RNR_RETRY) {
- qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
+ qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
+ attr->rnr_retry << 5;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
+ MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
}
if (attr_mask & IB_QP_AV) {
- qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f;
- qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
- qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
- if (attr->ah_attr.ah_flags & IB_AH_GRH) {
- qp_context->pri_path.g_mylmc |= 1 << 7;
- qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
- qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
- qp_context->pri_path.sl_tclass_flowlabel =
- cpu_to_be32((attr->ah_attr.sl << 28) |
- (attr->ah_attr.grh.traffic_class << 20) |
- (attr->ah_attr.grh.flow_label));
- memcpy(qp_context->pri_path.rgid,
- attr->ah_attr.grh.dgid.raw, 16);
- } else {
- qp_context->pri_path.sl_tclass_flowlabel =
- cpu_to_be32(attr->ah_attr.sl << 28);
- }
+ if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
+ goto out_mailbox;
+
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
}
+ if (ibqp->qp_type == IB_QPT_RC &&
+ cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;
+
+ if (mthca_is_memfree(dev))
+ qp_context->rlkey_arbel_sched_queue |= sched_queue;
+ else
+ qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
+
+ qp_param->opt_param_mask |=
+ cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
+ }
+
if (attr_mask & IB_QP_TIMEOUT) {
- qp_context->pri_path.ackto = attr->timeout;
+ qp_context->pri_path.ackto = attr->timeout << 3;
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
}
- /* XXX alt_path */
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
+ attr->alt_pkey_index, dev->limits.pkey_table_len-1);
+ goto out_mailbox;
+ }
+
+ if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
+ mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
+ attr->alt_port_num);
+ goto out_mailbox;
+ }
+
+ if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
+ attr->alt_ah_attr.port_num))
+ goto out_mailbox;
+
+ qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
+ attr->alt_port_num << 24);
+ qp_context->alt_path.ackto = attr->alt_timeout << 3;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
+ }
/* leave rdd as 0 */
qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
@@ -714,9 +695,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
(MTHCA_FLIGHT_LIMIT << 24) |
- MTHCA_QP_BIT_SRE |
- MTHCA_QP_BIT_SWE |
- MTHCA_QP_BIT_SAE);
+ MTHCA_QP_BIT_SWE);
if (qp->sq_policy == IB_SIGNAL_ALL_WR)
qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
if (attr_mask & IB_QP_RETRY_CNT) {
@@ -724,10 +703,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
}
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
- ffs(attr->max_dest_rd_atomic) - 1 : 0,
- 7) << 21);
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic) {
+ qp_context->params1 |=
+ cpu_to_be32(MTHCA_QP_BIT_SRE |
+ MTHCA_QP_BIT_SAE);
+ qp_context->params1 |=
+ cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+ }
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
}
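The fls(n - 1) expression above rounds the requested depth up to the next power of two and yields the 3-bit exponent programmed into bits 21-23; mthca_query_qp() decodes it back with 1 << exp. A small illustrative helper (not part of the driver):

/* e.g. 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 2, 5 -> 3 */
static inline u32 rd_atomic_exponent(u32 requested)
{
	return requested ? fls(requested - 1) : 0;
}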
@@ -740,78 +723,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
}
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- /*
- * Only enable RDMA/atomics if we have responder
- * resources set to a non-zero value.
- */
- if (qp->resp_depth) {
- qp_context->params2 |=
- cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
- MTHCA_QP_BIT_RWE : 0);
- qp_context->params2 |=
- cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
- MTHCA_QP_BIT_RRE : 0);
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic)
qp_context->params2 |=
- cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
- MTHCA_QP_BIT_RAE : 0);
- }
+ cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask);
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
MTHCA_QP_OPTPAR_RRE |
MTHCA_QP_OPTPAR_RAE);
-
- qp->atomic_rd_en = attr->qp_access_flags;
- }
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
- u8 rra_max;
-
- if (qp->resp_depth && !attr->max_rd_atomic) {
- /*
- * Lowering our responder resources to zero.
- * Turn off RDMA/atomics as responder.
- * (RWE/RRE/RAE in params2 already zero)
- */
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
- MTHCA_QP_OPTPAR_RRE |
- MTHCA_QP_OPTPAR_RAE);
- }
-
- if (!qp->resp_depth && attr->max_rd_atomic) {
- /*
- * Increasing our responder resources from
- * zero. Turn on RDMA/atomics as appropriate.
- */
- qp_context->params2 |=
- cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
- MTHCA_QP_BIT_RWE : 0);
- qp_context->params2 |=
- cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
- MTHCA_QP_BIT_RRE : 0);
- qp_context->params2 |=
- cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
- MTHCA_QP_BIT_RAE : 0);
-
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
- MTHCA_QP_OPTPAR_RRE |
- MTHCA_QP_OPTPAR_RAE);
- }
-
- for (rra_max = 0;
- 1 << rra_max < attr->max_rd_atomic &&
- rra_max < dev->qp_table.rdb_shift;
- ++rra_max)
- ; /* nothing */
-
- qp_context->params2 |= cpu_to_be32(rra_max << 21);
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
-
- qp->resp_depth = attr->max_rd_atomic;
}
qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
+ if (ibqp->srq)
+ qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);
+
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
@@ -834,41 +765,201 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
}
- err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
- qp->qpn, 0, qp_param, 0, &status);
- if (status) {
- mthca_warn(dev, "modify QP %d returned status %02x.\n",
- state_table[cur_state][new_state].trans, status);
- err = -EINVAL;
- }
+ if (ibqp->srq)
+ qp_context->srqn = cpu_to_be32(1 << 24 |
+ to_msrq(ibqp->srq)->srqn);
- if (!err)
- qp->state = new_state;
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY &&
+ attr->en_sqd_async_notify)
+ sqd_event = 1 << 31;
- kfree(mailbox);
+ err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
+ mailbox, sqd_event);
+ if (err) {
+ mthca_warn(dev, "modify QP %d->%d returned %d.\n",
+ cur_state, new_state, err);
+ goto out_mailbox;
+ }
+
+ qp->state = new_state;
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
store_attrs(to_msqp(qp), attr, attr_mask);
/*
- * If we are moving QP0 to RTR, bring the IB link up; if we
- * are moving QP0 to RESET or ERROR, bring the link back down.
+ * If we moved QP0 to RTR, bring the IB link up; if we moved
+ * QP0 to RESET or ERROR, bring the link back down.
*/
if (is_qp0(dev, qp)) {
if (cur_state != IB_QPS_RTR &&
new_state == IB_QPS_RTR)
- init_port(dev, to_msqp(qp)->port);
+ init_port(dev, qp->port);
if (cur_state != IB_QPS_RESET &&
cur_state != IB_QPS_ERR &&
(new_state == IB_QPS_RESET ||
new_state == IB_QPS_ERR))
- mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
+ mthca_CLOSE_IB(dev, qp->port);
+ }
+
+ /*
+ * If we moved a kernel QP to RESET, clean up all old CQ
+ * entries and reinitialize the QP.
+ */
+ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
+
+ mthca_wq_reset(&qp->sq);
+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+
+ mthca_wq_reset(&qp->rq);
+ qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
+
+ if (mthca_is_memfree(dev)) {
+ *qp->sq.db = 0;
+ *qp->rq.db = 0;
+ }
+ }
+
+out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+out:
+ return err;
+}
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+
+ mutex_lock(&qp->mutex);
+ if (attr_mask & IB_QP_CUR_STATE) {
+ cur_state = attr->cur_qp_state;
+ } else {
+ spin_lock_irq(&qp->sq.lock);
+ spin_lock(&qp->rq.lock);
+ cur_state = qp->state;
+ spin_unlock(&qp->rq.lock);
+ spin_unlock_irq(&qp->sq.lock);
+ }
+
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED)) {
+ mthca_dbg(dev, "Bad QP transition (transport %d) "
+ "%d->%d with attr 0x%08x\n",
+ qp->transport, cur_state, new_state,
+ attr_mask);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+ attr->pkey_index, dev->limits.pkey_table_len-1);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+ mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+ mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+ attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+ mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+ attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
}
+ err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+ mutex_unlock(&qp->mutex);
return err;
}
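For context, ib_modify_qp_is_ok() enforces the standard verbs state machine checked above; a typical consumer sequence that exercises this path brings an RC QP from RESET through INIT and RTR to RTS with the required attribute mask at each step. The sketch below is illustrative only (values and error handling are placeholders, and it assumes the usual rdma/ib_verbs.h declarations):

static int bring_up_rc_qp(struct ib_qp *qp, u8 port, u32 dest_qpn, u16 dlid)
{
	struct ib_qp_attr attr = {
		.qp_state	 = IB_QPS_INIT,
		.pkey_index	 = 0,
		.port_num	 = port,
		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
	};
	int err;

	err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
	if (err)
		return err;

	memset(&attr, 0, sizeof attr);
	attr.qp_state		= IB_QPS_RTR;
	attr.path_mtu		= IB_MTU_1024;
	attr.dest_qp_num	= dest_qpn;
	attr.rq_psn		= 0;
	attr.max_dest_rd_atomic	= 1;
	attr.min_rnr_timer	= 12;
	attr.ah_attr.dlid	= dlid;
	attr.ah_attr.port_num	= port;
	err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
			   IB_QP_DEST_QPN | IB_QP_RQ_PSN |
			   IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
	if (err)
		return err;

	memset(&attr, 0, sizeof attr);
	attr.qp_state	   = IB_QPS_RTS;
	attr.timeout	   = 14;
	attr.retry_cnt	   = 7;
	attr.rnr_retry	   = 7;
	attr.sq_psn	   = 0;
	attr.max_rd_atomic = 1;
	return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_TIMEOUT |
			    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
			    IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC);
}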
+static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
+{
+ /*
+ * Calculate the maximum size of WQE s/g segments, excluding
+ * the next segment and other non-data segments.
+ */
+ int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
+
+ switch (qp->transport) {
+ case MLX:
+ max_data_size -= 2 * sizeof (struct mthca_data_seg);
+ break;
+
+ case UD:
+ if (mthca_is_memfree(dev))
+ max_data_size -= sizeof (struct mthca_arbel_ud_seg);
+ else
+ max_data_size -= sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ default:
+ max_data_size -= sizeof (struct mthca_raddr_seg);
+ break;
+ }
+
+ return max_data_size;
+}
+
+static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
+{
+ /* We don't support inline data for kernel QPs (yet). */
+ return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
+}
+
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int max_data_size = mthca_max_data_size(dev, qp,
+ min(dev->limits.max_desc_sz,
+ 1 << qp->sq.wqe_shift));
+
+ qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
+
+ qp->sq.max_gs = min_t(int, dev->limits.max_sg,
+ max_data_size / sizeof (struct mthca_data_seg));
+ qp->rq.max_gs = min_t(int, dev->limits.max_sg,
+ (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg));
+}
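The two helpers above mean the usable gather entries per WQE are bounded both by dev->limits.max_sg and by what is left of the descriptor once the fixed segments are subtracted; roughly, for a UD send queue on a mem-free HCA (illustrative helper, sizes come from this driver's WQE structures):

static int ud_send_sge_that_fit(int desc_sz)
{
	int data = desc_sz - sizeof (struct mthca_next_seg)
			   - sizeof (struct mthca_arbel_ud_seg);

	return data / sizeof (struct mthca_data_seg);
}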
+
/*
* Allocate and register buffer for WQEs. qp->rq.max, sq.max,
* rq.max_gs and sq.max_gs must all be assigned.
@@ -881,42 +972,73 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
struct mthca_qp *qp)
{
int size;
- int i;
- int npages, shift;
- dma_addr_t t;
- u64 *dma_list = NULL;
int err = -ENOMEM;
size = sizeof (struct mthca_next_seg) +
qp->rq.max_gs * sizeof (struct mthca_data_seg);
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
qp->rq.wqe_shift++)
; /* nothing */
- size = sizeof (struct mthca_next_seg) +
- qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
switch (qp->transport) {
case MLX:
size += 2 * sizeof (struct mthca_data_seg);
break;
+
case UD:
- if (mthca_is_memfree(dev))
- size += sizeof (struct mthca_arbel_ud_seg);
- else
- size += sizeof (struct mthca_tavor_ud_seg);
+ size += mthca_is_memfree(dev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg);
break;
+
+ case UC:
+ size += sizeof (struct mthca_raddr_seg);
+ break;
+
+ case RC:
+ size += sizeof (struct mthca_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ size = max_t(int, size,
+ sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg));
+ break;
+
default:
- /* bind seg is as big as atomic + raddr segs */
- size += sizeof (struct mthca_bind_seg);
+ break;
}
+ /* Make sure that we have enough space for a bind request */
+ size = max_t(int, size, sizeof (struct mthca_bind_seg));
+
+ size += sizeof (struct mthca_next_seg);
+
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
qp->sq.wqe_shift++)
; /* nothing */
qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1 << qp->sq.wqe_shift);
+
+ /*
+ * If this is a userspace QP, we don't actually have to
+ * allocate anything. All we need is to calculate the WQE
+ * sizes and the send_wqe_offset, so we're done now.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
size = PAGE_ALIGN(qp->send_wqe_offset +
(qp->sq.max << qp->sq.wqe_shift));
@@ -925,100 +1047,31 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
if (!qp->wrid)
goto err_out;
- if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
- qp->is_direct = 1;
- npages = 1;
- shift = get_order(size) + PAGE_SHIFT;
-
- if (0)
- mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
- size, shift);
-
- qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
- if (!qp->queue.direct.buf)
- goto err_out;
-
- pci_unmap_addr_set(&qp->queue.direct, mapping, t);
-
- memset(qp->queue.direct.buf, 0, size);
-
- while (t & ((1 << shift) - 1)) {
- --shift;
- npages *= 2;
- }
-
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
- if (!dma_list)
- goto err_out_free;
-
- for (i = 0; i < npages; ++i)
- dma_list[i] = t + i * (1 << shift);
- } else {
- qp->is_direct = 0;
- npages = size / PAGE_SIZE;
- shift = PAGE_SHIFT;
-
- if (0)
- mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);
-
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
- if (!dma_list)
- goto err_out;
-
- qp->queue.page_list = kmalloc(npages *
- sizeof *qp->queue.page_list,
- GFP_KERNEL);
- if (!qp->queue.page_list)
- goto err_out;
-
- for (i = 0; i < npages; ++i) {
- qp->queue.page_list[i].buf =
- pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
- if (!qp->queue.page_list[i].buf)
- goto err_out_free;
-
- memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);
-
- pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
- dma_list[i] = t;
- }
- }
-
- err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
- npages, 0, size,
- MTHCA_MPT_FLAG_LOCAL_READ,
- &qp->mr);
+ err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
+ &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
if (err)
- goto err_out_free;
+ goto err_out;
- kfree(dma_list);
return 0;
- err_out_free:
- if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
- } else
- for (i = 0; i < npages; ++i) {
- if (qp->queue.page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
-
- }
-
- err_out:
+err_out:
kfree(qp->wrid);
- kfree(dma_list);
return err;
}
-static int mthca_alloc_memfree(struct mthca_dev *dev,
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- int ret = 0;
+ mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift)),
+ &qp->queue, qp->is_direct, &qp->mr);
+ kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret;
if (mthca_is_memfree(dev)) {
ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1034,30 +1087,10 @@ static int mthca_alloc_memfree(struct mthca_dev *dev,
if (ret)
goto err_eqpc;
- qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
- qp->qpn, &qp->rq.db);
- if (qp->rq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rdb;
- }
-
- qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
- qp->qpn, &qp->sq.db);
- if (qp->sq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rq_db;
- }
}
return 0;
-err_rq_db:
- mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-
-err_rdb:
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
-
err_eqpc:
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
@@ -1067,27 +1100,42 @@ err_qpc:
return ret;
}
-static void mthca_free_memfree(struct mthca_dev *dev,
+static void mthca_unmap_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
if (mthca_is_memfree(dev)) {
- mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
- mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
- mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0)
+ return -ENOMEM;
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ return -ENOMEM;
+ }
}
+
+ return 0;
}
-static void mthca_wq_init(struct mthca_wq* wq)
+static void mthca_free_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
{
- spin_lock_init(&wq->lock);
- wq->next_ind = 0;
- wq->last_comp = wq->max - 1;
- wq->head = 0;
- wq->tail = 0;
- wq->last = NULL;
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ }
}
static int mthca_alloc_qp_common(struct mthca_dev *dev,
@@ -1099,27 +1147,49 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
{
int ret;
int i;
+ struct mthca_next_seg *next;
- atomic_set(&qp->refcount, 1);
+ qp->refcount = 1;
+ init_waitqueue_head(&qp->wait);
+ mutex_init(&qp->mutex);
qp->state = IB_QPS_RESET;
qp->atomic_rd_en = 0;
qp->resp_depth = 0;
qp->sq_policy = send_policy;
- mthca_wq_init(&qp->sq);
- mthca_wq_init(&qp->rq);
+ mthca_wq_reset(&qp->sq);
+ mthca_wq_reset(&qp->rq);
- ret = mthca_alloc_memfree(dev, qp);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ ret = mthca_map_memfree(dev, qp);
if (ret)
return ret;
ret = mthca_alloc_wqe_buf(dev, pd, qp);
if (ret) {
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ mthca_adjust_qp_caps(dev, pd, qp);
+
+ /*
+ * If this is a userspace QP, we're done now. The doorbells
+ * will be allocated and buffers will be initialized in
+ * userspace.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret) {
+ mthca_free_wqe_buf(dev, qp);
+ mthca_unmap_memfree(dev, qp);
return ret;
}
if (mthca_is_memfree(dev)) {
- struct mthca_next_seg *next;
struct mthca_data_seg *scatter;
int size = (sizeof (struct mthca_next_seg) +
qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
@@ -1142,27 +1212,58 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
qp->sq.wqe_shift) +
qp->send_wqe_offset);
}
+ } else {
+ for (i = 0; i < qp->rq.max; ++i) {
+ next = get_recv_wqe(qp, i);
+ next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+ qp->rq.wqe_shift) | 1);
+ }
+
}
+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+ qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
+
return 0;
}
-static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
+static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
+ struct mthca_pd *pd, struct mthca_qp *qp)
{
- int i;
-
- if (!mthca_is_memfree(dev))
- return;
+ int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
+
+ /* Sanity check QP size before proceeding */
+ if (cap->max_send_wr > dev->limits.max_wqes ||
+ cap->max_recv_wr > dev->limits.max_wqes ||
+ cap->max_send_sge > dev->limits.max_sg ||
+ cap->max_recv_sge > dev->limits.max_sg ||
+ cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
+ return -EINVAL;
- for (i = 0; 1 << i < qp->rq.max; ++i)
- ; /* nothing */
+ /*
+ * For MLX transport we need 2 extra send gather entries:
+ * one for the header and one for the checksum at the end
+ */
+ if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
+ return -EINVAL;
- qp->rq.max = 1 << i;
+ if (mthca_is_memfree(dev)) {
+ qp->rq.max = cap->max_recv_wr ?
+ roundup_pow_of_two(cap->max_recv_wr) : 0;
+ qp->sq.max = cap->max_send_wr ?
+ roundup_pow_of_two(cap->max_send_wr) : 0;
+ } else {
+ qp->rq.max = cap->max_recv_wr;
+ qp->sq.max = cap->max_send_wr;
+ }
- for (i = 0; 1 << i < qp->sq.max; ++i)
- ; /* nothing */
+ qp->rq.max_gs = cap->max_recv_sge;
+ qp->sq.max_gs = max_t(int, cap->max_send_sge,
+ ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
+ MTHCA_INLINE_CHUNK_SIZE) /
+ sizeof (struct mthca_data_seg));
- qp->sq.max = 1 << i;
+ return 0;
}
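A sketch of the sizing rule above: mem-free (Arbel) hardware wants power-of-two queue depths, while Tavor takes the caller's numbers as-is (helper is illustrative only):

static int wqes_to_allocate(struct mthca_dev *dev, int requested)
{
	if (!mthca_is_memfree(dev))
		return requested;

	/* e.g. a request for 100 WQEs is rounded up to 128 */
	return requested ? roundup_pow_of_two(requested) : 0;
}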
int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1171,12 +1272,11 @@ int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp)
{
int err;
- mthca_align_qp_size(dev, qp);
-
switch (type) {
case IB_QPT_RC: qp->transport = RC; break;
case IB_QPT_UC: qp->transport = UC; break;
@@ -1184,10 +1284,17 @@ int mthca_alloc_qp(struct mthca_dev *dev,
default: return -EINVAL;
}
+ err = mthca_set_qp_size(dev, cap, pd, qp);
+ if (err)
+ return err;
+
qp->qpn = mthca_alloc(&dev->qp_table.alloc);
if (qp->qpn == -1)
return -ENOMEM;
+ /* initialize port to zero for error-catching. */
+ qp->port = 0;
+
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
send_policy, qp);
if (err) {
@@ -1203,19 +1310,53 @@ int mthca_alloc_qp(struct mthca_dev *dev,
return 0;
}
+static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+ if (send_cq == recv_cq) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
+{
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
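mthca_lock_cqs()/mthca_unlock_cqs() take the two CQ locks in a single global order (lower CQ number first), so concurrent callers that see the same pair in opposite roles cannot deadlock, and the send_cq == recv_cq case collapses to one lock. The same idiom in miniature, with hypothetical locks keyed by an id:

static void lock_pair(spinlock_t *a, u32 a_id, spinlock_t *b, u32 b_id)
{
	if (a == b) {
		spin_lock_irq(a);
	} else if (a_id < b_id) {
		spin_lock_irq(a);
		spin_lock_nested(b, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock_irq(b);
		spin_lock_nested(a, SINGLE_DEPTH_NESTING);
	}
}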
+
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp)
{
- int err = 0;
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
+ int err;
- mthca_align_qp_size(dev, &sqp->qp);
+ sqp->qp.transport = MLX;
+ err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+ if (err)
+ return err;
sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1233,7 +1374,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
if (err)
goto err_out;
- sqp->port = port;
+ sqp->qp.port = port;
sqp->qp.qpn = mqpn;
sqp->qp.transport = MLX;
@@ -1251,17 +1392,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
* Lock CQs here, so that CQ polling code can do QP lookup
* without taking a lock.
*/
- spin_lock_irq(&send_cq->lock);
- if (send_cq != recv_cq)
- spin_lock(&recv_cq->lock);
+ mthca_lock_cqs(send_cq, recv_cq);
spin_lock(&dev->qp_table.lock);
mthca_array_clear(&dev->qp_table.qp, mqpn);
spin_unlock(&dev->qp_table.lock);
- if (send_cq != recv_cq)
- spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ mthca_unlock_cqs(send_cq, recv_cq);
err_out:
dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1270,12 +1407,20 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
return err;
}
+static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ int c;
+
+ spin_lock_irq(&dev->qp_table.lock);
+ c = qp->refcount;
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ return c;
+}
+
void mthca_free_qp(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- u8 status;
- int size;
- int i;
struct mthca_cq *send_cq;
struct mthca_cq *recv_cq;
@@ -1286,50 +1431,38 @@ void mthca_free_qp(struct mthca_dev *dev,
* Lock CQs here, so that CQ polling code can do QP lookup
* without taking a lock.
*/
- spin_lock_irq(&send_cq->lock);
- if (send_cq != recv_cq)
- spin_lock(&recv_cq->lock);
+ mthca_lock_cqs(send_cq, recv_cq);
spin_lock(&dev->qp_table.lock);
mthca_array_clear(&dev->qp_table.qp,
qp->qpn & (dev->limits.num_qps - 1));
+ --qp->refcount;
spin_unlock(&dev->qp_table.lock);
- if (send_cq != recv_cq)
- spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ mthca_unlock_cqs(send_cq, recv_cq);
- atomic_dec(&qp->refcount);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
+ wait_event(qp->wait, !get_qp_refcount(dev, qp));
if (qp->state != IB_QPS_RESET)
- mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
-
- mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
- if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
- mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
+ mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+ NULL, 0);
- mthca_free_mr(dev, &qp->mr);
-
- size = PAGE_ALIGN(qp->send_wqe_offset +
- (qp->sq.max << qp->sq.wqe_shift));
+ /*
+ * If this is a userspace QP, the buffers, MR, CQs and so on
+ * will be cleaned up in userspace, so all we have to do is
+ * unref the mem-free tables and free the QPN in our table.
+ */
+ if (!qp->ibqp.uobject) {
+ mthca_cq_clean(dev, recv_cq, qp->qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
- if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
- } else {
- for (i = 0; i < size / PAGE_SIZE; ++i) {
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
- }
+ mthca_free_memfree(dev, qp);
+ mthca_free_wqe_buf(dev, qp);
}
- kfree(qp->wrid);
-
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
@@ -1349,9 +1482,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
{
int header_size;
int err;
+ u16 pkey;
- ib_ud_header_init(256, /* assume a MAD */
- sqp->ud_header.grh_present,
+ ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
+ mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
@@ -1359,8 +1493,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid == 0xffff ?
- MTHCA_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
mlx->rlid = sqp->ud_header.lrh.destination_lid;
mlx->vcrc = 0;
@@ -1373,25 +1507,23 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->imm_data;
+ sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
default:
return -EINVAL;
}
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
- if (sqp->ud_header.lrh.destination_lid == 0xffff)
- sqp->ud_header.lrh.source_lid = 0xffff;
+ if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+ sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(&dev->ib_dev, sqp->port,
- sqp->pkey_index,
- &sqp->ud_header.bth.pkey);
+ ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+ sqp->pkey_index, &pkey);
else
- ib_get_cached_pkey(&dev->ib_dev, sqp->port,
- wr->wr.ud.pkey_index,
- &sqp->ud_header.bth.pkey);
- cpu_to_be16s(&sqp->ud_header.bth.pkey);
+ ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+ wr->wr.ud.pkey_index, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
@@ -1428,6 +1560,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
return cur + nreq >= wq->max;
}
+static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
+ struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+
+}
+
+static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+ useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+
+}
+
+static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1440,8 +1611,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
- u32 f0 = 0;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
int ind;
u8 op0 = 0;
@@ -1476,7 +1654,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
@@ -1486,39 +1664,39 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mthca_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mthca_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mthca_atomic_seg *) wqe)->compare = 0;
- }
-
+ set_atomic_seg(wqe, wr);
wqe += sizeof (struct mthca_atomic_seg);
- size += sizeof (struct mthca_raddr_seg) / 16 +
- sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
case IB_WR_RDMA_READ:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1530,16 +1708,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- ((struct mthca_tavor_ud_seg *) wqe)->lkey =
- cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
- ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
- cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
- ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mthca_tavor_ud_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
- wqe += sizeof (struct mthca_tavor_ud_seg);
+ set_tavor_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_tavor_ud_seg);
size += sizeof (struct mthca_tavor_ud_seg) / 16;
break;
@@ -1564,13 +1734,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
@@ -1592,19 +1757,21 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- if (prev_wqe) {
- ((struct mthca_next_seg *) prev_wqe)->nda_op =
- cpu_to_be32(((ind << qp->sq.wqe_shift) +
- qp->send_wqe_offset) |
- mthca_opcode[wr->opcode]);
- wmb();
- ((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
- }
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
- if (!size0) {
+ if (!nreq) {
size0 = size;
op0 = mthca_opcode[wr->opcode];
+ f0 = wr->send_flags & IB_SEND_FENCE ?
+ MTHCA_SEND_DOORBELL_FENCE : 0;
}
++ind;
@@ -1614,17 +1781,18 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (likely(nreq)) {
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
- qp->send_wqe_offset) | f0 | op0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
-
wmb();
- mthca_write64(doorbell,
+ mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) | f0 | op0,
+ (qp->qpn << 8) | size0,
dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order:
+ */
+ mmiowb();
}
qp->sq.next_ind = ind;
@@ -1644,7 +1812,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
+ /*
+ * size0 is only used if nreq != 0, and it will always be
+ * initialized the first time through the main loop before
+ * nreq is incremented. So nreq cannot become non-zero
+ * without initializing size0, and it is in fact never used
+ * uninitialized.
+ */
+ int uninitialized_var(size0);
int ind;
void *wqe;
void *prev_wqe;
@@ -1655,7 +1830,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
ind = qp->rq.next_ind;
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ for (nreq = 0; wr; wr = wr->next) {
if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
mthca_err(dev, "RQ %06x full (%u head, %u tail,"
" %d max, %d nreq)\n", qp->qpn,
@@ -1670,7 +1845,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
prev_wqe = qp->rq.last;
qp->rq.last = wqe;
- ((struct mthca_next_seg *) wqe)->nda_op = 0;
((struct mthca_next_seg *) wqe)->ee_nds =
cpu_to_be32(MTHCA_NEXT_DBD);
((struct mthca_next_seg *) wqe)->flags = 0;
@@ -1685,51 +1859,56 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
qp->wrid[ind] = wr->wr_id;
- if (likely(prev_wqe)) {
- ((struct mthca_next_seg *) prev_wqe)->nda_op =
- cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
- wmb();
- ((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32(MTHCA_NEXT_DBD | size);
- }
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size);
- if (!size0)
+ if (!nreq)
size0 = size;
++ind;
if (unlikely(ind >= qp->rq.max))
ind -= qp->rq.max;
+
+ ++nreq;
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ wmb();
+
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ qp->rq.next_ind = ind;
+ qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
+ }
}
out:
if (likely(nreq)) {
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
-
wmb();
- mthca_write64(doorbell,
- dev->kar + MTHCA_RECEIVE_DOORBELL,
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
qp->rq.next_ind = ind;
qp->rq.head += nreq;
+ /*
+ * Make sure doorbells don't leak out of RQ spinlock and reach
+ * the HCA out of order:
+ */
+ mmiowb();
+
spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}
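The request count in the second receive doorbell word above is only the low byte ((qpn << 8) | nreq), which is why the posting loop flushes a full batch every MTHCA_TAVOR_MAX_WQES_PER_RECV_DB requests rather than letting the count overflow. Pulled out as an illustrative helper, the flush is:

static void tavor_flush_recv_batch(struct mthca_dev *dev, struct mthca_qp *qp,
				   int size0, int nreq)
{
	wmb();	/* WQEs must be visible before the doorbell write */
	mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
		      (qp->qpn << 8) | nreq,
		      dev->kar + MTHCA_RECEIVE_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}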
@@ -1739,6 +1918,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
+ u32 dbhi;
void *wqe;
void *prev_wqe;
unsigned long flags;
@@ -1746,8 +1926,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
- u32 f0 = 0;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
int ind;
u8 op0 = 0;
@@ -1758,6 +1945,32 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ind = qp->sq.head & (qp->sq.max - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
+ nreq = 0;
+
+ dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+ ((qp->sq.head & 0xffff) << 8) | f0 | op0;
+
+ qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
+
+ /*
+ * Make sure doorbell record is written before we
+ * write MMIO send doorbell.
+ */
+ wmb();
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
mthca_err(dev, "SQ %06x full (%u head, %u tail,"
" %d max, %d nreq)\n", qp->qpn,
@@ -1777,10 +1990,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
((wr->send_flags & IB_SEND_SOLICITED) ?
cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+ ((wr->send_flags & IB_SEND_IP_CSUM) ?
+ cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
@@ -1790,39 +2005,39 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mthca_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mthca_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mthca_atomic_seg *) wqe)->compare = 0;
- }
+ set_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
+ break;
- wqe += sizeof (struct mthca_atomic_seg);
- size += sizeof (struct mthca_raddr_seg) / 16 +
- sizeof (struct mthca_atomic_seg);
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
break;
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UC:
+ switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- case IB_WR_RDMA_READ:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1834,14 +2049,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
- to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
- ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mthca_arbel_ud_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
- wqe += sizeof (struct mthca_arbel_ud_seg);
+ set_arbel_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_arbel_ud_seg);
size += sizeof (struct mthca_arbel_ud_seg) / 16;
break;
@@ -1866,13 +2075,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
@@ -1894,19 +2098,21 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- if (likely(prev_wqe)) {
- ((struct mthca_next_seg *) prev_wqe)->nda_op =
- cpu_to_be32(((ind << qp->sq.wqe_shift) +
- qp->send_wqe_offset) |
- mthca_opcode[wr->opcode]);
- wmb();
- ((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32(MTHCA_NEXT_DBD | size);
- }
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
- if (!size0) {
+ if (!nreq) {
size0 = size;
op0 = mthca_opcode[wr->opcode];
+ f0 = wr->send_flags & IB_SEND_FENCE ?
+ MTHCA_SEND_DOORBELL_FENCE : 0;
}
++ind;
@@ -1916,12 +2122,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (likely(nreq)) {
- u32 doorbell[2];
-
- doorbell[0] = cpu_to_be32((nreq << 24) |
- ((qp->sq.head & 0xffff) << 8) |
- f0 | op0);
- doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+ dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
qp->sq.head += nreq;
@@ -1937,11 +2138,17 @@ out:
* write MMIO send doorbell.
*/
wmb();
- mthca_write64(doorbell,
- dev->kar + MTHCA_SEND_DOORBELL,
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock and reach
+ * the HCA out of order:
+ */
+ mmiowb();
+
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
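Ringing an Arbel send doorbell is a two-stage update, and the barriers above enforce the order: WQEs in memory first, then the doorbell record, then the MMIO doorbell. Collected into one illustrative helper:

static void arbel_ring_send_doorbell(struct mthca_dev *dev, struct mthca_qp *qp,
				     u32 dbhi, int size0)
{
	/* descriptors must be written before the doorbell record */
	wmb();
	*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);

	/* doorbell record must be written before the MMIO doorbell */
	wmb();
	mthca_write64(dbhi, (qp->qpn << 8) | size0,
		      dev->kar + MTHCA_SEND_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}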
@@ -1958,7 +2165,7 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int i;
void *wqe;
- spin_lock_irqsave(&qp->rq.lock, flags);
+ spin_lock_irqsave(&qp->rq.lock, flags);
/* XXX check that state is OK to post receive */
@@ -1988,20 +2195,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
- if (i < qp->rq.max_gs) {
- ((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
- ((struct mthca_data_seg *) wqe)->addr = 0;
- }
+ if (i < qp->rq.max_gs)
+ mthca_set_data_seg_inval(wqe);
qp->wrid[ind] = wr->wr_id;
@@ -2025,33 +2224,36 @@ out:
return err;
}
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
- int index, int *dbd, u32 *new_wqe)
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe)
{
struct mthca_next_seg *next;
+ /*
+ * For SRQs, all receive WQEs generate a CQE, so we're always
+ * at the end of the doorbell chain.
+ */
+ if (qp->ibqp.srq && !is_send) {
+ *new_wqe = 0;
+ return;
+ }
+
if (is_send)
next = get_send_wqe(qp, index);
else
next = get_recv_wqe(qp, index);
- if (mthca_is_memfree(dev))
- *dbd = 1;
- else
- *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
+ *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
if (next->ee_nds & cpu_to_be32(0x3f))
*new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
(next->ee_nds & cpu_to_be32(0x3f));
else
*new_wqe = 0;
-
- return 0;
}
-int __devinit mthca_init_qp_table(struct mthca_dev *dev)
+int mthca_init_qp_table(struct mthca_dev *dev)
{
int err;
- u8 status;
int i;
spin_lock_init(&dev->qp_table.lock);
@@ -2078,15 +2280,10 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev)
for (i = 0; i < 2; ++i) {
err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
- dev->qp_table.sqp_start + i * 2,
- &status);
- if (err)
- goto err_out;
- if (status) {
+ dev->qp_table.sqp_start + i * 2);
+ if (err) {
mthca_warn(dev, "CONF_SPECIAL_QP returned "
- "status %02x, aborting.\n",
- status);
- err = -EINVAL;
+ "%d, aborting.\n", err);
goto err_out;
}
}
@@ -2094,7 +2291,7 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev)
err_out:
for (i = 0; i < 2; ++i)
- mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+ mthca_CONF_SPECIAL_QP(dev, i, 0);
mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
mthca_alloc_cleanup(&dev->qp_table.alloc);
@@ -2102,13 +2299,13 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev)
return err;
}
-void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
+void mthca_cleanup_qp_table(struct mthca_dev *dev)
{
int i;
- u8 status;
for (i = 0; i < 2; ++i)
- mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+ mthca_CONF_SPECIAL_QP(dev, i, 0);
+ mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
mthca_alloc_cleanup(&dev->qp_table.alloc);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 8ea801271a4..74c6a942604 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -28,15 +28,12 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <linux/config.h>
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -48,6 +45,12 @@ int mthca_reset(struct mthca_dev *mdev)
u32 *hca_header = NULL;
u32 *bridge_header = NULL;
struct pci_dev *bridge = NULL;
+ int bridge_pcix_cap = 0;
+ int hca_pcie_cap = 0;
+ int hca_pcix_cap = 0;
+
+ u16 devctl;
+ u16 linkctl;
#define MTHCA_RESET_OFFSET 0xf0010
#define MTHCA_RESET_VALUE swab32(1)
@@ -71,8 +74,8 @@ int mthca_reset(struct mthca_dev *mdev)
bridge)) != NULL) {
if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
bridge->subordinate == mdev->pdev->bus) {
- mthca_dbg(mdev, "Found bridge: %s (%s)\n",
- pci_pretty_name(bridge), pci_name(bridge));
+ mthca_dbg(mdev, "Found bridge: %s\n",
+ pci_name(bridge));
break;
}
}
@@ -83,8 +86,8 @@ int mthca_reset(struct mthca_dev *mdev)
* assume we're in no-bridge mode and hope for
* the best.
*/
- mthca_warn(mdev, "No bridge found for %s (%s)\n",
- pci_pretty_name(mdev->pdev), pci_name(mdev->pdev));
+ mthca_warn(mdev, "No bridge found for %s\n",
+ pci_name(mdev->pdev));
}
}
@@ -109,6 +112,9 @@ int mthca_reset(struct mthca_dev *mdev)
}
}
+ hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+ hca_pcie_cap = pci_pcie_cap(mdev->pdev);
+
if (bridge) {
bridge_header = kmalloc(256, GFP_KERNEL);
if (!bridge_header) {
@@ -128,6 +134,13 @@ int mthca_reset(struct mthca_dev *mdev)
goto out;
}
}
+ bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+ if (!bridge_pcix_cap) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't locate HCA bridge "
+ "PCI-X capability, aborting.\n");
+ goto out;
+ }
}
/* actually hit reset */
@@ -177,6 +190,20 @@ int mthca_reset(struct mthca_dev *mdev)
good:
/* Now restore the PCI headers */
if (bridge) {
+ if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
+ bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
+ "split transaction control, aborting.\n");
+ goto out;
+ }
+ if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
+ bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
+ "split transaction control, aborting.\n");
+ goto out;
+ }
/*
* Bridge control register is at 0x3e, so we'll
* naturally restore it last in this loop.
@@ -202,6 +229,35 @@ good:
}
}
+ if (hca_pcix_cap) {
+ if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
+ hca_header[hca_pcix_cap / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI-X "
+ "command register, aborting.\n");
+ goto out;
+ }
+ }
+
+ if (hca_pcie_cap) {
+ devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
+ if (pcie_capability_write_word(mdev->pdev, PCI_EXP_DEVCTL,
+ devctl)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI Express "
+ "Device Control register, aborting.\n");
+ goto out;
+ }
+ linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
+ if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
+ linkctl)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI Express "
+ "Link control register, aborting.\n");
+ goto out;
+ }
+ }
+
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
new file mode 100644
index 00000000000..d22f970480c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+#include "mthca_wqe.h"
+
+enum {
+ MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
+};
+
+struct mthca_tavor_srq_context {
+ __be64 wqe_base_ds; /* low 6 bits is descriptor size */
+ __be32 state_pd;
+ __be32 lkey;
+ __be32 uar;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u32 reserved[2];
+};
+
+struct mthca_arbel_srq_context {
+ __be32 state_logsize_srqn;
+ __be32 lkey;
+ __be32 db_index;
+ __be32 logstride_usrpage;
+ __be64 wqe_base;
+ __be32 eq_pd;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u16 reserved1;
+ __be16 wqe_counter;
+ u32 reserved2[3];
+};
+
+static void *get_wqe(struct mthca_srq *srq, int n)
+{
+ if (srq->is_direct)
+ return srq->queue.direct.buf + (n << srq->wqe_shift);
+ else
+ return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf +
+ ((n << srq->wqe_shift) & (PAGE_SIZE - 1));
+}
+
+/*
+ * Return a pointer to the location within a WQE that we're using as a
+ * link when the WQE is in the free list. We use the imm field
+ * because in the Tavor case, posting a WQE may overwrite the next
+ * segment of the previous WQE, but a receive WQE will never touch the
+ * imm field. This avoids corrupting our free list if the previous
+ * WQE has already completed and been put on the free list when we
+ * post the next WQE.
+ */
+static inline int *wqe_to_link(void *wqe)
+{
+ return (int *) (wqe + offsetof(struct mthca_next_seg, imm));
+}
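A simplified userspace model (an assumption-laden sketch, not driver code) of the free list that wqe_to_link() implements: each free WQE stores the index of the next free WQE in its imm word, and -1 marks the tail, so posting a receive just pops the head:

	#include <stdio.h>

	#define MAX_WQE 4

	static int link_word[MAX_WQE];	/* stands in for the imm field of each WQE */

	int main(void)
	{
		int first_free = 0, i;

		/* link every WQE into one free list, as mthca_alloc_srq_buf() does */
		for (i = 0; i < MAX_WQE; ++i)
			link_word[i] = (i < MAX_WQE - 1) ? i + 1 : -1;

		/* posting a receive consumes the head of the list */
		while (first_free >= 0) {
			printf("posting into WQE %d\n", first_free);
			first_free = link_word[first_free];
		}
		printf("SRQ full\n");
		return 0;
	}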
+
+static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_srq *srq,
+ struct mthca_tavor_srq_context *context)
+{
+ memset(context, 0, sizeof *context);
+
+ context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
+ context->state_pd = cpu_to_be32(pd->pd_num);
+ context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
+
+ if (pd->ibpd.uobject)
+ context->uar =
+ cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ else
+ context->uar = cpu_to_be32(dev->driver_uar.index);
+}
+
+static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_srq *srq,
+ struct mthca_arbel_srq_context *context)
+{
+ int logsize, max;
+
+ memset(context, 0, sizeof *context);
+
+ /*
+ * Put max in a temporary variable to work around gcc bug
+ * triggered by ilog2() on sparc64.
+ */
+ max = srq->max;
+ logsize = ilog2(max);
+ context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
+ context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
+ context->db_index = cpu_to_be32(srq->db_index);
+ context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
+ if (pd->ibpd.uobject)
+ context->logstride_usrpage |=
+ cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ else
+ context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
+ context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
+}
+
+static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue,
+ srq->is_direct, &srq->mr);
+ kfree(srq->wrid);
+}
+
+static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct mthca_srq *srq)
+{
+ struct mthca_data_seg *scatter;
+ void *wqe;
+ int err;
+ int i;
+
+ if (pd->ibpd.uobject)
+ return 0;
+
+ srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
+ if (!srq->wrid)
+ return -ENOMEM;
+
+ err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
+ MTHCA_MAX_DIRECT_SRQ_SIZE,
+ &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
+ if (err) {
+ kfree(srq->wrid);
+ return err;
+ }
+
+ /*
+ * Now initialize the SRQ buffer so that all of the WQEs are
+ * linked into the list of free WQEs. In addition, set the
+ * scatter list L_Keys to the sentry value of 0x100.
+ */
+ for (i = 0; i < srq->max; ++i) {
+ struct mthca_next_seg *next;
+
+ next = wqe = get_wqe(srq, i);
+
+ if (i < srq->max - 1) {
+ *wqe_to_link(wqe) = i + 1;
+ next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
+ } else {
+ *wqe_to_link(wqe) = -1;
+ next->nda_op = 0;
+ }
+
+ for (scatter = wqe + sizeof (struct mthca_next_seg);
+ (void *) scatter < wqe + (1 << srq->wqe_shift);
+ ++scatter)
+ scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ }
+
+ srq->last = get_wqe(srq, srq->max - 1);
+
+ return 0;
+}
+
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct ib_srq_attr *attr, struct mthca_srq *srq)
+{
+ struct mthca_mailbox *mailbox;
+ int ds;
+ int err;
+
+ /* Sanity check SRQ size before proceeding */
+ if (attr->max_wr > dev->limits.max_srq_wqes ||
+ attr->max_sge > dev->limits.max_srq_sge)
+ return -EINVAL;
+
+ srq->max = attr->max_wr;
+ srq->max_gs = attr->max_sge;
+ srq->counter = 0;
+
+ if (mthca_is_memfree(dev))
+ srq->max = roundup_pow_of_two(srq->max + 1);
+ else
+ srq->max = srq->max + 1;
+
+ ds = max(64UL,
+ roundup_pow_of_two(sizeof (struct mthca_next_seg) +
+ srq->max_gs * sizeof (struct mthca_data_seg)));
+
+ if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
+ return -EINVAL;
+
+ srq->wqe_shift = ilog2(ds);
+
+ srq->srqn = mthca_alloc(&dev->srq_table.alloc);
+ if (srq->srqn == -1)
+ return -ENOMEM;
+
+ if (mthca_is_memfree(dev)) {
+ err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
+ if (err)
+ goto err_out;
+
+ if (!pd->ibpd.uobject) {
+ srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
+ srq->srqn, &srq->db);
+ if (srq->db_index < 0) {
+ err = -ENOMEM;
+ goto err_out_icm;
+ }
+ }
+ }
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_out_db;
+ }
+
+ err = mthca_alloc_srq_buf(dev, pd, srq);
+ if (err)
+ goto err_out_mailbox;
+
+ spin_lock_init(&srq->lock);
+ srq->refcount = 1;
+ init_waitqueue_head(&srq->wait);
+ mutex_init(&srq->mutex);
+
+ if (mthca_is_memfree(dev))
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ else
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+
+ err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
+
+ if (err) {
+ mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
+ goto err_out_free_buf;
+ }
+
+ spin_lock_irq(&dev->srq_table.lock);
+ if (mthca_array_set(&dev->srq_table.srq,
+ srq->srqn & (dev->limits.num_srqs - 1),
+ srq)) {
+ spin_unlock_irq(&dev->srq_table.lock);
+ goto err_out_free_srq;
+ }
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ mthca_free_mailbox(dev, mailbox);
+
+ srq->first_free = 0;
+ srq->last_free = srq->max - 1;
+
+ attr->max_wr = srq->max - 1;
+ attr->max_sge = srq->max_gs;
+
+ return 0;
+
+err_out_free_srq:
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
+ if (err)
+ mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
+
+err_out_free_buf:
+ if (!pd->ibpd.uobject)
+ mthca_free_srq_buf(dev, srq);
+
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+err_out_db:
+ if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+
+err_out_icm:
+ mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+
+err_out:
+ mthca_free(&dev->srq_table.alloc, srq->srqn);
+
+ return err;
+}
+
+static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ int c;
+
+ spin_lock_irq(&dev->srq_table.lock);
+ c = srq->refcount;
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ return c;
+}
+
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ struct mthca_mailbox *mailbox;
+ int err;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
+ return;
+ }
+
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
+ if (err)
+ mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
+
+ spin_lock_irq(&dev->srq_table.lock);
+ mthca_array_clear(&dev->srq_table.srq,
+ srq->srqn & (dev->limits.num_srqs - 1));
+ --srq->refcount;
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ wait_event(srq->wait, !get_srq_refcount(dev, srq));
+
+ if (!srq->ibsrq.uobject) {
+ mthca_free_srq_buf(dev, srq);
+ if (mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+ }
+
+ mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+ mthca_free(&dev->srq_table.alloc, srq->srqn);
+ mthca_free_mailbox(dev, mailbox);
+}
+
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ int ret = 0;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max;
+ if (attr->srq_limit > max_wr)
+ return -EINVAL;
+
+ mutex_lock(&srq->mutex);
+ ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit);
+ mutex_unlock(&srq->mutex);
+ }
+
+ return ret;
+}
+
+int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ struct mthca_mailbox *mailbox;
+ struct mthca_arbel_srq_context *arbel_ctx;
+ struct mthca_tavor_srq_context *tavor_ctx;
+ int err;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox);
+ if (err)
+ goto out;
+
+ if (mthca_is_memfree(dev)) {
+ arbel_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
+ } else {
+ tavor_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
+ }
+
+ srq_attr->max_wr = srq->max - 1;
+ srq_attr->max_sge = srq->max_gs;
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+
+ return err;
+}
+
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+ enum ib_event_type event_type)
+{
+ struct mthca_srq *srq;
+ struct ib_event event;
+
+ spin_lock(&dev->srq_table.lock);
+ srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
+ if (srq)
+ ++srq->refcount;
+ spin_unlock(&dev->srq_table.lock);
+
+ if (!srq) {
+ mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ if (!srq->ibsrq.event_handler)
+ goto out;
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.srq = &srq->ibsrq;
+ srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
+
+out:
+ spin_lock(&dev->srq_table.lock);
+ if (!--srq->refcount)
+ wake_up(&srq->wait);
+ spin_unlock(&dev->srq_table.lock);
+}
+
+/*
+ * This function must be called with IRQs disabled.
+ */
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
+{
+ int ind;
+ struct mthca_next_seg *last_free;
+
+ ind = wqe_addr >> srq->wqe_shift;
+
+ spin_lock(&srq->lock);
+
+ last_free = get_wqe(srq, srq->last_free);
+ *wqe_to_link(last_free) = ind;
+ last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
+ *wqe_to_link(get_wqe(srq, ind)) = -1;
+ srq->last_free = ind;
+
+ spin_unlock(&srq->lock);
+}
+
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ unsigned long flags;
+ int err = 0;
+ int first_ind;
+ int ind;
+ int next_ind;
+ int nreq;
+ int i;
+ void *wqe;
+ void *prev_wqe;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ first_ind = srq->first_free;
+
+ for (nreq = 0; wr; wr = wr->next) {
+ ind = srq->first_free;
+ wqe = get_wqe(srq, ind);
+ next_ind = *wqe_to_link(wqe);
+
+ if (unlikely(next_ind < 0)) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ prev_wqe = srq->last;
+ srq->last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ /* flags field will always remain 0 */
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ srq->last = prev_wqe;
+ break;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
+
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD);
+
+ srq->wrid[ind] = wr->wr_id;
+ srq->first_free = next_ind;
+
+ ++nreq;
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ /*
+ * Make sure that descriptors are written
+ * before doorbell is rung.
+ */
+ wmb();
+
+ mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ first_ind = srq->first_free;
+ }
+ }
+
+ if (likely(nreq)) {
+ /*
+ * Make sure that descriptors are written before
+ * doorbell is rung.
+ */
+ wmb();
+
+ mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ /*
+ * Make sure doorbells don't leak out of SRQ spinlock and
+ * reach the HCA out of order:
+ */
+ mmiowb();
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ unsigned long flags;
+ int err = 0;
+ int ind;
+ int next_ind;
+ int nreq;
+ int i;
+ void *wqe;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ ind = srq->first_free;
+ wqe = get_wqe(srq, ind);
+ next_ind = *wqe_to_link(wqe);
+
+ if (unlikely(next_ind < 0)) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ /* flags field will always remain 0 */
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
+
+ srq->wrid[ind] = wr->wr_id;
+ srq->first_free = next_ind;
+ }
+
+ if (likely(nreq)) {
+ srq->counter += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * we write doorbell record.
+ */
+ wmb();
+ *srq->db = cpu_to_be32(srq->counter);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+ return err;
+}
+
+int mthca_max_srq_sge(struct mthca_dev *dev)
+{
+ if (mthca_is_memfree(dev))
+ return dev->limits.max_sg;
+
+ /*
+ * SRQ allocations are based on powers of 2 for Tavor,
+ * (although they only need to be multiples of 16 bytes).
+ *
+ * Therefore, we need to base the max number of sg entries on
+ * the largest power of 2 descriptor size that is <= to the
+ * actual max WQE descriptor size, rather than return the
+ * max_sg value given by the firmware (which is based on WQE
+ * sizes as multiples of 16, not powers of 2).
+ *
+ * If SRQ implementation is changed for Tavor to be based on
+ * multiples of 16, the calculation below can be deleted and
+ * the FW max_sg value returned.
+ */
+ return min_t(int, dev->limits.max_sg,
+ ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg));
+}
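A worked example of the Tavor arithmetic in mthca_max_srq_sge(), using made-up firmware limits: take the largest power of two that fits in max_desc_sz, subtract the next-segment header, and divide by the size of one scatter entry.

	#include <stdio.h>

	int main(void)
	{
		int max_desc_sz = 1000;	/* hypothetical firmware limit */
		int max_sg = 59;	/* hypothetical firmware limit */
		int next_seg = 16;	/* sizeof (struct mthca_next_seg) */
		int data_seg = 16;	/* sizeof (struct mthca_data_seg) */
		int pow2, sge;

		/* largest power of 2 <= max_desc_sz: 512 in this example */
		for (pow2 = 1; pow2 * 2 <= max_desc_sz; pow2 *= 2)
			;

		sge = (pow2 - next_seg) / data_seg;	/* (512 - 16) / 16 = 31 */
		printf("max SRQ sge = %d\n", sge < max_sg ? sge : max_sg);
		return 0;
	}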
+
+int mthca_init_srq_table(struct mthca_dev *dev)
+{
+ int err;
+
+ if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+ return 0;
+
+ spin_lock_init(&dev->srq_table.lock);
+
+ err = mthca_alloc_init(&dev->srq_table.alloc,
+ dev->limits.num_srqs,
+ dev->limits.num_srqs - 1,
+ dev->limits.reserved_srqs);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->srq_table.srq,
+ dev->limits.num_srqs);
+ if (err)
+ mthca_alloc_cleanup(&dev->srq_table.alloc);
+
+ return err;
+}
+
+void mthca_cleanup_srq_table(struct mthca_dev *dev)
+{
+ if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+ return;
+
+ mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
+ mthca_alloc_cleanup(&dev->srq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 1c8791ded6f..ca5900c96fc 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -28,10 +28,10 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
+#include <asm/page.h> /* PAGE_SHIFT */
+
#include "mthca_dev.h"
#include "mthca_memfree.h"
@@ -58,7 +58,7 @@ int mthca_init_uar_table(struct mthca_dev *dev)
ret = mthca_alloc_init(&dev->uar_table.alloc,
dev->limits.num_uars,
dev->limits.num_uars - 1,
- dev->limits.reserved_uars);
+ dev->limits.reserved_uars + 1);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 00000000000..5fe56e81073
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mthca_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct mthca_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mthca_reg_mr {
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC 0x1
+ __u32 mr_attrs;
+ __u32 reserved;
+};
+
+struct mthca_create_cq {
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mthca_resize_cq {
+ __u32 lkey;
+ __u32 reserved;
+};
+
+struct mthca_create_srq {
+ __u32 lkey;
+ __u32 db_index;
+ __u64 db_page;
+};
+
+struct mthca_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct mthca_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */
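A userspace sketch (types mirrored locally, values hypothetical) of why these ABI structs carry addresses in __u64 rather than pointers: a 32-bit process and a 64-bit kernel then agree on the struct layout, and a pointer is passed by casting through uintptr_t.

	#include <stdint.h>
	#include <stdio.h>

	struct create_srq {			/* mirrors struct mthca_create_srq */
		uint32_t lkey;
		uint32_t db_index;
		uint64_t db_page;
	};

	int main(void)
	{
		static uint32_t doorbell_page[1024];	/* stand-in for a mmap()ed page */
		struct create_srq cmd = {
			.lkey     = 0x1234,		/* hypothetical values */
			.db_index = 7,
			.db_page  = (uint64_t) (uintptr_t) doorbell_page,
		};

		printf("cmd is %zu bytes on both 32- and 64-bit\n", sizeof cmd);
		return 0;
	}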
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
new file mode 100644
index 00000000000..341a5ae881c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_WQE_H
+#define MTHCA_WQE_H
+
+#include <linux/types.h>
+
+enum {
+ MTHCA_NEXT_DBD = 1 << 7,
+ MTHCA_NEXT_FENCE = 1 << 6,
+ MTHCA_NEXT_CQ_UPDATE = 1 << 3,
+ MTHCA_NEXT_EVENT_GEN = 1 << 2,
+ MTHCA_NEXT_SOLICIT = 1 << 1,
+ MTHCA_NEXT_IP_CSUM = 1 << 4,
+ MTHCA_NEXT_TCP_UDP_CSUM = 1 << 5,
+
+ MTHCA_MLX_VL15 = 1 << 17,
+ MTHCA_MLX_SLR = 1 << 16
+};
+
+enum {
+ MTHCA_INVAL_LKEY = 0x100,
+ MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256,
+ MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255
+};
+
+struct mthca_next_seg {
+ __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */
+ __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
+ __be32 flags; /* [3] CQ [2] Event [1] Solicit */
+ __be32 imm; /* immediate data */
+};
+
+struct mthca_tavor_ud_seg {
+ u32 reserved1;
+ __be32 lkey;
+ __be64 av_addr;
+ u32 reserved2[4];
+ __be32 dqpn;
+ __be32 qkey;
+ u32 reserved3[2];
+};
+
+struct mthca_arbel_ud_seg {
+ __be32 av[8];
+ __be32 dqpn;
+ __be32 qkey;
+ u32 reserved[2];
+};
+
+struct mthca_bind_seg {
+ __be32 flags; /* [31] Atomic [30] rem write [29] rem read */
+ u32 reserved;
+ __be32 new_rkey;
+ __be32 lkey;
+ __be64 addr;
+ __be64 length;
+};
+
+struct mthca_raddr_seg {
+ __be64 raddr;
+ __be32 rkey;
+ u32 reserved;
+};
+
+struct mthca_atomic_seg {
+ __be64 swap_add;
+ __be64 compare;
+};
+
+struct mthca_data_seg {
+ __be32 byte_count;
+ __be32 lkey;
+ __be64 addr;
+};
+
+struct mthca_mlx_seg {
+ __be32 nda_op;
+ __be32 nds;
+ __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate
+ [11:8] SL [3] C [2] E */
+ __be16 rlid;
+ __be16 vcrc;
+};
+
+static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
+ struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
+{
+ dseg->byte_count = 0;
+ dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ dseg->addr = 0;
+}
+
+#endif /* MTHCA_WQE_H */
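A small illustration (hypothetical values, not driver code) of the nda_op encoding documented in struct mthca_next_seg above: bits [31:6] hold the 64-byte-aligned offset of the next WQE and bits [4:0] hold that WQE's opcode, which is how the post-send paths chain descriptors.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t next_wqe = 3 << 6;	/* offset of next WQE, 64-byte aligned */
		uint32_t opcode = 0x08;		/* hypothetical send opcode */
		uint32_t nda_op = (next_wqe & ~0x3fu) | (opcode & 0x1f);

		printf("nda_op = 0x%08x\n", nda_op);	/* prints 0x000000c8 */
		return 0;
	}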