aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/ehca
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ehca')
-rw-r--r--drivers/infiniband/hw/ehca/Kconfig9
-rw-r--r--drivers/infiniband/hw/ehca/ehca_av.c98
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h294
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes_pSeries.h128
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c175
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c74
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c305
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c838
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h12
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h52
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c650
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mcast.c4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c2016
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.h34
-rw-r--r--drivers/infiniband/hw/ehca/ehca_pd.c36
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qes.h27
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c1652
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c550
-rw-r--r--drivers/infiniband/hw/ehca/ehca_sqp.c134
-rw-r--r--drivers/infiniband/hw/ehca/ehca_tools.h69
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c439
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c341
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h10
-rw-r--r--drivers/infiniband/hw/ehca/hcp_phyp.c18
-rw-r--r--drivers/infiniband/hw/ehca/hcp_phyp.h4
-rw-r--r--drivers/infiniband/hw/ehca/hipz_fns_core.h4
-rw-r--r--drivers/infiniband/hw/ehca/hipz_hw.h50
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.c250
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h76
29 files changed, 5360 insertions, 2989 deletions
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 922389b6439..59f807d8d58 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -1,16 +1,9 @@
config INFINIBAND_EHCA
tristate "eHCA support"
- depends on IBMEBUS && INFINIBAND
+ depends on IBMEBUS
---help---
This driver supports the IBM pSeries eHCA InfiniBand adapter.
To compile the driver as a module, choose M here. The module
will be called ib_ehca.
-config INFINIBAND_EHCA_SCALING
- bool "Scaling support (EXPERIMENTAL)"
- depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
- ---help---
- eHCA scaling support schedules the CQ callbacks to different CPUs.
-
- To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 3bac197f901..465926319f3 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -1,7 +1,7 @@
/*
* IBM eServer eHCA Infiniband device driver for Linux on POWER
*
- * adress vector functions
+ * address vector functions
*
* Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
* Khadija Souissi <souissik@de.ibm.com>
@@ -41,8 +41,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-
-#include <asm/current.h>
+#include <linux/slab.h>
#include "ehca_tools.h"
#include "ehca_iverbs.h"
@@ -50,6 +49,42 @@
static struct kmem_cache *av_cache;
+int ehca_calc_ipd(struct ehca_shca *shca, int port,
+ enum ib_rate path_rate, u32 *ipd)
+{
+ int path = ib_rate_to_mult(path_rate);
+ int link, ret;
+ struct ib_port_attr pa;
+
+ if (path_rate == IB_RATE_PORT_CURRENT) {
+ *ipd = 0;
+ return 0;
+ }
+
+ if (unlikely(path < 0)) {
+ ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
+ path_rate);
+ return -EINVAL;
+ }
+
+ ret = ehca_query_port(&shca->ib_device, port, &pa);
+ if (unlikely(ret < 0)) {
+ ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret);
+ return ret;
+ }
+
+ link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
+
+ if (path >= link)
+ /* no need to throttle if path faster than link */
+ *ipd = 0;
+ else
+ /* IPD = round((link / path) - 1) */
+ *ipd = ((link + (path >> 1)) / path) - 1;
+
+ return 0;
+}
+
struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
int ret;
@@ -57,7 +92,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
- av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+ av = kmem_cache_alloc(av_cache, GFP_KERNEL);
if (!av) {
ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
pd, ah_attr);
@@ -69,17 +104,15 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
av->av.slid_path_bits = ah_attr->src_path_bits;
if (ehca_static_rate < 0) {
- int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
- int ehca_mult =
- ib_rate_to_mult(shca->sport[ah_attr->port_num].rate );
-
- if (ah_mult >= ehca_mult)
- av->av.ipd = 0;
- else
- av->av.ipd = (ah_mult > 0) ?
- ((ehca_mult - 1) / ah_mult) : 0;
+ u32 ipd;
+ if (ehca_calc_ipd(shca, ah_attr->port_num,
+ ah_attr->static_rate, &ipd)) {
+ ret = -EINVAL;
+ goto create_ah_exit1;
+ }
+ av->av.ipd = ipd;
} else
- av->av.ipd = ehca_static_rate;
+ av->av.ipd = ehca_static_rate;
av->av.lnh = ah_attr->ah_flags;
av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
@@ -118,8 +151,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
}
- /* for the time being we use a hard coded PMTU of 2048 Bytes */
- av->av.pmtu = 4;
+ av->av.pmtu = shca->max_mtu;
/* dgid comes in grh.word_3 */
memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -137,15 +169,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
struct ehca_av *av;
struct ehca_ud_av new_ehca_av;
- struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
- u32 cur_pid = current->tgid;
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
- }
+ struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
+ ib_device);
memset(&new_ehca_av, 0, sizeof(new_ehca_av));
new_ehca_av.sl = ah_attr->sl;
@@ -193,7 +218,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
}
- new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+ new_ehca_av.pmtu = shca->max_mtu;
memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
sizeof(ah_attr->grh.dgid));
@@ -207,15 +232,6 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
- struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
- u32 cur_pid = current->tgid;
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
- }
memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
sizeof(ah_attr->grh.dgid));
@@ -238,16 +254,6 @@ int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
int ehca_destroy_ah(struct ib_ah *ah)
{
- struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
- u32 cur_pid = current->tgid;
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
- }
-
kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
return 0;
@@ -258,7 +264,7 @@ int ehca_init_av_cache(void)
av_cache = kmem_cache_create("ehca_cache_av",
sizeof(struct ehca_av), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!av_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1c722032319..bd45e0f3923 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -5,6 +5,7 @@
*
* Authors: Heiko J Schick <schickhj@de.ibm.com>
* Christoph Raisch <raisch@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -42,9 +43,6 @@
#ifndef __EHCA_CLASSES_H__
#define __EHCA_CLASSES_H__
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
-
struct ehca_module;
struct ehca_qp;
struct ehca_cq;
@@ -54,15 +52,27 @@ struct ehca_mw;
struct ehca_pd;
struct ehca_av;
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
+#include <linux/wait.h>
+#include <linux/mutex.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
#include "ehca_irq.h"
+#define EHCA_EQE_CACHE_SIZE 20
+#define EHCA_MAX_NUM_QUEUES 0xffff
+
+struct ehca_eqe_cache_entry {
+ struct ehca_eqe *eqe;
+ struct ehca_cq *cq;
+};
+
struct ehca_eq {
u32 length;
struct ipz_queue ipz_queue;
@@ -74,18 +84,35 @@ struct ehca_eq {
spinlock_t spinlock;
struct tasklet_struct interrupt_task;
u32 ist;
+ spinlock_t irq_spinlock;
+ struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
+};
+
+struct ehca_sma_attr {
+ u16 lid, lmc, sm_sl, sm_lid;
+ u16 pkey_tbl_len, pkeys[16];
};
struct ehca_sport {
struct ib_cq *ibcq_aqp1;
- struct ib_qp *ibqp_aqp1;
- enum ib_rate rate;
+ struct ib_qp *ibqp_sqp[2];
+ /* lock to serialize modify_qp() calls for sqp in normal
+ * and irq path (when event PORT_ACTIVE is received first time)
+ */
+ spinlock_t mod_sqp_lock;
enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
+ u32 pma_qp_nr;
};
+#define HCA_CAP_MR_PGSIZE_4K 0x80000000
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000
+#define HCA_CAP_MR_PGSIZE_1M 0x20000000
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000
+
struct ehca_shca {
struct ib_device ib_device;
- struct ibmebus_dev *ibmebus_dev;
+ struct platform_device *ofdev;
u8 num_ports;
int hw_level;
struct list_head shca_list;
@@ -96,19 +123,77 @@ struct ehca_shca {
struct ehca_mr *maxmr;
struct ehca_pd *pd;
struct h_galpas galpas;
+ struct mutex modify_mutex;
+ u64 hca_cap;
+ /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
+ u32 hca_cap_mr_pgsize;
+ int max_mtu;
+ int max_num_qps;
+ int max_num_cqs;
+ atomic_t num_cqs;
+ atomic_t num_qps;
};
struct ehca_pd {
struct ib_pd ib_pd;
struct ipz_pd fw_pd;
- u32 ownpid;
+ /* small queue mgmt */
+ struct mutex lock;
+ struct list_head free[2];
+ struct list_head full[2];
+};
+
+enum ehca_ext_qp_type {
+ EQPT_NORMAL = 0,
+ EQPT_LLQP = 1,
+ EQPT_SRQBASE = 2,
+ EQPT_SRQ = 3,
};
+/* struct to cache modify_qp()'s parms for GSI/SMI qp */
+struct ehca_mod_qp_parm {
+ int mask;
+ struct ib_qp_attr attr;
+};
+
+#define EHCA_MOD_QP_PARM_MAX 4
+
+#define QMAP_IDX_MASK 0xFFFFULL
+
+/* struct for tracking if cqes have been reported to the application */
+struct ehca_qmap_entry {
+ u16 app_wr_id;
+ u8 reported;
+ u8 cqe_req;
+};
+
+struct ehca_queue_map {
+ struct ehca_qmap_entry *map;
+ unsigned int entries;
+ unsigned int tail;
+ unsigned int left_to_poll;
+ unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */
+};
+
+/* function to calculate the next index for the qmap */
+static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
+{
+ unsigned int temp = cur_index + 1;
+ return (temp == limit) ? 0 : temp;
+}
+
struct ehca_qp {
- struct ib_qp ib_qp;
+ union {
+ struct ib_qp ib_qp;
+ struct ib_srq ib_srq;
+ };
u32 qp_type;
+ enum ehca_ext_qp_type ext_type;
+ enum ib_qp_state state;
struct ipz_queue ipz_squeue;
+ struct ehca_queue_map sq_map;
struct ipz_queue ipz_rqueue;
+ struct ehca_queue_map rq_map;
struct h_galpas galpas;
u32 qkey;
u32 real_qp_num;
@@ -119,15 +204,33 @@ struct ehca_qp {
struct ipz_qp_handle ipz_qp_handle;
struct ehca_pfqp pf;
struct ib_qp_init_attr init_attr;
- u64 uspace_squeue;
- u64 uspace_rqueue;
- u64 uspace_fwh;
struct ehca_cq *send_cq;
struct ehca_cq *recv_cq;
unsigned int sqerr_purgeflag;
struct hlist_node list_entries;
+ /* array to cache modify_qp()'s parms for GSI/SMI qp */
+ struct ehca_mod_qp_parm *mod_qp_parm;
+ int mod_qp_parm_idx;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_squeue;
+ u32 mm_count_rqueue;
+ u32 mm_count_galpa;
+ /* unsolicited ack circumvention */
+ int unsol_ack_circ;
+ int mtu_shift;
+ u32 message_count;
+ u32 packet_count;
+ atomic_t nr_events; /* events seen */
+ wait_queue_head_t wait_completion;
+ int mig_armed;
+ struct list_head sq_err_node;
+ struct list_head rq_err_node;
};
+#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
+#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
+
/* must be power of 2 */
#define QP_HASHTAB_LEN 8
@@ -142,13 +245,17 @@ struct ehca_cq {
struct ipz_cq_handle ipz_cq_handle;
struct ehca_pfcq pf;
spinlock_t cb_lock;
- u64 uspace_queue;
- u64 uspace_fwh;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
- u32 nr_callbacks;
+ u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+ atomic_t nr_events; /* #events seen */
+ wait_queue_head_t wait_completion;
spinlock_t task_lock;
- u32 ownpid;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_queue;
+ u32 mm_count_galpa;
+ struct list_head sqp_err_list;
+ struct list_head rqp_err_list;
};
enum ehca_mr_flag {
@@ -161,14 +268,16 @@ struct ehca_mr {
struct ib_mr ib_mr; /* must always be first in ehca_mr */
struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
} ib;
+ struct ib_umem *umem;
spinlock_t mrlock;
enum ehca_mr_flag flags;
- u32 num_pages; /* number of MR pages */
- u32 num_4k; /* number of 4k "page" portions to form MR */
+ u32 num_kpages; /* number of kernel pages */
+ u32 num_hwpages; /* number of hw pages to form MR */
+ u64 hwpage_size; /* hw page size used for this MR */
int acl; /* ACL (stored here for usage in reregister) */
u64 *start; /* virtual start address (stored here for */
- /* usage in reregister) */
+ /* usage in reregister) */
u64 size; /* size (stored here for usage in reregister) */
u32 fmr_page_size; /* page size for FMR */
u32 fmr_max_pages; /* max pages for FMR */
@@ -177,9 +286,6 @@ struct ehca_mr {
/* fw specific data */
struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
struct h_galpas galpas;
- /* data for userspace bridge */
- u32 nr_of_pages;
- void *pagearray;
};
struct ehca_mw {
@@ -201,26 +307,30 @@ enum ehca_mr_pgi_type {
struct ehca_mr_pginfo {
enum ehca_mr_pgi_type type;
- u64 num_pages;
- u64 page_cnt;
- u64 num_4k; /* number of 4k "page" portions */
- u64 page_4k_cnt; /* counter for 4k "page" portions */
- u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */
-
- /* type EHCA_MR_PGI_PHYS section */
- int num_phys_buf;
- struct ib_phys_buf *phys_buf_array;
- u64 next_buf;
-
- /* type EHCA_MR_PGI_USER section */
- struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
- u64 next_nmap;
-
- /* type EHCA_MR_PGI_FMR section */
- u64 *page_list;
- u64 next_listelem;
- /* next_4k also used within EHCA_MR_PGI_FMR */
+ u64 num_kpages;
+ u64 kpage_cnt;
+ u64 hwpage_size; /* hw page size used for this MR */
+ u64 num_hwpages; /* number of hw pages */
+ u64 hwpage_cnt; /* counter for hw pages */
+ u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
+
+ union {
+ struct { /* type EHCA_MR_PGI_PHYS section */
+ int num_phys_buf;
+ struct ib_phys_buf *phys_buf_array;
+ u64 next_buf;
+ } phy;
+ struct { /* type EHCA_MR_PGI_USER section */
+ struct ib_umem *region;
+ struct scatterlist *next_sg;
+ u64 next_nmap;
+ } usr;
+ struct { /* type EHCA_MR_PGI_FMR section */
+ u64 fmr_pgsize;
+ u64 *page_list;
+ u64 next_listelem;
+ } fmr;
+ } u;
};
/* output parameters for MR/FMR hipz calls */
@@ -248,20 +358,6 @@ struct ehca_ucontext {
struct ib_ucontext ib_ucontext;
};
-struct ehca_module *ehca_module_new(void);
-
-int ehca_module_delete(struct ehca_module *me);
-
-int ehca_eq_ctor(struct ehca_eq *eq);
-
-int ehca_eq_dtor(struct ehca_eq *eq);
-
-struct ehca_shca *ehca_shca_new(void);
-
-int ehca_shca_delete(struct ehca_shca *me);
-
-struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
-
int ehca_init_pd_cache(void);
void ehca_cleanup_pd_cache(void);
int ehca_init_cq_cache(void);
@@ -272,44 +368,53 @@ int ehca_init_av_cache(void);
void ehca_cleanup_av_cache(void);
int ehca_init_mrmw_cache(void);
void ehca_cleanup_mrmw_cache(void);
+int ehca_init_small_qp_cache(void);
+void ehca_cleanup_small_qp_cache(void);
-extern spinlock_t ehca_qp_idr_lock;
-extern spinlock_t ehca_cq_idr_lock;
+extern rwlock_t ehca_qp_idr_lock;
+extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
+extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;
-extern int ehca_use_hp_mr;
+extern bool ehca_use_hp_mr;
+extern bool ehca_scaling_code;
+extern int ehca_lock_hcalls;
+extern int ehca_nr_ports;
+extern int ehca_max_cq;
+extern int ehca_max_qp;
struct ipzu_queue_resp {
- u64 queue; /* points to first queue entry */
u32 qe_size; /* queue entry size */
u32 act_nr_of_sg;
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state;
- u32 dummy; /* padding for 8 byte alignment */
+ u32 offset; /* save offset within a page for small_qp */
};
struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
- struct h_galpas galpas;
+ u32 fw_handle_ofs;
+ u32 dummy;
};
struct ehca_create_qp_resp {
u32 qp_num;
u32 token;
u32 qp_type;
+ u32 ext_type;
u32 qkey;
/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
u32 real_qp_num;
- u32 dummy; /* padding for 8 byte alignment */
+ u32 fw_handle_ofs;
+ u32 dummy;
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
- struct h_galpas galpas;
};
struct ehca_alloc_cq_parms {
@@ -319,28 +424,59 @@ struct ehca_alloc_cq_parms {
struct ipz_eq_handle eq_handle;
};
+enum ehca_service_type {
+ ST_RC = 0,
+ ST_UC = 1,
+ ST_RD = 2,
+ ST_UD = 3,
+};
+
+enum ehca_ll_comp_flags {
+ LLQP_SEND_COMP = 0x20,
+ LLQP_RECV_COMP = 0x40,
+ LLQP_COMP_MASK = 0x60,
+};
+
+struct ehca_alloc_queue_parms {
+ /* input parameters */
+ int max_wr;
+ int max_sge;
+ int page_size;
+ int is_small;
+
+ /* output parameters */
+ u16 act_nr_wqes;
+ u8 act_nr_sges;
+ u32 queue_size; /* bytes for small queues, pages otherwise */
+};
+
struct ehca_alloc_qp_parms {
- int servicetype;
+ struct ehca_alloc_queue_parms squeue;
+ struct ehca_alloc_queue_parms rqueue;
+
+ /* input parameters */
+ enum ehca_service_type servicetype;
+ int qp_storage;
int sigtype;
- int daqp_ctrl;
- int max_send_sge;
- int max_recv_sge;
+ enum ehca_ext_qp_type ext_type;
+ enum ehca_ll_comp_flags ll_comp_flags;
int ud_av_l_key_ctl;
- u16 act_nr_send_wqes;
- u16 act_nr_recv_wqes;
- u8 act_nr_recv_sges;
- u8 act_nr_send_sges;
+ u32 token;
+ struct ipz_eq_handle eq_handle;
+ struct ipz_pd pd;
+ struct ipz_cq_handle send_cq_handle, recv_cq_handle;
- u32 nr_rq_pages;
- u32 nr_sq_pages;
+ u32 srq_qpn, srq_token, srq_limit;
- struct ipz_eq_handle ipz_eq_handle;
- struct ipz_pd pd;
+ /* output parameters */
+ u32 real_qp_num;
+ struct ipz_qp_handle qp_handle;
+ struct h_galpas galpas;
};
int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index 5665f213b81..689c35786dd 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -154,83 +154,55 @@ struct hcp_modify_qp_control_block {
u32 reserved_70_127[58]; /* 70 */
};
-#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0)
-#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2)
-#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3)
-#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4)
-#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6)
-#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8)
-#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9)
-#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13)
-#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15)
-#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16)
-#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17)
-#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18)
-#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19)
-#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20)
-#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21)
-#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22)
-#define MQPCB_DLID EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23)
-#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24)
-#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31)
-#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25)
-#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26)
-#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27)
-#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28)
-#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31)
-#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31)
-#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32)
-#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33)
-#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34)
-#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35)
-#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36)
-#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37)
-#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38)
-#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39)
-#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40)
-#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41)
-#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42)
-#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31)
-#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45)
-#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46)
-#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47)
-#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31)
-#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48)
-#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49)
-#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50)
-#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51)
+#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0)
+#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2)
+#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3)
+#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4)
+#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6)
+#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8)
+#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13)
+#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15)
+#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16)
+#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17)
+#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18)
+#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19)
+#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20)
+#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21)
+#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22)
+#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23)
+#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24)
+#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25)
+#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26)
+#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27)
+#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28)
+#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32)
+#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33)
+#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35)
+#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39)
+#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41)
+#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42)
+#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47)
+#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48)
+#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50)
+#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51)
#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 458fe19648a..8cc83753776 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -43,7 +43,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <asm/current.h>
+#include <linux/slab.h>
#include "ehca_iverbs.h"
#include "ehca_classes.h"
@@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
{
unsigned int qp_num = qp->real_qp_num;
unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
- unsigned long spl_flags;
+ unsigned long flags;
- spin_lock_irqsave(&cq->spinlock, spl_flags);
+ spin_lock_irqsave(&cq->spinlock, flags);
hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
- spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+ spin_unlock_irqrestore(&cq->spinlock, flags);
ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
cq->cq_number, qp_num);
@@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
struct hlist_node *iter;
struct ehca_qp *qp;
- unsigned long spl_flags;
+ unsigned long flags;
- spin_lock_irqsave(&cq->spinlock, spl_flags);
+ spin_lock_irqsave(&cq->spinlock, flags);
hlist_for_each(iter, &cq->qp_hashtab[key]) {
qp = hlist_entry(iter, struct ehca_qp, list_entries);
if (qp->real_qp_num == real_qp_num) {
@@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
break;
}
}
- spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+ spin_unlock_irqrestore(&cq->spinlock, flags);
if (ret)
ehca_err(cq->ib_cq.device,
"qp not found cq_num=%x real_qp_num=%x",
@@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
return ret;
}
-struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
{
struct ehca_qp *ret = NULL;
unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
@@ -113,7 +113,7 @@ struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
return ret;
}
-struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -128,47 +128,48 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
void *vpage;
u32 counter;
u64 rpage, cqx_fec, h_ret;
- int ipz_rc, ret, i;
+ int ipz_rc, i;
unsigned long flags;
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
- my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+ if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
+ ehca_err(device, "Unable to create CQ, max number of %i "
+ "CQs reached.", shca->max_num_cqs);
+ ehca_err(device, "To increase the maximum number of CQs "
+ "use the number_of_cqs module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
if (!my_cq) {
ehca_err(device, "Out of memory for ehca_cq struct device=%p",
device);
+ atomic_dec(&shca->num_cqs);
return ERR_PTR(-ENOMEM);
}
- memset(my_cq, 0, sizeof(struct ehca_cq));
memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
- my_cq->ownpid = current->tgid;
+ atomic_set(&my_cq->nr_events, 0);
+ init_waitqueue_head(&my_cq->wait_completion);
cq = &my_cq->ib_cq;
adapter_handle = shca->ipz_hca_handle;
param.eq_handle = shca->eq.ipz_eq_handle;
- do {
- if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
- cq = ERR_PTR(-ENOMEM);
- ehca_err(device, "Can't reserve idr nr. device=%p",
- device);
- goto create_cq_exit1;
- }
-
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
- } while (ret == -EAGAIN);
+ idr_preload(GFP_KERNEL);
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
+ my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ idr_preload_end();
- if (ret) {
+ if (my_cq->token < 0) {
cq = ERR_PTR(-ENOMEM);
ehca_err(device, "Can't allocate new idr entry. device=%p",
device);
@@ -184,15 +185,15 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
if (h_ret != H_SUCCESS) {
ehca_err(device, "hipz_h_alloc_resource_cq() failed "
- "h_ret=%lx device=%p", h_ret, device);
+ "h_ret=%lli device=%p", h_ret, device);
cq = ERR_PTR(ehca2ib_return_code(h_ret));
goto create_cq_exit2;
}
- ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
- EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+ ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
if (!ipz_rc) {
- ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+ ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
ipz_rc, device);
cq = ERR_PTR(-EINVAL);
goto create_cq_exit3;
@@ -206,7 +207,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
cq = ERR_PTR(-EAGAIN);
goto create_cq_exit4;
}
- rpage = virt_to_abs(vpage);
+ rpage = __pa(vpage);
h_ret = hipz_h_register_rpage_cq(adapter_handle,
my_cq->ipz_cq_handle,
@@ -220,7 +221,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
if (h_ret < H_SUCCESS) {
ehca_err(device, "hipz_h_register_rpage_cq() failed "
- "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+ "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
"act_pages=%i", my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
cq = ERR_PTR(-EINVAL);
@@ -232,7 +233,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
if ((h_ret != H_SUCCESS) || vpage) {
ehca_err(device, "Registration of pages not "
"complete ehca_cq=%p cq_num=%x "
- "h_ret=%lx", my_cq, my_cq->cq_number,
+ "h_ret=%lli", my_cq, my_cq->cq_number,
h_ret);
cq = ERR_PTR(-EAGAIN);
goto create_cq_exit4;
@@ -240,7 +241,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
} else {
if (h_ret != H_PAGE_REGISTERED) {
ehca_err(device, "Registration of page failed "
- "ehca_cq=%p cq_num=%x h_ret=%lx"
+ "ehca_cq=%p cq_num=%x h_ret=%lli "
"counter=%i act_pages=%i",
my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
@@ -254,7 +255,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
gal = my_cq->galpas.kernel;
cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
- ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
my_cq, my_cq->cq_number, cqx_fec);
my_cq->ib_cq.cqe = my_cq->nr_of_entries =
@@ -264,10 +265,12 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
for (i = 0; i < QP_HASHTAB_LEN; i++)
INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+ INIT_LIST_HEAD(&my_cq->sqp_err_list);
+ INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
- struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,103 +279,73 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
- ipz_queue->queue_length,
- (void**)&resp.ipz_queue.queue,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap queue pages");
- cq = ERR_PTR(ret);
- goto create_cq_exit4;
- }
- my_cq->uspace_queue = resp.ipz_queue.queue;
- resp.galpas = my_cq->galpas;
- ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap fw_handle");
- cq = ERR_PTR(ret);
- goto create_cq_exit5;
- }
- my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+ resp.fw_handle_ofs = (u32)
+ (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
- goto create_cq_exit6;
+ cq = ERR_PTR(-EFAULT);
+ goto create_cq_exit4;
}
}
return cq;
-create_cq_exit6:
- ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
- ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
create_cq_exit4:
- ipz_queue_dtor(&my_cq->ipz_queue);
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
create_cq_exit3:
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
if (h_ret != H_SUCCESS)
ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
- "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+ "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
create_cq_exit2:
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
idr_remove(&ehca_cq_idr, my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
create_cq_exit1:
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return cq;
}
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
- int ret;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int cq_num = my_cq->cq_number;
struct ib_device *device = cq->device;
struct ehca_shca *shca = container_of(device, struct ehca_shca,
ib_device);
struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
- u32 cur_pid = current->tgid;
unsigned long flags;
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_callbacks)
- yield();
+ if (cq->uobject) {
+ if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+ ehca_err(device, "Resources still referenced in "
+ "user space cq_num=%x", my_cq->cq_number);
+ return -EINVAL;
+ }
+ }
+ /*
+ * remove the CQ from the idr first to make sure
+ * no more interrupt tasklets will touch this CQ
+ */
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
idr_remove(&ehca_cq_idr, my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
- ehca_err(device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_cq->ownpid);
- return -EINVAL;
- }
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- /* un-mmap if vma alloc */
- if (my_cq->uspace_queue ) {
- ret = ehca_munmap(my_cq->uspace_queue,
- my_cq->ipz_queue.queue_length);
- if (ret)
- ehca_err(device, "Could not munmap queue ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(device, "Could not munmap fwh ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- }
+ /* now wait until all pending events have completed */
+ wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
+ /* nobody's using our CQ any longer -- we can destroy it */
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
- ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
"state. Try to delete it forcibly.",
my_cq, cq_num, my_cq->ipz_cq_handle.handle);
ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
@@ -382,27 +355,19 @@ int ehca_destroy_cq(struct ib_cq *cq)
cq_num);
}
if (h_ret != H_SUCCESS) {
- ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+ ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
"ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
return ehca2ib_return_code(h_ret);
}
- ipz_queue_dtor(&my_cq->ipz_queue);
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return 0;
}
int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
- struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
- u32 cur_pid = current->tgid;
-
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
- ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_cq->ownpid);
- return -EINVAL;
- }
-
/* TODO: proper resize needs to be done */
ehca_err(cq->device, "not implemented yet");
@@ -414,7 +379,7 @@ int ehca_init_cq_cache(void)
cq_cache = kmem_cache_create("ehca_cache_cq",
sizeof(struct ehca_cq), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!cq_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 5281dec66f1..90da6747d39 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -54,13 +54,15 @@ int ehca_create_eq(struct ehca_shca *shca,
struct ehca_eq *eq,
const enum ehca_eq_type type, const u32 length)
{
- u64 ret;
+ int ret;
+ u64 h_ret;
u32 nr_pages;
u32 i;
void *vpage;
struct ib_device *ib_dev = &shca->ib_device;
spin_lock_init(&eq->spinlock);
+ spin_lock_init(&eq->irq_spinlock);
eq->is_initialized = 0;
if (type != EHCA_EQ && type != EHCA_NEQ) {
@@ -72,21 +74,21 @@ int ehca_create_eq(struct ehca_shca *shca,
return -EINVAL;
}
- ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
- &eq->pf,
- type,
- length,
- &eq->ipz_eq_handle,
- &eq->length,
- &nr_pages, &eq->ist);
+ h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+ &eq->pf,
+ type,
+ length,
+ &eq->ipz_eq_handle,
+ &eq->length,
+ &nr_pages, &eq->ist);
- if (ret != H_SUCCESS) {
+ if (h_ret != H_SUCCESS) {
ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
return -EINVAL;
}
- ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
- EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+ ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
if (!ret) {
ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
goto create_eq_exit1;
@@ -95,24 +97,23 @@ int ehca_create_eq(struct ehca_shca *shca,
for (i = 0; i < nr_pages; i++) {
u64 rpage;
- if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
- ret = H_RESOURCE;
+ vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+ if (!vpage)
goto create_eq_exit2;
- }
- rpage = virt_to_abs(vpage);
- ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
- eq->ipz_eq_handle,
- &eq->pf,
- 0, 0, rpage, 1);
+ rpage = __pa(vpage);
+ h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+ eq->ipz_eq_handle,
+ &eq->pf,
+ 0, 0, rpage, 1);
if (i == (nr_pages - 1)) {
/* last page */
vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
- if (ret != H_SUCCESS || vpage)
+ if (h_ret != H_SUCCESS || vpage)
goto create_eq_exit2;
} else {
- if (ret != H_PAGE_REGISTERED || !vpage)
+ if (h_ret != H_PAGE_REGISTERED)
goto create_eq_exit2;
}
}
@@ -121,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca,
/* register interrupt handlers and initialize work queues */
if (type == EHCA_EQ) {
- ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
- SA_INTERRUPT, "ehca_eq",
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+
+ ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
+ 0, "ehca_eq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
-
- tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
} else if (type == EHCA_NEQ) {
- ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
- SA_INTERRUPT, "ehca_neq",
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+
+ ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
+ 0, "ehca_neq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
-
- tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
}
eq->is_initialized = 1;
@@ -143,7 +144,7 @@ int ehca_create_eq(struct ehca_shca *shca,
return 0;
create_eq_exit2:
- ipz_queue_dtor(&eq->ipz_queue);
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
create_eq_exit1:
hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
@@ -168,18 +169,21 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
unsigned long flags;
u64 h_ret;
- spin_lock_irqsave(&eq->spinlock, flags);
- ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+ ibmebus_free_irq(eq->ist, (void *)shca);
- h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+ spin_lock_irqsave(&shca_list_lock, flags);
+ eq->is_initialized = 0;
+ spin_unlock_irqrestore(&shca_list_lock, flags);
- spin_unlock_irqrestore(&eq->spinlock, flags);
+ tasklet_kill(&eq->interrupt_task);
+
+ h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");
return -EINVAL;
}
- ipz_queue_dtor(&eq->ipz_queue);
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 5eae6ac4842..9ed4d258830 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -39,17 +39,39 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/gfp.h>
+
#include "ehca_tools.h"
+#include "ehca_iverbs.h"
#include "hcp_if.h"
+static unsigned int limit_uint(unsigned int value)
+{
+ return min_t(unsigned int, value, INT_MAX);
+}
+
int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
{
- int ret = 0;
+ int i, ret = 0;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_hca *rblock;
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ static const u32 cap_mapping[] = {
+ IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE,
+ IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR,
+ IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR,
+ IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST,
+ IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG,
+ IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE,
+ IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
+ IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD,
+ IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT,
+ IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE,
+ IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT,
+ };
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
@@ -62,92 +84,132 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
memset(props, 0, sizeof(struct ib_device_attr));
+ props->page_size_cap = shca->hca_cap_mr_pgsize;
props->fw_ver = rblock->hw_ver;
props->max_mr_size = rblock->max_mr_size;
props->vendor_id = rblock->vendor_id >> 8;
props->vendor_part_id = rblock->vendor_part_id >> 16;
props->hw_ver = rblock->hw_ver;
- props->max_qp = min_t(int, rblock->max_qp, INT_MAX);
- props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX);
- props->max_sge = min_t(int, rblock->max_sge, INT_MAX);
- props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX);
- props->max_cq = min_t(int, rblock->max_cq, INT_MAX);
- props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX);
- props->max_mr = min_t(int, rblock->max_mr, INT_MAX);
- props->max_mw = min_t(int, rblock->max_mw, INT_MAX);
- props->max_pd = min_t(int, rblock->max_pd, INT_MAX);
- props->max_ah = min_t(int, rblock->max_ah, INT_MAX);
- props->max_fmr = min_t(int, rblock->max_mr, INT_MAX);
- props->max_srq = 0;
- props->max_srq_wr = 0;
- props->max_srq_sge = 0;
- props->max_pkeys = 16;
- props->local_ca_ack_delay
- = rblock->local_ca_ack_delay;
- props->max_raw_ipv6_qp
- = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
- props->max_raw_ethy_qp
- = min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
- props->max_mcast_grp
- = min_t(int, rblock->max_mcast_grp, INT_MAX);
- props->max_mcast_qp_attach
- = min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
+ props->max_qp = limit_uint(rblock->max_qp);
+ props->max_qp_wr = limit_uint(rblock->max_wqes_wq);
+ props->max_sge = limit_uint(rblock->max_sge);
+ props->max_sge_rd = limit_uint(rblock->max_sge_rd);
+ props->max_cq = limit_uint(rblock->max_cq);
+ props->max_cqe = limit_uint(rblock->max_cqe);
+ props->max_mr = limit_uint(rblock->max_mr);
+ props->max_mw = limit_uint(rblock->max_mw);
+ props->max_pd = limit_uint(rblock->max_pd);
+ props->max_ah = limit_uint(rblock->max_ah);
+ props->max_ee = limit_uint(rblock->max_rd_ee_context);
+ props->max_rdd = limit_uint(rblock->max_rd_domain);
+ props->max_fmr = limit_uint(rblock->max_mr);
+ props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
+ props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
+ props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
+ props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
+ props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ props->max_srq = limit_uint(props->max_qp);
+ props->max_srq_wr = limit_uint(props->max_qp_wr);
+ props->max_srq_sge = 3;
+ }
+
+ props->max_pkeys = 16;
+ /* Some FW versions say 0 here; insert sensible value in that case */
+ props->local_ca_ack_delay = rblock->local_ca_ack_delay ?
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
+ props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
+ props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
+ props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
+ props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
props->max_total_mcast_qp_attach
- = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+ = limit_uint(rblock->max_total_mcast_qp_attach);
+
+ /* translate device capabilities */
+ props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
+ for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
+ if (rblock->hca_cap_indicators & cap_mapping[i + 1])
+ props->device_cap_flags |= cap_mapping[i];
query_device1:
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
+static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+{
+ switch (fw_mtu) {
+ case 0x1:
+ return IB_MTU_256;
+ case 0x2:
+ return IB_MTU_512;
+ case 0x3:
+ return IB_MTU_1024;
+ case 0x4:
+ return IB_MTU_2048;
+ case 0x5:
+ return IB_MTU_4096;
+ default:
+ ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+ fw_mtu);
+ return 0;
+ }
+}
+
+static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+{
+ switch (vl_cap) {
+ case 0x1:
+ return 1;
+ case 0x2:
+ return 2;
+ case 0x3:
+ return 4;
+ case 0x4:
+ return 8;
+ case 0x5:
+ return 15;
+ default:
+ ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
+ vl_cap);
+ return 0;
+ }
+}
+
int ehca_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
int ret = 0;
+ u64 h_ret;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_port *rblock;
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_port1;
}
memset(props, 0, sizeof(struct ib_port_attr));
- props->state = rblock->state;
-
- switch (rblock->max_mtu) {
- case 0x1:
- props->active_mtu = props->max_mtu = IB_MTU_256;
- break;
- case 0x2:
- props->active_mtu = props->max_mtu = IB_MTU_512;
- break;
- case 0x3:
- props->active_mtu = props->max_mtu = IB_MTU_1024;
- break;
- case 0x4:
- props->active_mtu = props->max_mtu = IB_MTU_2048;
- break;
- case 0x5:
- props->active_mtu = props->max_mtu = IB_MTU_4096;
- break;
- default:
- ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
- rblock->max_mtu);
- break;
- }
+ props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
+ props->port_cap_flags = rblock->capability_mask;
props->gid_tbl_len = rblock->gid_tbl_len;
- props->max_msg_sz = rblock->max_msg_sz;
+ if (rblock->max_msg_sz)
+ props->max_msg_sz = rblock->max_msg_sz;
+ else
+ props->max_msg_sz = 0x1 << 31;
props->bad_pkey_cntr = rblock->bad_pkey_cntr;
props->qkey_viol_cntr = rblock->qkey_viol_cntr;
props->pkey_tbl_len = rblock->pkey_tbl_len;
@@ -157,12 +219,61 @@ int ehca_query_port(struct ib_device *ibdev,
props->sm_sl = rblock->sm_sl;
props->subnet_timeout = rblock->subnet_timeout;
props->init_type_reply = rblock->init_type_reply;
+ props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap);
- props->active_width = IB_WIDTH_12X;
- props->active_speed = 0x1;
+ if (rblock->state && rblock->phys_width) {
+ props->phys_state = rblock->phys_pstate;
+ props->state = rblock->phys_state;
+ props->active_width = rblock->phys_width;
+ props->active_speed = rblock->phys_speed;
+ } else {
+ /* old firmware releases don't report physical
+ * port info, so use default values
+ */
+ props->phys_state = 5;
+ props->state = rblock->state;
+ props->active_width = IB_WIDTH_12X;
+ props->active_speed = IB_SPEED_SDR;
+ }
query_port1:
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+int ehca_query_sma_attr(struct ehca_shca *shca,
+ u8 port, struct ehca_sma_attr *attr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct hipz_query_port *rblock;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_sma_attr1;
+ }
+
+ memset(attr, 0, sizeof(struct ehca_sma_attr));
+
+ attr->lid = rblock->lid;
+ attr->lmc = rblock->lmc;
+ attr->sm_sl = rblock->sm_sl;
+ attr->sm_lid = rblock->sm_lid;
+
+ attr->pkey_tbl_len = rblock->pkey_tbl_len;
+ memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
+
+query_sma_attr1:
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
@@ -170,21 +281,24 @@ query_port1:
int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
int ret = 0;
- struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+ u64 h_ret;
+ struct ehca_shca *shca;
struct hipz_query_port *rblock;
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
if (index > 16) {
ehca_err(&shca->ib_device, "Invalid index: %x.", index);
return -EINVAL;
}
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_pkey1;
@@ -193,7 +307,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
query_pkey1:
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
@@ -202,22 +316,24 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid)
{
int ret = 0;
+ u64 h_ret;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_port *rblock;
- if (index > 255) {
+ if (index < 0 || index > 255) {
ehca_err(&shca->ib_device, "Invalid index: %x.", index);
return -EINVAL;
}
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_gid1;
@@ -227,15 +343,68 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
query_gid1:
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
+static const u32 allowed_port_caps = (
+ IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP);
+
int ehca_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
{
- /* Not implemented yet */
- return -EFAULT;
+ int ret = 0;
+ struct ehca_shca *shca;
+ struct hipz_query_port *rblock;
+ u32 cap;
+ u64 hret;
+
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
+ if ((props->set_port_cap_mask | props->clr_port_cap_mask)
+ & ~allowed_port_caps) {
+ ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
+ "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
+ props->clr_port_cap_mask, allowed_port_caps);
+ return -EINVAL;
+ }
+
+ if (mutex_lock_interruptible(&shca->modify_mutex))
+ return -ERESTARTSYS;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ ret = -ENOMEM;
+ goto modify_port1;
+ }
+
+ hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (hret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto modify_port2;
+ }
+
+ cap = (rblock->capability_mask | props->set_port_cap_mask)
+ & ~props->clr_port_cap_mask;
+
+ hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
+ cap, props->init_type, port_modify_mask);
+ if (hret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli",
+ hret);
+ ret = -EINVAL;
+ }
+
+modify_port2:
+ ehca_free_fw_ctrlblock(rblock);
+
+modify_port1:
+ mutex_unlock(&shca->modify_mutex);
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 2a65b5be197..8615d7cf7e0 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -5,6 +5,8 @@
*
* Authors: Heiko J Schick <schickhj@de.ibm.com>
* Khadija Souissi <souissi@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -39,37 +41,38 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
-
-#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
-#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7)
-#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63)
-#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63)
-
-#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
-#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7)
-#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
-
-#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
-#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
-
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+#include "ipz_pt_fn.h"
+
+#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7)
+#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63)
+#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63)
+
+#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7)
+#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
+#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23)
+
+#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
+#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
static void queue_comp_task(struct ehca_cq *__cq);
-static struct ehca_comp_pool* pool;
-static struct notifier_block comp_pool_callback_nb;
-
-#endif
+static struct ehca_comp_pool *pool;
static inline void comp_event_callback(struct ehca_cq *cq)
{
@@ -83,8 +86,8 @@ static inline void comp_event_callback(struct ehca_cq *cq)
return;
}
-static void print_error_data(struct ehca_shca * shca, void* data,
- u64* rblock, int length)
+static void print_error_data(struct ehca_shca *shca, void *data,
+ u64 *rblock, int length)
{
u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
u64 resource = rblock[1];
@@ -92,37 +95,37 @@ static void print_error_data(struct ehca_shca * shca, void* data,
switch (type) {
case 0x1: /* Queue Pair */
{
- struct ehca_qp *qp = (struct ehca_qp*)data;
+ struct ehca_qp *qp = (struct ehca_qp *)data;
/* only print error data if AER is set */
if (rblock[6] == 0)
return;
ehca_err(&shca->ib_device,
- "QP 0x%x (resource=%lx) has errors.",
+ "QP 0x%x (resource=%llx) has errors.",
qp->ib_qp.qp_num, resource);
break;
}
case 0x4: /* Completion Queue */
{
- struct ehca_cq *cq = (struct ehca_cq*)data;
+ struct ehca_cq *cq = (struct ehca_cq *)data;
ehca_err(&shca->ib_device,
- "CQ 0x%x (resource=%lx) has errors.",
+ "CQ 0x%x (resource=%llx) has errors.",
cq->cq_number, resource);
break;
}
default:
ehca_err(&shca->ib_device,
- "Unknown errror type: %lx on %s.",
+ "Unknown error type: %llx on %s.",
type, shca->ib_device.name);
break;
}
- ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+ ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
ehca_err(&shca->ib_device, "EHCA ----- error data begin "
"---------------------------------------------------");
- ehca_dmp(rblock, length, "resource=%lx", resource);
+ ehca_dmp(rblock, length, "resource=%llx", resource);
ehca_err(&shca->ib_device, "EHCA ----- error data end "
"----------------------------------------------------");
@@ -137,91 +140,123 @@ int ehca_error_data(struct ehca_shca *shca, void *data,
u64 *rblock;
unsigned long block_count;
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
if (!rblock) {
ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
ret = -ENOMEM;
goto error_data1;
}
+ /* rblock must be 4K aligned and should be 4K large */
ret = hipz_h_error_data(shca->ipz_hca_handle,
resource,
rblock,
&block_count);
- if (ret == H_R_STATE) {
+ if (ret == H_R_STATE)
ehca_err(&shca->ib_device,
- "No error data is available: %lx.", resource);
- }
+ "No error data is available: %llx.", resource);
else if (ret == H_SUCCESS) {
int length;
length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
- if (length > PAGE_SIZE)
- length = PAGE_SIZE;
+ if (length > EHCA_PAGESIZE)
+ length = EHCA_PAGESIZE;
print_error_data(shca, data, rblock, length);
- }
- else {
+ } else
ehca_err(&shca->ib_device,
- "Error data could not be fetched: %lx", resource);
- }
+ "Error data could not be fetched: %llx", resource);
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
error_data1:
return ret;
}
-static void qp_event_callback(struct ehca_shca *shca,
- u64 eqe,
+static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
enum ib_event_type event_type)
{
struct ib_event event;
+
+ /* PATH_MIG without the QP ever having been armed is false alarm */
+ if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+ return;
+
+ event.device = &shca->ib_device;
+ event.event = event_type;
+
+ if (qp->ext_type == EQPT_SRQ) {
+ if (!qp->ib_srq.event_handler)
+ return;
+
+ event.element.srq = &qp->ib_srq;
+ qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
+ } else {
+ if (!qp->ib_qp.event_handler)
+ return;
+
+ event.element.qp = &qp->ib_qp;
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+}
+
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ enum ib_event_type event_type, int fatal)
+{
struct ehca_qp *qp;
- unsigned long flags;
u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
+ if (qp)
+ atomic_inc(&qp->nr_events);
+ read_unlock(&ehca_qp_idr_lock);
if (!qp)
return;
- ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
- if (!qp->ib_qp.event_handler)
- return;
+ if (fatal)
+ ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
- event.device = &shca->ib_device;
- event.event = event_type;
- event.element.qp = &qp->ib_qp;
+ dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
+ IB_EVENT_SRQ_ERR : event_type);
- qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ /*
+ * eHCA only processes one WQE at a time for SRQ base QPs,
+ * so the last WQE has been processed as soon as the QP enters
+ * error state.
+ */
+ if (fatal && qp->ext_type == EQPT_SRQBASE)
+ dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+ if (atomic_dec_and_test(&qp->nr_events))
+ wake_up(&qp->wait_completion);
return;
}
static void cq_event_callback(struct ehca_shca *shca,
- u64 eqe)
+ u64 eqe)
{
struct ehca_cq *cq;
- unsigned long flags;
u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
if (!cq)
return;
ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+
return;
}
@@ -231,17 +266,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
switch (identifier) {
case 0x02: /* path migrated */
- qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+ qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
break;
case 0x03: /* communication established */
- qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+ qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
break;
case 0x04: /* send queue drained */
- qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+ qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
break;
case 0x05: /* QP error */
case 0x06: /* QP error */
- qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+ qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
break;
case 0x07: /* CQ error */
case 0x08: /* CQ error */
@@ -268,13 +303,18 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
case 0x11: /* unaffiliated access error */
ehca_err(&shca->ib_device, "Unaffiliated access error.");
break;
- case 0x12: /* path migrating error */
- ehca_err(&shca->ib_device, "Path migration error.");
+ case 0x12: /* path migrating */
+ ehca_err(&shca->ib_device, "Path migrating.");
break;
case 0x13: /* interface trace stopped */
ehca_err(&shca->ib_device, "Interface trace stopped.");
break;
case 0x14: /* first error capture info available */
+ ehca_info(&shca->ib_device, "First error capture available");
+ break;
+ case 0x15: /* SRQ limit reached */
+ qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
+ break;
default:
ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
identifier, shca->ib_device.name);
@@ -284,30 +324,90 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
return;
}
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
+/*
+ * Log a port state message and forward the matching event to the IB core.
+ *
+ * @shca: adapter the event belongs to
+ * @port_num: port number, stored in the event as element.port_num
+ * @type: IB event type to dispatch
+ * @msg: short text appended to "port <n> " in the info log line
+ */
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+ enum ib_event_type type, const char *msg)
{
struct ib_event event;
+
+ ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+ event.device = &shca->ib_device;
+ event.event = type;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+}
+
+/*
+ * Handle a non-disruptive port configuration change.
+ *
+ * Queries the current SMA attributes of @port_num, compares them with the
+ * copy saved in the sport and dispatches IB_EVENT_SM_CHANGE,
+ * IB_EVENT_LID_CHANGE and/or IB_EVENT_PKEY_CHANGE for whatever differs.
+ * The saved copy is then replaced by the new attributes.
+ *
+ * If the query fails nothing is dispatched and the saved copy is kept.
+ */
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+ struct ehca_sma_attr new_attr;
+ struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+
+ /*
+ * new_attr lives on the stack and is only valid after a successful
+ * query; comparing or saving it after a failure would use
+ * uninitialized data. Non-zero is treated as failure here (usual
+ * kernel convention) -- NOTE(review): confirm against the
+ * implementation of ehca_query_sma_attr().
+ */
+ if (ehca_query_sma_attr(shca, port_num, &new_attr)) {
+ ehca_err(&shca->ib_device,
+ "Cannot query SMA attributes of port %d", port_num);
+ return;
+ }
+
+ if (new_attr.sm_sl != old_attr->sm_sl ||
+ new_attr.sm_lid != old_attr->sm_lid)
+ dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+ "SM changed");
+
+ if (new_attr.lid != old_attr->lid ||
+ new_attr.lmc != old_attr->lmc)
+ dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+ "LID changed");
+
+ /* a different table length or any differing entry counts as a change */
+ if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+ memcmp(new_attr.pkeys, old_attr->pkeys,
+ sizeof(u16) * new_attr.pkey_tbl_len))
+ dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+ "P_Key changed");
+
+ *old_attr = new_attr;
+}
+
+/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
+/*
+ * The whole check-and-recover sequence runs under sport->mod_sqp_lock with
+ * interrupts saved/disabled. "AQP1 destroyed" means the IB_QPT_GSI slot of
+ * sport->ibqp_sqp[] is NULL at the time the lock is held.
+ */
+static int replay_modify_qp(struct ehca_sport *sport)
+{
+ int aqp1_destroyed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+
+ /* sample the GSI slot once so the return value matches what was recovered */
+ aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
+
+ /* each special QP is recovered only if it currently exists */
+ if (sport->ibqp_sqp[IB_QPT_SMI])
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+ if (!aqp1_destroyed)
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+ return aqp1_destroyed;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+ u8 spec_event;
+ struct ehca_sport *sport = &shca->sport[port - 1];
switch (ec) {
case 0x30: /* port availability change */
if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
- ehca_info(&shca->ib_device,
- "port %x is active.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ACTIVE;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
- ib_dispatch_event(&event);
+ /* only replay modify_qp calls in autodetect mode;
+ * if AQP1 was destroyed, the port is already down
+ * again and we can drop the event.
+ */
+ if (ehca_nr_ports < 0)
+ if (replay_modify_qp(sport))
+ break;
+
+ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port, &sport->saved_attr);
} else {
- ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ERR;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_DOWN;
- ib_dispatch_event(&event);
+ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
}
break;
case 0x31:
@@ -315,24 +415,21 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
* disruptive change is caused by
* LID, PKEY or SM change
*/
- ehca_warn(&shca->ib_device,
- "disruptive port %x configuration change", port);
-
- ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ERR;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_DOWN;
- ib_dispatch_event(&event);
-
- ehca_info(&shca->ib_device,
- "port %x is active.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ACTIVE;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
- ib_dispatch_event(&event);
+ if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+ ehca_warn(&shca->ib_device, "disruptive port "
+ "%d configuration change", port);
+
+ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+
+ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port,
+ &sport->saved_attr);
+ } else
+ notify_port_conf_change(shca, port);
break;
case 0x32: /* adapter malfunction */
ehca_err(&shca->ib_device, "Adapter malfunction.");
@@ -340,6 +437,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
case 0x33: /* trace stopped */
ehca_err(&shca->ib_device, "Traced stopped.");
break;
+ case 0x34: /* util async event */
+ spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
+ if (spec_event == 0x80) /* client reregister required */
+ dispatch_port_event(shca, port,
+ IB_EVENT_CLIENT_REREGISTER,
+ "client reregister req.");
+ else
+ ehca_warn(&shca->ib_device, "Unknown util async "
+ "event %x on port %x", spec_event, port);
+ break;
default:
ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
ec, shca->ib_device.name);
@@ -360,7 +467,7 @@ static inline void reset_eq_pending(struct ehca_cq *cq)
return;
}
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
struct ehca_shca *shca = (struct ehca_shca*)dev_id;
@@ -375,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data)
struct ehca_eqe *eqe;
u64 ret;
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+ eqe = ehca_poll_eq(shca, &shca->neq);
while (eqe) {
if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
parse_ec(shca, eqe->entry);
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+ eqe = ehca_poll_eq(shca, &shca->neq);
}
ret = hipz_h_reset_event(shca->ipz_hca_handle,
@@ -393,7 +500,7 @@ void ehca_tasklet_neq(unsigned long data)
return;
}
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
struct ehca_shca *shca = (struct ehca_shca*)dev_id;
@@ -402,361 +509,362 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
return IRQ_HANDLED;
}
-void ehca_tasklet_eq(unsigned long data)
+
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
- struct ehca_shca *shca = (struct ehca_shca*)data;
- struct ehca_eqe *eqe;
- int int_state;
- int query_cnt = 0;
+ u64 eqe_value;
+ u32 token;
+ struct ehca_cq *cq;
- do {
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ eqe_value = eqe->entry;
+ ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ ehca_dbg(&shca->ib_device, "Got completion event");
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ read_lock(&ehca_cq_idr_lock);
+ cq = idr_find(&ehca_cq_idr, token);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
+ if (cq == NULL) {
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq token=%x",
+ token);
+ return;
+ }
+ reset_eq_pending(cq);
+ if (ehca_scaling_code)
+ queue_comp_task(cq);
+ else {
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eqe_value);
+ }
+}
- if ((shca->hw_level >= 2) && eqe)
- int_state = 1;
- else
- int_state = 0;
-
- while ((int_state == 1) || eqe) {
- while (eqe) {
- u64 eqe_value = eqe->entry;
-
- ehca_dbg(&shca->ib_device,
- "eqe_value=%lx", eqe_value);
-
- /* TODO: better structure */
- if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
- eqe_value)) {
- unsigned long flags;
- u32 token;
- struct ehca_cq *cq;
-
- ehca_dbg(&shca->ib_device,
- "... completion event");
- token =
- EHCA_BMASK_GET(EQE_CQ_TOKEN,
- eqe_value);
- spin_lock_irqsave(&ehca_cq_idr_lock,
- flags);
- cq = idr_find(&ehca_cq_idr, token);
-
- if (cq == NULL) {
- spin_unlock(&ehca_cq_idr_lock);
- break;
- }
-
- reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
- queue_comp_task(cq);
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
-#else
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
- comp_event_callback(cq);
-#endif
- } else {
- ehca_dbg(&shca->ib_device,
- "... non completion event");
- parse_identifier(shca, eqe_value);
- }
- eqe =
- (struct ehca_eqe *)ehca_poll_eq(shca,
- &shca->eq);
- }
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+ struct ehca_eq *eq = &shca->eq;
+ struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+ u64 eqe_value, ret;
+ int eqe_cnt, i;
+ int eq_empty = 0;
+
+ spin_lock(&eq->irq_spinlock);
+ if (is_irq) {
+ const int max_query_cnt = 100;
+ int query_cnt = 0;
+ int int_state = 1;
+ do {
+ int_state = hipz_h_query_int_state(
+ shca->ipz_hca_handle, eq->ist);
+ query_cnt++;
+ iosync();
+ } while (int_state && query_cnt < max_query_cnt);
+ if (unlikely((query_cnt == max_query_cnt)))
+ ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+ int_state, query_cnt);
+ }
- if (shca->hw_level >= 2) {
- int_state =
- hipz_h_query_int_state(shca->ipz_hca_handle,
- shca->eq.ist);
- query_cnt++;
- iosync();
- if (query_cnt >= 100) {
- query_cnt = 0;
- int_state = 0;
- }
+ /* read out all eqes */
+ eqe_cnt = 0;
+ do {
+ u32 token;
+ eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
+ if (!eqe_cache[eqe_cnt].eqe)
+ break;
+ eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ read_lock(&ehca_cq_idr_lock);
+ eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+ if (eqe_cache[eqe_cnt].cq)
+ atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
+ if (!eqe_cache[eqe_cnt].cq) {
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq "
+ "token=%x", token);
+ continue;
}
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
-
+ } else
+ eqe_cache[eqe_cnt].cq = NULL;
+ eqe_cnt++;
+ } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+ if (!eqe_cnt) {
+ if (is_irq)
+ ehca_dbg(&shca->ib_device,
+ "No eqe found for irq event");
+ goto unlock_irq_spinlock;
+ } else if (!is_irq) {
+ ret = hipz_h_eoi(eq->ist);
+ if (ret != H_SUCCESS)
+ ehca_err(&shca->ib_device,
+ "bad return code EOI -rc = %lld\n", ret);
+ ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+ }
+ if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+ ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+ /* enable irq for new packets */
+ for (i = 0; i < eqe_cnt; i++) {
+ if (eq->eqe_cache[i].cq)
+ reset_eq_pending(eq->eqe_cache[i].cq);
+ }
+ /* check eq */
+ spin_lock(&eq->spinlock);
+ eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+ spin_unlock(&eq->spinlock);
+ /* call completion handler for cached eqes */
+ for (i = 0; i < eqe_cnt; i++)
+ if (eq->eqe_cache[i].cq) {
+ if (ehca_scaling_code)
+ queue_comp_task(eq->eqe_cache[i].cq);
+ else {
+ struct ehca_cq *cq = eq->eqe_cache[i].cq;
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
}
- } while (int_state != 0);
-
- return;
+ /* poll eq if not empty */
+ if (eq_empty)
+ goto unlock_irq_spinlock;
+ do {
+ struct ehca_eqe *eqe;
+ eqe = ehca_poll_eq(shca, &shca->eq);
+ if (!eqe)
+ break;
+ process_eqe(shca, eqe);
+ } while (1);
+
+unlock_irq_spinlock:
+ spin_unlock(&eq->irq_spinlock);
}
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
-static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+void ehca_tasklet_eq(unsigned long data)
{
- unsigned long flags_last_cpu;
+ ehca_process_eq((struct ehca_shca*)data, 1);
+}
- if (ehca_debug_level)
- ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+static int find_next_online_cpu(struct ehca_comp_pool *pool)
+{
+ int cpu;
+ unsigned long flags;
- spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
- pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (pool->last_cpu == NR_CPUS)
- pool->last_cpu = first_cpu(cpu_online_map);
- spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+ WARN_ON_ONCE(!in_interrupt());
+ if (ehca_debug_level >= 3)
+ ehca_dmp(cpu_online_mask, cpumask_size(), "");
- return pool->last_cpu;
+ spin_lock_irqsave(&pool->last_cpu_lock, flags);
+ do {
+ cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_first(cpu_online_mask);
+ pool->last_cpu = cpu;
+ } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
+ spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
+
+ return cpu;
}
static void __queue_comp_task(struct ehca_cq *__cq,
- struct ehca_cpu_comp_task *cct)
+ struct ehca_cpu_comp_task *cct,
+ struct task_struct *thread)
{
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
- spin_lock_irqsave(&__cq->task_lock, flags_cq);
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&__cq->task_lock);
if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
list_add_tail(&__cq->entry, &cct->cq_list);
cct->cq_jobs++;
- wake_up(&cct->wait_queue);
- }
- else
+ wake_up_process(thread);
+ } else
__cq->nr_callbacks++;
- spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock(&__cq->task_lock);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static void queue_comp_task(struct ehca_cq *__cq)
{
- int cpu;
int cpu_id;
struct ehca_cpu_comp_task *cct;
+ struct task_struct *thread;
+ int cq_jobs;
+ unsigned long flags;
- cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+ BUG_ON(!cct || !thread);
- if (cct->cq_jobs > 0) {
+ spin_lock_irqsave(&cct->task_lock, flags);
+ cq_jobs = cct->cq_jobs;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+ if (cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+ BUG_ON(!cct || !thread);
}
-
- __queue_comp_task(__cq, cct);
-
- put_cpu();
-
- return;
+ __queue_comp_task(__cq, cct, thread);
}
-static void run_comp_task(struct ehca_cpu_comp_task* cct)
+static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
struct ehca_cq *cq;
- unsigned long flags_cct;
- unsigned long flags_cq;
-
- spin_lock_irqsave(&cct->task_lock, flags_cct);
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock_irq(&cct->task_lock);
+
comp_event_callback(cq);
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
- spin_lock_irqsave(&cq->task_lock, flags_cq);
+ spin_lock_irq(&cct->task_lock);
+ spin_lock(&cq->task_lock);
cq->nr_callbacks--;
- if (cq->nr_callbacks == 0) {
+ if (!cq->nr_callbacks) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
- spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+ spin_unlock(&cq->task_lock);
}
-
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- return;
}
-static int comp_task(void *__cct)
+/*
+ * Park callback for the per-cpu completion thread of @cpu.
+ *
+ * Marks the cpu's completion task inactive, so find_next_online_cpu() no
+ * longer selects it, and hands every CQ still pending on its list over to
+ * the completion task of another cpu.
+ */
+static void comp_task_park(unsigned int cpu)
{
- struct ehca_cpu_comp_task* cct = __cct;
- DECLARE_WAITQUEUE(wait, current);
-
- set_current_state(TASK_INTERRUPTIBLE);
- while(!kthread_should_stop()) {
- add_wait_queue(&cct->wait_queue, &wait);
-
- if (list_empty(&cct->cq_list))
- schedule();
- else
- __set_current_state(TASK_RUNNING);
-
- remove_wait_queue(&cct->wait_queue, &wait);
-
- if (!list_empty(&cct->cq_list))
- run_comp_task(__cct);
+ struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ struct ehca_cpu_comp_task *target;
+ struct task_struct *thread;
+ struct ehca_cq *cq, *tmp;
+ int moved = 0;
+ LIST_HEAD(list);
- set_current_state(TASK_INTERRUPTIBLE);
+ /* detach all pending CQs onto a private list */
+ spin_lock_irq(&cct->task_lock);
+ cct->cq_jobs = 0;
+ cct->active = 0;
+ list_splice_init(&cct->cq_list, &list);
+ spin_unlock_irq(&cct->task_lock);
+
+ cpu = find_next_online_cpu(pool);
+ target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
+ /*
+ * Link the CQs into the target list directly instead of calling
+ * __queue_comp_task(): that helper takes target->task_lock itself
+ * (self-deadlock when called with the lock held) and only links a
+ * CQ whose nr_callbacks is 0, which is never the case for a CQ that
+ * was sitting on a cq_list. nr_callbacks is left untouched so the
+ * number of outstanding callbacks is preserved.
+ */
+ spin_lock_irq(&target->task_lock);
+ list_for_each_entry_safe(cq, tmp, &list, entry) {
+ list_move_tail(&cq->entry, &target->cq_list);
+ target->cq_jobs++;
+ moved = 1;
}
- __set_current_state(TASK_RUNNING);
-
- return 0;
+ spin_unlock_irq(&target->task_lock);
+
+ /* kick the target thread only if it actually received work */
+ if (moved)
+ wake_up_process(thread);
}
-static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
- int cpu)
+static void comp_task_stop(unsigned int cpu, bool online)
{
- struct ehca_cpu_comp_task *cct;
-
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- spin_lock_init(&cct->task_lock);
- INIT_LIST_HEAD(&cct->cq_list);
- init_waitqueue_head(&cct->wait_queue);
- cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
-
- return cct->task;
-}
-
-static void destroy_comp_task(struct ehca_comp_pool *pool,
- int cpu)
-{
- struct ehca_cpu_comp_task *cct;
- struct task_struct *task;
- unsigned long flags_cct;
-
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- task = cct->task;
- cct->task = NULL;
+ spin_lock_irq(&cct->task_lock);
cct->cq_jobs = 0;
-
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- if (task)
- kthread_stop(task);
-
- return;
+ cct->active = 0;
+ WARN_ON(!list_empty(&cct->cq_list));
+ spin_unlock_irq(&cct->task_lock);
}
-static void take_over_work(struct ehca_comp_pool *pool,
- int cpu)
+static int comp_task_should_run(unsigned int cpu)
{
struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- LIST_HEAD(list);
- struct ehca_cq *cq;
- unsigned long flags_cct;
-
- spin_lock_irqsave(&cct->task_lock, flags_cct);
-
- list_splice_init(&cct->cq_list, &list);
-
- while(!list_empty(&list)) {
- cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-
- list_del(&cq->entry);
- __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
- smp_processor_id()));
- }
-
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ return cct->cq_jobs;
}
-static int comp_pool_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static void comp_task(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
- struct ehca_cpu_comp_task *cct;
-
- switch (action) {
- case CPU_UP_PREPARE:
- ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
- if(!create_comp_task(pool, cpu)) {
- ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
- return NOTIFY_BAD;
- }
- break;
- case CPU_UP_CANCELED:
- ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- kthread_bind(cct->task, any_online_cpu(cpu_online_map));
- destroy_comp_task(pool, cpu);
- break;
- case CPU_ONLINE:
- ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- kthread_bind(cct->task, cpu);
- wake_up_process(cct->task);
- break;
- case CPU_DOWN_PREPARE:
- ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
- break;
- case CPU_DOWN_FAILED:
- ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
- break;
- case CPU_DEAD:
- ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
- destroy_comp_task(pool, cpu);
- take_over_work(pool, cpu);
- break;
+ struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
+ int cql_empty;
+
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ if (!cql_empty) {
+ __set_current_state(TASK_RUNNING);
+ run_comp_task(cct);
}
-
- return NOTIFY_OK;
+ spin_unlock_irq(&cct->task_lock);
}
-#endif
+/*
+ * Descriptor of the per-cpu completion threads, handed to
+ * smpboot_register_percpu_thread() in ehca_create_comp_pool(). The .store
+ * member is not set here; it is filled in at pool creation time with the
+ * per-cpu task pointer array.
+ */
+static struct smp_hotplug_thread comp_pool_threads = {
+ .thread_should_run = comp_task_should_run, /* non-zero while cq_jobs pending */
+ .thread_fn = comp_task, /* runs the queued completion callbacks */
+ .thread_comm = "ehca_comp/%u",
+ .cleanup = comp_task_stop, /* marks the cpu's task inactive */
+ .park = comp_task_park, /* hands pending CQs to another cpu */
+};
int ehca_create_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
- int cpu;
- struct task_struct *task;
+ int cpu, ret = -ENOMEM;
+
+ if (!ehca_scaling_code)
+ return 0;
pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
if (pool == NULL)
return -ENOMEM;
spin_lock_init(&pool->last_cpu_lock);
- pool->last_cpu = any_online_cpu(cpu_online_map);
+ pool->last_cpu = cpumask_any(cpu_online_mask);
pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
- if (pool->cpu_comp_tasks == NULL) {
- kfree(pool);
- return -EINVAL;
- }
+ if (!pool->cpu_comp_tasks)
+ goto out_pool;
- for_each_online_cpu(cpu) {
- task = create_comp_task(pool, cpu);
- if (task) {
- kthread_bind(task, cpu);
- wake_up_process(task);
- }
+ pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
+ if (!pool->cpu_comp_threads)
+ goto out_tasks;
+
+ for_each_present_cpu(cpu) {
+ struct ehca_cpu_comp_task *cct;
+
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ spin_lock_init(&cct->task_lock);
+ INIT_LIST_HEAD(&cct->cq_list);
}
- comp_pool_callback_nb.notifier_call = comp_pool_callback;
- comp_pool_callback_nb.priority =0;
- register_cpu_notifier(&comp_pool_callback_nb);
-#endif
+ comp_pool_threads.store = pool->cpu_comp_threads;
+ ret = smpboot_register_percpu_thread(&comp_pool_threads);
+ if (ret)
+ goto out_threads;
- return 0;
+ pr_info("eHCA scaling code enabled\n");
+ return ret;
+
+out_threads:
+ free_percpu(pool->cpu_comp_threads);
+out_tasks:
+ free_percpu(pool->cpu_comp_tasks);
+out_pool:
+ kfree(pool);
+ return ret;
}
void ehca_destroy_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
- int i;
-
- unregister_cpu_notifier(&comp_pool_callback_nb);
+ if (!ehca_scaling_code)
+ return;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_online(i))
- destroy_comp_task(pool, i);
- }
-#endif
+ smpboot_unregister_percpu_thread(&comp_pool_threads);
- return;
+ free_percpu(pool->cpu_comp_threads);
+ free_percpu(pool->cpu_comp_tasks);
+ kfree(pool);
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 85bf1fe16fe..5370199f08c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -47,26 +47,26 @@ struct ehca_shca;
#include <linux/interrupt.h>
#include <linux/types.h>
-#include <asm/atomic.h>
int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
void ehca_tasklet_neq(unsigned long data);
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
struct ehca_cpu_comp_task {
- wait_queue_head_t wait_queue;
struct list_head cq_list;
- struct task_struct *task;
spinlock_t task_lock;
int cq_jobs;
+ int active;
};
struct ehca_comp_pool {
- struct ehca_cpu_comp_task *cpu_comp_tasks;
+ struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
+ struct task_struct * __percpu *cpu_comp_threads;
int last_cpu;
spinlock_t last_cpu_lock;
};
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 319c39d47f3..22f79afa7fc 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
int ehca_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
+int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
+ struct ehca_sma_attr *attr);
+
int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -78,9 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf,
int mr_access_flags, u64 *iova_start);
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
- int mr_access_flags, struct ib_udata *udata);
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
+ struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr,
int mr_rereg_mask,
@@ -92,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
int ehca_dereg_mr(struct ib_mr *mr);
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
@@ -123,7 +126,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata);
@@ -135,7 +138,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
struct ib_qp *ehca_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
@@ -155,6 +158,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+
+int ehca_destroy_srq(struct ib_srq *srq);
+
u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
struct ib_qp_init_attr *qp_init_attr);
@@ -169,14 +187,26 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context);
int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+
void ehca_poll_eqs(unsigned long data);
-int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
- struct vm_area_struct **vma);
+int ehca_calc_ipd(struct ehca_shca *shca, int port,
+ enum ib_rate path_rate, u32 *ipd);
-int ehca_mmap_register(u64 physical,void **mapped,
- struct vm_area_struct **vma);
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
+#ifdef CONFIG_PPC_64K_PAGES
+void *ehca_alloc_fw_ctrlblock(gfp_t flags);
+void ehca_free_fw_ctrlblock(void *ptr);
+#else
+#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
+#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
+#endif
-int ehca_munmap(unsigned long addr, size_t len);
+void ehca_recover_sqp(struct ib_qp *sqp);
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 2380994418a..cd8d290a09f 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -40,65 +40,129 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef CONFIG_PPC_64K_PAGES
+#include <linux/slab.h>
+#endif
+
+#include <linux/notifier.h>
+#include <linux/memory.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "ehca_tools.h"
#include "hcp_if.h"
+#define HCAD_VERSION "0029"
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0016");
+MODULE_VERSION(HCAD_VERSION);
+
+static bool ehca_open_aqp1 = 0;
+static int ehca_hw_level = 0;
+static bool ehca_poll_all_eqs = 1;
-int ehca_open_aqp1 = 0;
int ehca_debug_level = 0;
-int ehca_hw_level = 0;
-int ehca_nr_ports = 2;
-int ehca_use_hp_mr = 0;
+int ehca_nr_ports = -1;
+bool ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
-int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
-
-module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
-module_param_named(debug_level, ehca_debug_level, int, 0);
-module_param_named(hw_level, ehca_hw_level, int, 0);
-module_param_named(nr_ports, ehca_nr_ports, int, 0);
-module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
-module_param_named(port_act_time, ehca_port_act_time, int, 0);
-module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
-module_param_named(static_rate, ehca_static_rate, int, 0);
+bool ehca_scaling_code = 0;
+int ehca_lock_hcalls = -1;
+int ehca_max_cq = -1;
+int ehca_max_qp = -1;
+
+module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
+module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
+module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
+module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
+module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO);
+module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
+module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
+module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
+module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
+module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
MODULE_PARM_DESC(open_aqp1,
- "AQP1 on startup (0: no (default), 1: yes)");
+ "Open AQP1 on startup (default: no)");
MODULE_PARM_DESC(debug_level,
- "debug level"
- " (0: no debug traces (default), 1: with debug traces)");
+ "Amount of debug output (0: none (default), 1: traces, "
+ "2: some dumps, 3: lots)");
MODULE_PARM_DESC(hw_level,
- "hardware level"
- " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+ "Hardware level (0: autosensing (default), "
+ "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
MODULE_PARM_DESC(nr_ports,
- "number of connected ports (default: 2)");
+ "number of connected ports (-1: autodetect (default), "
+ "1: port one only, 2: two ports)");
MODULE_PARM_DESC(use_hp_mr,
- "high performance MRs (0: no (default), 1: yes)");
+ "Use high performance MRs (default: no)");
MODULE_PARM_DESC(port_act_time,
- "time to wait for port activation (default: 30 sec)");
+ "Time to wait for port activation (default: 30 sec)");
MODULE_PARM_DESC(poll_all_eqs,
- "polls all event queues periodically"
- " (0: no, 1: yes (default))");
+ "Poll all event queues periodically (default: yes)");
MODULE_PARM_DESC(static_rate,
- "set permanent static rate (default: disabled)");
-
-spinlock_t ehca_qp_idr_lock;
-spinlock_t ehca_cq_idr_lock;
+ "Set permanent static rate (default: no static rate)");
+MODULE_PARM_DESC(scaling_code,
+ "Enable scaling code (default: no)");
+MODULE_PARM_DESC(lock_hcalls,
+ "Serialize all hCalls made by the driver "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+ "Max number of CQs which can be allocated "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+ "Max number of QPs which can be allocated "
+ "(default: autodetect)");
+
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
-static struct list_head shca_list; /* list of all registered ehcas */
-static spinlock_t shca_list_lock;
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;
+#ifdef CONFIG_PPC_64K_PAGES
+static struct kmem_cache *ctblk_cache;
+
+/*
+ * Allocate one zero-filled firmware control block from ctblk_cache.
+ * Logs an error and returns NULL when the slab allocation fails.
+ */
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
+{
+ void *ret = kmem_cache_zalloc(ctblk_cache, flags);
+ if (!ret)
+ ehca_gen_err("Out of memory for ctblk");
+ return ret;
+}
+
+/* Give a control block back to ctblk_cache; a NULL ptr is silently ignored. */
+void ehca_free_fw_ctrlblock(void *ptr)
+{
+ if (ptr)
+ kmem_cache_free(ctblk_cache, ptr);
+
+}
+#endif
+
+/*
+ * Translate an hCall return code into 0 or a negative errno:
+ *   H_SUCCESS                                        -> 0
+ *   H_RESOURCE, H_BUSY                               -> -EBUSY
+ *   H_NOT_ENOUGH_RESOURCES, H_CONSTRAINED, H_NO_MEM  -> -ENOMEM
+ *   anything else                                    -> -EINVAL
+ */
+int ehca2ib_return_code(u64 ehca_rc)
+{
+ switch (ehca_rc) {
+ case H_SUCCESS:
+ return 0;
+ case H_RESOURCE: /* Resource in use */
+ case H_BUSY:
+ return -EBUSY;
+ case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+ case H_CONSTRAINED: /* resource constraint */
+ case H_NO_MEM:
+ return -ENOMEM;
+ default:
+ return -EINVAL;
+ }
+}
+
static int ehca_create_slab_caches(void)
{
int ret;
@@ -133,8 +197,29 @@ static int ehca_create_slab_caches(void)
goto create_slab_caches5;
}
+ ret = ehca_init_small_qp_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create small queue SLAB cache.");
+ goto create_slab_caches6;
+ }
+
+#ifdef CONFIG_PPC_64K_PAGES
+ ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
+ EHCA_PAGESIZE, H_CB_ALIGNMENT,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!ctblk_cache) {
+ ehca_gen_err("Cannot create ctblk SLAB cache.");
+ ehca_cleanup_small_qp_cache();
+ ret = -ENOMEM;
+ goto create_slab_caches6;
+ }
+#endif
return 0;
+create_slab_caches6:
+ ehca_cleanup_mrmw_cache();
+
create_slab_caches5:
ehca_cleanup_av_cache();
@@ -152,23 +237,68 @@ create_slab_caches2:
static void ehca_destroy_slab_caches(void)
{
+ ehca_cleanup_small_qp_cache();
ehca_cleanup_mrmw_cache();
ehca_cleanup_av_cache();
ehca_cleanup_qp_cache();
ehca_cleanup_cq_cache();
ehca_cleanup_pd_cache();
+#ifdef CONFIG_PPC_64K_PAGES
+ if (ctblk_cache)
+ kmem_cache_destroy(ctblk_cache);
+#endif
}
-#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39)
-#define EHCA_REVID EHCA_BMASK_IBM(40,63)
+#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
+#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
+
+/*
+ * HCA capability bit masks paired with their printable names.
+ * The table is only read (to list capabilities in debug output), so both
+ * the array and the name pointers are const.
+ */
+static const struct cap_descr {
+ u64 mask;
+ const char *descr;
+} hca_cap_descr[] = {
+ { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+ { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+ { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+ { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+ { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+ { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+ { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+ { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+ { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+ { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+ { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+ { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+ { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+ { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+ { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+ { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+ { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+ { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
+};
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
{
- int ret = 0;
+ int i, ret = 0;
u64 h_ret;
struct hipz_query_hca *rblock;
-
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ struct hipz_query_port *port;
+ const char *loc_code;
+
+ static const u32 pgsize_map[] = {
+ HCA_CAP_MR_PGSIZE_4K, 0x1000,
+ HCA_CAP_MR_PGSIZE_64K, 0x10000,
+ HCA_CAP_MR_PGSIZE_1M, 0x100000,
+ HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+ };
+
+ /* log which adapter is being probed; the location code is optional */
+ ehca_gen_dbg("Probing adapter %s...",
+ shca->ofdev->dev.of_node->full_name);
+ loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
+ NULL);
+ if (loc_code)
+ ehca_gen_dbg(" ... location code=%s", loc_code);
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_gen_err("Cannot allocate rblock memory.");
return -ENOMEM;
@@ -176,10 +306,10 @@ int ehca_sense_attributes(struct ehca_shca *shca)
h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
if (h_ret != H_SUCCESS) {
- ehca_gen_err("Cannot query device properties. h_ret=%lx",
+ ehca_gen_err("Cannot query device properties. h_ret=%lli",
h_ret);
ret = -EPERM;
- goto num_ports1;
+ goto sense_attributes1;
}
if (ehca_nr_ports == 1)
@@ -198,20 +328,82 @@ int ehca_sense_attributes(struct ehca_shca *shca)
ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
- if ((hcaaver == 1) && (revid == 0))
- shca->hw_level = 0;
- else if ((hcaaver == 1) && (revid == 1))
- shca->hw_level = 1;
- else if ((hcaaver == 1) && (revid == 2))
- shca->hw_level = 2;
- }
+ if (hcaaver == 1) {
+ if (revid <= 3)
+ shca->hw_level = 0x10 | (revid + 1);
+ else
+ shca->hw_level = 0x14;
+ } else if (hcaaver == 2) {
+ if (revid == 0)
+ shca->hw_level = 0x21;
+ else if (revid == 0x10)
+ shca->hw_level = 0x22;
+ else if (revid == 0x20 || revid == 0x21)
+ shca->hw_level = 0x23;
+ }
+
+ if (!shca->hw_level) {
+ ehca_gen_warn("unknown hardware version"
+ " - assuming default level");
+ shca->hw_level = 0x22;
+ }
+ } else
+ shca->hw_level = ehca_hw_level;
ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
- shca->sport[0].rate = IB_RATE_30_GBPS;
- shca->sport[1].rate = IB_RATE_30_GBPS;
+ shca->hca_cap = rblock->hca_cap_indicators;
+ ehca_gen_dbg(" ... HCA capabilities:");
+ for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+ if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+ ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
+
+ /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
+ * a firmware property, so it's valid across all adapters
+ */
+ if (ehca_lock_hcalls == -1)
+ ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
+ shca->hca_cap);
+
+ /* translate supported MR page sizes; always support 4K */
+ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+ for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+ if (rblock->memory_page_size_supported & pgsize_map[i])
+ shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+
+ /*
+ * Set maximum number of CQs and QPs to calculate EQ size.
+ * -1 means autodetect: use the HW limit, capped at EHCA_MAX_NUM_QUEUES.
+ * Any other value outside 1..HW limit is replaced by the HW limit.
+ */
+ if (shca->max_num_qps == -1)
+ shca->max_num_qps = min_t(int, rblock->max_qp,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+ ehca_gen_warn("The requested number of QPs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_qp, rblock->max_qp);
+ shca->max_num_qps = rblock->max_qp;
+ }
+
+ if (shca->max_num_cqs == -1)
+ shca->max_num_cqs = min_t(int, rblock->max_cq,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+ ehca_gen_warn("The requested number of CQs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_cq, rblock->max_cq);
+ /* actually apply the value the warning announces, as for QPs */
+ shca->max_num_cqs = rblock->max_cq;
+ }
+
+ /* query max MTU from first port -- it's the same for all ports */
+ port = (struct hipz_query_port *)rblock;
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+ if (h_ret != H_SUCCESS) {
+ ehca_gen_err("Cannot query port properties. h_ret=%lli",
+ h_ret);
+ ret = -EPERM;
+ goto sense_attributes1;
+ }
+
+ shca->max_mtu = port->max_mtu;
-num_ports1:
- kfree(rblock);
+sense_attributes1:
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
@@ -220,7 +412,7 @@ static int init_node_guid(struct ehca_shca *shca)
int ret = 0;
struct hipz_query_hca *rblock;
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
@@ -235,11 +427,11 @@ static int init_node_guid(struct ehca_shca *shca)
memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
init_node_guid1:
- kfree(rblock);
+ ehca_free_fw_ctrlblock(rblock);
return ret;
}
-int ehca_register_device(struct ehca_shca *shca)
+static int ehca_init_device(struct ehca_shca *shca)
{
int ret;
@@ -250,7 +442,7 @@ int ehca_register_device(struct ehca_shca *shca)
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner = THIS_MODULE;
- shca->ib_device.uverbs_abi_ver = 5;
+ shca->ib_device.uverbs_abi_ver = 8;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -271,7 +463,8 @@ int ehca_register_device(struct ehca_shca *shca)
shca->ib_device.node_type = RDMA_NODE_IB_CA;
shca->ib_device.phys_port_cnt = shca->num_ports;
- shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev;
+ shca->ib_device.num_comp_vectors = 1;
+ shca->ib_device.dma_device = &shca->ofdev->dev;
shca->ib_device.query_device = ehca_query_device;
shca->ib_device.query_port = ehca_query_port;
shca->ib_device.query_gid = ehca_query_gid;
@@ -314,13 +507,23 @@ int ehca_register_device(struct ehca_shca *shca)
shca->ib_device.dealloc_fmr = ehca_dealloc_fmr;
shca->ib_device.attach_mcast = ehca_attach_mcast;
shca->ib_device.detach_mcast = ehca_detach_mcast;
- /* shca->ib_device.process_mad = ehca_process_mad; */
+ shca->ib_device.process_mad = ehca_process_mad;
shca->ib_device.mmap = ehca_mmap;
-
- ret = ib_register_device(&shca->ib_device);
- if (ret)
- ehca_err(&shca->ib_device,
- "ib_register_device() failed ret=%x", ret);
+ shca->ib_device.dma_ops = &ehca_dma_mapping_ops;
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ shca->ib_device.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ shca->ib_device.create_srq = ehca_create_srq;
+ shca->ib_device.modify_srq = ehca_modify_srq;
+ shca->ib_device.query_srq = ehca_query_srq;
+ shca->ib_device.destroy_srq = ehca_destroy_srq;
+ shca->ib_device.post_srq_recv = ehca_post_srq_recv;
+ }
return ret;
}
@@ -338,14 +541,14 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
return -EPERM;
}
- ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+ ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0);
if (IS_ERR(ibcq)) {
ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
return PTR_ERR(ibcq);
}
sport->ibcq_aqp1 = ibcq;
- if (sport->ibqp_aqp1) {
+ if (sport->ibqp_sqp[IB_QPT_GSI]) {
ehca_err(&shca->ib_device, "AQP1 QP is already created.");
ret = -EPERM;
goto create_aqp1;
@@ -371,7 +574,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
ret = PTR_ERR(ibqp);
goto create_aqp1;
}
- sport->ibqp_aqp1 = ibqp;
+ sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
return 0;
@@ -384,23 +587,22 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
{
int ret;
- ret = ib_destroy_qp(sport->ibqp_aqp1);
+ ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
if (ret) {
- ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+ ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
return ret;
}
ret = ib_destroy_cq(sport->ibcq_aqp1);
if (ret)
- ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+ ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
return ret;
}
static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- ehca_debug_level);
+ return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
}
static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -412,18 +614,22 @@ static ssize_t ehca_store_debug_level(struct device_driver *ddp,
return 1;
}
-DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
- ehca_show_debug_level, ehca_store_debug_level);
+static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+ ehca_show_debug_level, ehca_store_debug_level);
-void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
-{
- driver_create_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute *ehca_drv_attrs[] = {
+ &driver_attr_debug_level.attr,
+ NULL
+};
-void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
-{
- driver_remove_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute_group ehca_drv_attr_grp = {
+ .attrs = ehca_drv_attrs
+};
+
+static const struct attribute_group *ehca_drv_attr_groups[] = {
+ &ehca_drv_attr_grp,
+ NULL,
+};
#define EHCA_RESOURCE_ATTR(name) \
static ssize_t ehca_show_##name(struct device *dev, \
@@ -434,22 +640,22 @@ static ssize_t ehca_show_##name(struct device *dev, \
struct hipz_query_hca *rblock; \
int data; \
\
- shca = dev->driver_data; \
+ shca = dev_get_drvdata(dev); \
\
- rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \
if (!rblock) { \
- dev_err(dev, "Can't allocate rblock memory."); \
+ dev_err(dev, "Can't allocate rblock memory.\n"); \
return 0; \
} \
\
if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
- dev_err(dev, "Can't query device properties"); \
- kfree(rblock); \
+ dev_err(dev, "Can't query device properties\n"); \
+ ehca_free_fw_ctrlblock(rblock); \
return 0; \
} \
\
data = rblock->name; \
- kfree(rblock); \
+ ehca_free_fw_ctrlblock(rblock); \
\
if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \
return snprintf(buf, 256, "1\n"); \
@@ -478,70 +684,54 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct ehca_shca *shca = dev->driver_data;
+ struct ehca_shca *shca = dev_get_drvdata(dev);
- return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+ return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+/* NULL-terminated list of the per-device attributes (dev_attr_*) */
+static struct attribute *ehca_dev_attrs[] = {
+ &dev_attr_adapter_handle.attr,
+ &dev_attr_num_ports.attr,
+ &dev_attr_hw_ver.attr,
+ &dev_attr_max_eq.attr,
+ &dev_attr_cur_eq.attr,
+ &dev_attr_max_cq.attr,
+ &dev_attr_cur_cq.attr,
+ &dev_attr_max_qp.attr,
+ &dev_attr_cur_qp.attr,
+ &dev_attr_max_mr.attr,
+ &dev_attr_cur_mr.attr,
+ &dev_attr_max_mw.attr,
+ &dev_attr_cur_mw.attr,
+ &dev_attr_max_pd.attr,
+ &dev_attr_max_ah.attr,
+ NULL
+};
-void ehca_create_device_sysfs(struct ibmebus_dev *dev)
-{
- device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
- device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
- device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
-
-void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
-{
- device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
- device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
- device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
+static struct attribute_group ehca_dev_attr_grp = {
+ .attrs = ehca_dev_attrs
+};
-static int __devinit ehca_probe(struct ibmebus_dev *dev,
- const struct of_device_id *id)
+static int ehca_probe(struct platform_device *dev)
{
struct ehca_shca *shca;
- u64 *handle;
+ const u64 *handle;
struct ib_pd *ibpd;
- int ret;
+ int ret, i, eq_size;
+ unsigned long flags;
- handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+ handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
if (!handle) {
ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
- dev->ofdev.node->full_name);
+ dev->dev.of_node->full_name);
return -ENODEV;
}
if (!(*handle)) {
ehca_gen_err("Wrong eHCA handle for adapter: %s.",
- dev->ofdev.node->full_name);
+ dev->dev.of_node->full_name);
return -ENODEV;
}
@@ -551,9 +741,18 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
return -ENOMEM;
}
- shca->ibmebus_dev = dev;
+ mutex_init(&shca->modify_mutex);
+ atomic_set(&shca->num_cqs, 0);
+ atomic_set(&shca->num_qps, 0);
+ shca->max_num_qps = ehca_max_qp;
+ shca->max_num_cqs = ehca_max_cq;
+
+ for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
+ spin_lock_init(&shca->sport[i].mod_sqp_lock);
+
+ shca->ofdev = dev;
shca->ipz_hca_handle.handle = *handle;
- dev->ofdev.dev.driver_data = shca;
+ dev_set_drvdata(&dev->dev, shca);
ret = ehca_sense_attributes(shca);
if (ret < 0) {
@@ -561,17 +760,18 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
goto probe1;
}
- ret = ehca_register_device(shca);
+ ret = ehca_init_device(shca);
if (ret) {
- ehca_gen_err("Cannot register Infiniband device");
+ ehca_gen_err("Cannot init ehca device struct");
goto probe1;
}
+ eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
/* create event queues */
- ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
if (ret) {
ehca_err(&shca->ib_device, "Cannot create EQ.");
- goto probe2;
+ goto probe1;
}
ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
@@ -581,7 +781,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
}
/* create internal protection domain */
- ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+ ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
if (IS_ERR(ibpd)) {
ehca_err(&shca->ib_device, "Cannot create internal PD.");
ret = PTR_ERR(ibpd);
@@ -595,11 +795,18 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
if (ret) {
- ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+ ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
ret);
goto probe5;
}
+ ret = ib_register_device(&shca->ib_device, NULL);
+ if (ret) {
+ ehca_err(&shca->ib_device,
+ "ib_register_device() failed ret=%i", ret);
+ goto probe6;
+ }
+
/* create AQP1 for port 1 */
if (ehca_open_aqp1 == 1) {
shca->sport[0].port_state = IB_PORT_DOWN;
@@ -607,7 +814,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
if (ret) {
ehca_err(&shca->ib_device,
"Cannot create AQP1 for port 1.");
- goto probe6;
+ goto probe7;
}
}
@@ -618,23 +825,29 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
if (ret) {
ehca_err(&shca->ib_device,
"Cannot create AQP1 for port 2.");
- goto probe7;
+ goto probe8;
}
}
- ehca_create_device_sysfs(dev);
+ ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
+ if (ret) /* only complain; we can live without attributes */
+ ehca_err(&shca->ib_device,
+ "Cannot create device attributes ret=%d", ret);
- spin_lock(&shca_list_lock);
+ spin_lock_irqsave(&shca_list_lock, flags);
list_add(&shca->shca_list, &shca_list);
- spin_unlock(&shca_list_lock);
+ spin_unlock_irqrestore(&shca_list_lock, flags);
return 0;
-probe7:
+probe8:
ret = ehca_destroy_aqp1(&shca->sport[0]);
if (ret)
ehca_err(&shca->ib_device,
- "Cannot destroy AQP1 for port 1. ret=%x", ret);
+ "Cannot destroy AQP1 for port 1. ret=%i", ret);
+
+probe7:
+ ib_unregister_device(&shca->ib_device);
probe6:
ret = ehca_dereg_internal_maxmr(shca);
@@ -660,21 +873,19 @@ probe3:
ehca_err(&shca->ib_device,
"Cannot destroy EQ. ret=%x", ret);
-probe2:
- ib_unregister_device(&shca->ib_device);
-
probe1:
ib_dealloc_device(&shca->ib_device);
return -EINVAL;
}
-static int __devexit ehca_remove(struct ibmebus_dev *dev)
+static int ehca_remove(struct platform_device *dev)
{
- struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+ struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
+ unsigned long flags;
int ret;
- ehca_remove_device_sysfs(dev);
+ sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
if (ehca_open_aqp1 == 1) {
int i;
@@ -683,7 +894,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
if (ret)
ehca_err(&shca->ib_device,
"Cannot destroy AQP1 for port %x "
- "ret=%x", ret, i);
+ "ret=%i", i, ret);
}
}
@@ -692,26 +903,26 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
ret = ehca_dereg_internal_maxmr(shca);
if (ret)
ehca_err(&shca->ib_device,
- "Cannot destroy internal MR. ret=%x", ret);
+ "Cannot destroy internal MR. ret=%i", ret);
ret = ehca_dealloc_pd(&shca->pd->ib_pd);
if (ret)
ehca_err(&shca->ib_device,
- "Cannot destroy internal PD. ret=%x", ret);
+ "Cannot destroy internal PD. ret=%i", ret);
ret = ehca_destroy_eq(shca, &shca->eq);
if (ret)
- ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+ ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
ret = ehca_destroy_eq(shca, &shca->neq);
if (ret)
- ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
+ ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%i", ret);
ib_dealloc_device(&shca->ib_device);
- spin_lock(&shca_list_lock);
+ spin_lock_irqsave(&shca_list_lock, flags);
list_del(&shca->shca_list);
- spin_unlock(&shca_list_lock);
+ spin_unlock_irqrestore(&shca_list_lock, flags);
return ret;
}
@@ -724,12 +935,17 @@ static struct of_device_id ehca_device_table[] =
},
{},
};
-
-static struct ibmebus_driver ehca_driver = {
- .name = "ehca",
- .id_table = ehca_device_table,
- .probe = ehca_probe,
- .remove = ehca_remove,
+MODULE_DEVICE_TABLE(of, ehca_device_table);
+
+static struct platform_driver ehca_driver = {
+ .probe = ehca_probe,
+ .remove = ehca_remove,
+ .driver = {
+ .name = "ehca",
+ .owner = THIS_MODULE,
+ .groups = ehca_drv_attr_groups,
+ .of_match_table = ehca_device_table,
+ },
};
void ehca_poll_eqs(unsigned long data)
@@ -738,45 +954,102 @@ void ehca_poll_eqs(unsigned long data)
spin_lock(&shca_list_lock);
list_for_each_entry(shca, &shca_list, shca_list) {
- if (shca->eq.is_initialized)
- ehca_tasklet_eq((unsigned long)(void*)shca);
+ if (shca->eq.is_initialized) {
+ /* call deadman proc only if eq ptr does not change */
+ struct ehca_eq *eq = &shca->eq;
+ int max = 3;
+ volatile u64 q_ofs, q_ofs2;
+ unsigned long flags;
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ do {
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs2 = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ max--;
+ } while (q_ofs == q_ofs2 && max > 0);
+ if (q_ofs == q_ofs2)
+ ehca_process_eq(shca, 0);
+ }
}
- mod_timer(&poll_eqs_timer, jiffies + HZ);
+ mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
spin_unlock(&shca_list_lock);
}
-int __init ehca_module_init(void)
+/*
+ * Memory hotplug notifier callback.
+ *
+ * MEM_GOING_ONLINE and MEM_GOING_OFFLINE are refused with NOTIFY_BAD while
+ * at least one adapter is on shca_list; the accompanying error message is
+ * throttled through printk_timed_ratelimit(). Every other action, and a
+ * GOING_* action with an empty shca_list, returns NOTIFY_OK.
+ */
+static int ehca_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ static unsigned long ehca_dmem_warn_time;
+ unsigned long flags;
+
+ switch (action) {
+ case MEM_CANCEL_OFFLINE:
+ case MEM_CANCEL_ONLINE:
+ case MEM_ONLINE:
+ case MEM_OFFLINE:
+ return NOTIFY_OK;
+ case MEM_GOING_ONLINE:
+ case MEM_GOING_OFFLINE:
+ /* only ok if no hca is attached to the lpar */
+ spin_lock_irqsave(&shca_list_lock, flags);
+ if (list_empty(&shca_list)) {
+ spin_unlock_irqrestore(&shca_list_lock, flags);
+ return NOTIFY_OK;
+ } else {
+ spin_unlock_irqrestore(&shca_list_lock, flags);
+ if (printk_timed_ratelimit(&ehca_dmem_warn_time,
+ 30 * 1000))
+ ehca_gen_err("DMEM operations are not allowed"
+ " in conjunction with eHCA");
+ return NOTIFY_BAD;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ehca_mem_nb = {
+ .notifier_call = ehca_mem_notifier,
+};
+
+static int __init ehca_module_init(void)
{
int ret;
printk(KERN_INFO "eHCA Infiniband Device Driver "
- "(Rel.: SVNEHCA_0016)\n");
- idr_init(&ehca_qp_idr);
- idr_init(&ehca_cq_idr);
- spin_lock_init(&ehca_qp_idr_lock);
- spin_lock_init(&ehca_cq_idr_lock);
+ "(Version " HCAD_VERSION ")\n");
- INIT_LIST_HEAD(&shca_list);
- spin_lock_init(&shca_list_lock);
-
- if ((ret = ehca_create_comp_pool())) {
+ ret = ehca_create_comp_pool();
+ if (ret) {
ehca_gen_err("Cannot create comp pool.");
return ret;
}
- if ((ret = ehca_create_slab_caches())) {
+ ret = ehca_create_slab_caches();
+ if (ret) {
ehca_gen_err("Cannot create SLAB caches");
ret = -ENOMEM;
goto module_init1;
}
- if ((ret = ibmebus_register_driver(&ehca_driver))) {
+ ret = ehca_create_busmap();
+ if (ret) {
+ ehca_gen_err("Cannot create busmap.");
+ goto module_init2;
+ }
+
+ ret = ibmebus_register_driver(&ehca_driver);
+ if (ret) {
ehca_gen_err("Cannot register eHCA device driver");
ret = -EINVAL;
- goto module_init2;
+ goto module_init3;
}
- ehca_create_driver_sysfs(&ehca_driver);
+ ret = register_memory_notifier(&ehca_mem_nb);
+ if (ret) {
+ ehca_gen_err("Failed registering memory add/remove notifier");
+ goto module_init4;
+ }
if (ehca_poll_all_eqs != 1) {
ehca_gen_err("WARNING!!!");
@@ -790,6 +1063,12 @@ int __init ehca_module_init(void)
return 0;
+module_init4:
+ ibmebus_unregister_driver(&ehca_driver);
+
+module_init3:
+ ehca_destroy_busmap();
+
module_init2:
ehca_destroy_slab_caches();
@@ -798,14 +1077,17 @@ module_init1:
return ret;
};
-void __exit ehca_module_exit(void)
+static void __exit ehca_module_exit(void)
{
if (ehca_poll_all_eqs == 1)
del_timer_sync(&poll_eqs_timer);
- ehca_remove_driver_sysfs(&ehca_driver);
ibmebus_unregister_driver(&ehca_driver);
+ unregister_memory_notifier(&ehca_mem_nb);
+
+ ehca_destroy_busmap();
+
ehca_destroy_slab_caches();
ehca_destroy_comp_pool();
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
index 32a870660bf..120aedf9f98 100644
--- a/drivers/infiniband/hw/ehca/ehca_mcast.c
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
- "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+ "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
@@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
- "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+ "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 5ca65441e1d..3488e8c9fcb 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -5,6 +5,7 @@
*
* Authors: Dietmar Decker <ddecker@de.ibm.com>
* Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -39,25 +40,89 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <asm/current.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "hcp_if.h"
#include "hipz_hw.h"
+/*
+ * Number of chunk_size-sized chunks needed to cover length (rounds up).
+ * Both arguments are fully parenthesized so expressions such as
+ * "1 << shift" expand correctly; chunk_size is evaluated more than once.
+ */
+#define NUM_CHUNKS(length, chunk_size) \
+ (((length) + ((chunk_size) - 1)) / (chunk_size))
+
+/* max number of rpages (per hcall register_rpages) */
+#define MAX_RPAGES 512
+
+/* DMEM toleration management */
+#define EHCA_SECTSHIFT SECTION_SIZE_BITS
+#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT)
+#define EHCA_HUGEPAGESHIFT 34
+#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT)
+#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
+#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL
+#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */
+#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
+#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
+#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */
+#define EHCA_DIR_MAP_SIZE (0x10000)
+#define EHCA_ENT_MAP_SIZE (0x10000)
+#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
+
+static unsigned long ehca_mr_len;
+
+/*
+ * Memory map data structures
+ */
+struct ehca_dir_bmap {
+ u64 ent[EHCA_MAP_ENTRIES];
+};
+struct ehca_top_bmap {
+ struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
+};
+struct ehca_bmap {
+ struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
+};
+
+static struct ehca_bmap *ehca_bmap;
+
static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;
+enum ehca_mr_pgsize {
+ EHCA_MR_PGSIZE4K = 0x1000L,
+ EHCA_MR_PGSIZE64K = 0x10000L,
+ EHCA_MR_PGSIZE1M = 0x100000L,
+ EHCA_MR_PGSIZE16M = 0x1000000L
+};
+
+#define EHCA_MR_PGSHIFT4K 12
+#define EHCA_MR_PGSHIFT64K 16
+#define EHCA_MR_PGSHIFT1M 20
+#define EHCA_MR_PGSHIFT16M 24
+
+static u64 ehca_map_vaddr(void *caddr);
+
+/*
+ * Encode a page size as (log2(pgsize) - 12) / 4, i.e.
+ * 4K -> 0, 64K -> 1, 1M -> 2, 16M -> 3.
+ * Triggers WARN_ON when log2(pgsize) is below 12, above 24 or not a
+ * multiple of 4; the value is still computed and returned in that case.
+ */
+static u32 ehca_encode_hwpage_size(u32 pgsize)
+{
+ int log = ilog2(pgsize);
+ WARN_ON(log < 12 || log > 24 || log & 3);
+ return (log - 12) / 4;
+}
+
+/*
+ * Return shca->hca_cap_mr_pgsize rounded down to a power of two.
+ * NOTE(review): hca_cap_mr_pgsize looks like an OR of supported page
+ * sizes, which would make this the largest supported size -- confirm.
+ */
+static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
+{
+ return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
+}
+
static struct ehca_mr *ehca_mr_new(void)
{
struct ehca_mr *me;
- me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
- if (me) {
- memset(me, 0, sizeof(struct ehca_mr));
+ me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
+ if (me)
spin_lock_init(&me->mrlock);
- } else
+ else
ehca_gen_err("alloc failed");
return me;
@@ -72,11 +137,10 @@ static struct ehca_mw *ehca_mw_new(void)
{
struct ehca_mw *me;
- me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
- if (me) {
- memset(me, 0, sizeof(struct ehca_mw));
+ me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
+ if (me)
spin_lock_init(&me->mwlock);
- } else
+ else
ehca_gen_err("alloc failed");
return me;
@@ -106,11 +170,13 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
goto get_dma_mr_exit0;
}
- ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
+ ret = ehca_reg_maxmr(shca, e_maxmr,
+ (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
mr_access_flags, e_pd,
&e_maxmr->ib.ib_mr.lkey,
&e_maxmr->ib.ib_mr.rkey);
if (ret) {
+ ehca_mr_delete(e_maxmr);
ib_mr = ERR_PTR(ret);
goto get_dma_mr_exit0;
}
@@ -123,7 +189,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
get_dma_mr_exit0:
if (IS_ERR(ib_mr))
- ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+ ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
PTR_ERR(ib_mr), pd, mr_access_flags);
return ib_mr;
} /* end ehca_get_dma_mr() */
@@ -144,9 +210,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
u64 size;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
if ((num_phys_buf <= 0) || !phys_buf_array) {
ehca_err(pd->device, "bad input values: num_phys_buf=%x "
@@ -177,7 +240,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
}
if ((size == 0) ||
(((u64)iova_start + size) < (u64)iova_start)) {
- ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+ ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
size, iova_start);
ib_mr = ERR_PTR(-EINVAL);
goto reg_phys_mr_exit0;
@@ -190,12 +253,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
goto reg_phys_mr_exit0;
}
- /* determine number of MR pages */
- num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
-
/* register MR on HCA */
if (ehca_mr_is_maxmr(size, iova_start)) {
e_mr->flags |= EHCA_MR_FLAG_MAXMR;
@@ -207,17 +264,30 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
goto reg_phys_mr_exit1;
}
} else {
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = num_phys_buf;
- pginfo.phys_buf_array = phys_buf_array;
- pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
- EHCA_PAGESIZE);
+ struct ehca_mr_pginfo pginfo;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
+
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
+ PAGE_SIZE);
+ /* for kernel space we try most possible pgsize */
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
+ hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
if (ret) {
ib_mr = ERR_PTR(ret);
goto reg_phys_mr_exit1;
@@ -231,7 +301,7 @@ reg_phys_mr_exit1:
ehca_mr_delete(e_mr);
reg_phys_mr_exit0:
if (IS_ERR(ib_mr))
- ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+ ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
"num_phys_buf=%x mr_access_flags=%x iova_start=%p",
PTR_ERR(ib_mr), pd, phys_buf_array,
num_phys_buf, mr_access_flags, iova_start);
@@ -240,9 +310,8 @@ reg_phys_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
- int mr_access_flags,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
struct ib_udata *udata)
{
struct ib_mr *ib_mr;
@@ -250,20 +319,17 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
struct ehca_shca *shca =
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- int ret;
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
+ struct ehca_mr_pginfo pginfo;
+ int ret, page_shift;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hwpage_size;
if (!pd) {
ehca_gen_err("bad pd=%p", pd);
return ERR_PTR(-EFAULT);
}
- if (!region) {
- ehca_err(pd->device, "bad input values: region=%p", region);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
+
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
!(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -277,17 +343,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
- if (region->page_size != PAGE_SIZE) {
- ehca_err(pd->device, "page size not supported, "
- "region->page_size=%x", region->page_size);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
- if ((region->length == 0) ||
- ((region->virt_base + region->length) < region->virt_base)) {
- ehca_err(pd->device, "bad input values: length=%lx "
- "virt_base=%lx", region->length, region->virt_base);
+ if (length == 0 || virt + length < virt) {
+ ehca_err(pd->device, "bad input values: length=%llx "
+ "virt_base=%llx", length, virt);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
@@ -299,40 +358,80 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
goto reg_user_mr_exit0;
}
- /* determine number of MR pages */
- num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+ e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(e_mr->umem)) {
+ ib_mr = (void *)e_mr->umem;
+ goto reg_user_mr_exit1;
+ }
+
+ if (e_mr->umem->page_size != PAGE_SIZE) {
+ ehca_err(pd->device, "page size not supported, "
+ "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit2;
+ }
+ /* determine number of MR pages */
+ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
+ /* select proper hw_pgsize */
+ page_shift = PAGE_SHIFT;
+ if (e_mr->umem->hugetlb) {
+ /* determine page_shift, clamp between 4K and 16M */
+ page_shift = (fls64(length - 1) + 3) & ~3;
+ page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+ EHCA_MR_PGSHIFT16M);
+ }
+ hwpage_size = 1UL << page_shift;
+
+ /* now that we have the desired page size, shift until it's
+ * supported, too. 4K is always supported, so this terminates.
+ */
+ while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+ hwpage_size >>= 4;
+
+reg_user_mr_fallback:
+ num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
/* register MR on HCA */
- pginfo.type = EHCA_MR_PGI_USER;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.region = region;
- pginfo.next_4k = region->offset / EHCA_PAGESIZE;
- pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
- (&region->chunk_list),
- list);
-
- ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
- region->length, mr_access_flags, e_pd, &pginfo,
- &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_USER;
+ pginfo.hwpage_size = hwpage_size;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.usr.region = e_mr->umem;
+ pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+ pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
+ ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
+ e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
+ if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
+ ehca_warn(pd->device, "failed to register mr "
+ "with hwpage_size=%llx", hwpage_size);
+ ehca_info(pd->device, "try to register mr with "
+ "kpage_size=%lx", PAGE_SIZE);
+ /*
+ * this means kpages are not contiguous for a hw page
+ * try kernel page size as fallback solution
+ */
+ hwpage_size = PAGE_SIZE;
+ goto reg_user_mr_fallback;
+ }
if (ret) {
ib_mr = ERR_PTR(ret);
- goto reg_user_mr_exit1;
+ goto reg_user_mr_exit2;
}
/* successful registration of all pages */
return &e_mr->ib.ib_mr;
+reg_user_mr_exit2:
+ ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
ehca_mr_delete(e_mr);
reg_user_mr_exit0:
if (IS_ERR(ib_mr))
- ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
- " udata=%p",
- PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+ ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
+ PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr;
} /* end ehca_reg_user_mr() */
@@ -351,25 +450,15 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
struct ehca_shca *shca =
container_of(mr->device, struct ehca_shca, ib_device);
struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
u64 new_size;
u64 *new_start;
u32 new_acl;
struct ehca_pd *new_pd;
u32 tmp_lkey, tmp_rkey;
unsigned long sl_flags;
- u32 num_pages_mr = 0;
- u32 num_pages_4k = 0; /* 4k portion "pages" */
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- u32 cur_pid = current->tgid;
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- (my_pd->ownpid != cur_pid)) {
- ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
+ u32 num_kpages = 0;
+ u32 num_hwpages = 0;
+ struct ehca_mr_pginfo pginfo;
if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
/* TODO not supported, because PHYP rereg hCall needs pages */
@@ -412,7 +501,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
goto rereg_phys_mr_exit0;
}
if (!phys_buf_array || num_phys_buf <= 0) {
- ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+ ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
" phys_buf_array=%p num_phys_buf=%x",
mr_rereg_mask, phys_buf_array, num_phys_buf);
ret = -EINVAL;
@@ -436,12 +525,14 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
/* set requested values dependent on rereg request */
spin_lock_irqsave(&e_mr->mrlock, sl_flags);
- new_start = e_mr->start; /* new == old address */
- new_size = e_mr->size; /* new == old length */
- new_acl = e_mr->acl; /* new == old access control */
- new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/
+ new_start = e_mr->start;
+ new_size = e_mr->size;
+ new_acl = e_mr->acl;
+ new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
+
new_start = iova_start; /* change address */
/* check physical buffer list and calculate size */
ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
@@ -451,22 +542,24 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
goto rereg_phys_mr_exit1;
if ((new_size == 0) ||
(((u64)iova_start + new_size) < (u64)iova_start)) {
- ehca_err(mr->device, "bad input values: new_size=%lx "
+ ehca_err(mr->device, "bad input values: new_size=%llx "
"iova_start=%p", new_size, iova_start);
ret = -EINVAL;
goto rereg_phys_mr_exit1;
}
- num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = num_phys_buf;
- pginfo.phys_buf_array = phys_buf_array;
- pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
- EHCA_PAGESIZE);
+ num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
+ new_size, PAGE_SIZE);
+ num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
+ new_size, hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
}
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
new_acl = mr_access_flags;
@@ -488,7 +581,7 @@ rereg_phys_mr_exit1:
spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
rereg_phys_mr_exit0:
if (ret)
- ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+ ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
"phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
"iova_start=%p",
ret, mr, mr_rereg_mask, pd, phys_buf_array,
@@ -505,18 +598,8 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
struct ehca_shca *shca =
container_of(mr->device, struct ehca_shca, ib_device);
struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
- u32 cur_pid = current->tgid;
unsigned long sl_flags;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- (my_pd->ownpid != cur_pid)) {
- ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- ret = -EINVAL;
- goto query_mr_exit0;
- }
+ struct ehca_mr_hipzout_parms hipzout;
if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
@@ -530,25 +613,25 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
- "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
+ "hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca_mrmw_map_hrc_query_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto query_mr_exit1;
}
- mr_attr->pd = mr->pd;
+ mr_attr->pd = mr->pd;
mr_attr->device_virt_addr = hipzout.vaddr;
- mr_attr->size = hipzout.len;
- mr_attr->lkey = hipzout.lkey;
- mr_attr->rkey = hipzout.rkey;
+ mr_attr->size = hipzout.len;
+ mr_attr->lkey = hipzout.lkey;
+ mr_attr->rkey = hipzout.rkey;
ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
query_mr_exit1:
spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
query_mr_exit0:
if (ret)
- ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+ ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
ret, mr, mr_attr);
return ret;
} /* end ehca_query_mr() */
@@ -562,16 +645,6 @@ int ehca_dereg_mr(struct ib_mr *mr)
struct ehca_shca *shca =
container_of(mr->device, struct ehca_shca, ib_device);
struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
- u32 cur_pid = current->tgid;
-
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- (my_pd->ownpid != cur_pid)) {
- ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- ret = -EINVAL;
- goto dereg_mr_exit0;
- }
if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
@@ -590,26 +663,29 @@ int ehca_dereg_mr(struct ib_mr *mr)
/* TODO: BUSY: MR still has bound window(s) */
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
- "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+ ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
+ "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto dereg_mr_exit0;
}
+ if (e_mr->umem)
+ ib_umem_release(e_mr->umem);
+
/* successful deregistration */
ehca_mr_delete(e_mr);
dereg_mr_exit0:
if (ret)
- ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+ ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
return ret;
} /* end ehca_dereg_mr() */
/*----------------------------------------------------------------------*/
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *ib_mw;
u64 h_ret;
@@ -617,7 +693,10 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca =
container_of(pd->device, struct ehca_shca, ib_device);
- struct ehca_mw_hipzout_parms hipzout = {{0},0};
+ struct ehca_mw_hipzout_parms hipzout;
+
+ if (type != IB_MW_TYPE_1)
+ return ERR_PTR(-EINVAL);
e_mw = ehca_mw_new();
if (!e_mw) {
@@ -628,10 +707,10 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
- "shca=%p hca_hndl=%lx mw=%p",
+ ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
+ "shca=%p hca_hndl=%llx mw=%p",
h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
- ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
+ ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
goto alloc_mw_exit1;
}
/* successful MW allocation */
@@ -643,7 +722,7 @@ alloc_mw_exit1:
ehca_mw_delete(e_mw);
alloc_mw_exit0:
if (IS_ERR(ib_mw))
- ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
+ ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
return ib_mw;
} /* end ehca_alloc_mw() */
@@ -670,11 +749,11 @@ int ehca_dealloc_mw(struct ib_mw *mw)
h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
if (h_ret != H_SUCCESS) {
- ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
- "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+ ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
+ "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
e_mw->ipz_mw_handle.handle);
- return ehca_mrmw_map_hrc_free_mw(h_ret);
+ return ehca2ib_return_code(h_ret);
}
/* successful deallocation */
ehca_mw_delete(e_mw);
@@ -694,7 +773,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
struct ehca_mr *e_fmr;
int ret;
u32 tmp_lkey, tmp_rkey;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
+ u64 hw_pgsize;
/* check other parameters */
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
@@ -724,8 +804,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
ib_fmr = ERR_PTR(-EINVAL);
goto alloc_fmr_exit0;
}
- if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
- ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+
+ hw_pgsize = 1 << fmr_attr->page_shift;
+ if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
fmr_attr->page_shift);
ib_fmr = ERR_PTR(-EINVAL);
@@ -740,16 +821,23 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
e_fmr->flags |= EHCA_MR_FLAG_FMR;
/* register MR on HCA */
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.hwpage_size = hw_pgsize;
+ /*
+ * pginfo.num_hwpages==0, ie register_rpages() will not be called
+ * but deferred to map_phys_fmr()
+ */
ret = ehca_reg_mr(shca, e_fmr, NULL,
fmr_attr->max_pages * (1 << fmr_attr->page_shift),
mr_access_flags, e_pd, &pginfo,
- &tmp_lkey, &tmp_rkey);
+ &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
if (ret) {
ib_fmr = ERR_PTR(ret);
goto alloc_fmr_exit1;
}
/* successful */
+ e_fmr->hwpage_size = hw_pgsize;
e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
e_fmr->fmr_max_pages = fmr_attr->max_pages;
e_fmr->fmr_max_maps = fmr_attr->max_maps;
@@ -759,10 +847,6 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
alloc_fmr_exit1:
ehca_mr_delete(e_fmr);
alloc_fmr_exit0:
- if (IS_ERR(ib_fmr))
- ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
- "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
- mr_access_flags, fmr_attr);
return ib_fmr;
} /* end ehca_alloc_fmr() */
@@ -778,7 +862,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
container_of(fmr->device, struct ehca_shca, ib_device);
struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
u32 tmp_lkey, tmp_rkey;
if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
@@ -792,7 +876,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
goto map_phys_fmr_exit0;
if (iova % e_fmr->fmr_page_size) {
/* only whole-numbered pages */
- ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+ ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
iova, e_fmr->fmr_page_size);
ret = -EINVAL;
goto map_phys_fmr_exit0;
@@ -804,14 +888,18 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
}
- pginfo.type = EHCA_MR_PGI_FMR;
- pginfo.num_pages = list_len;
- pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
- pginfo.page_list = page_list;
- pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) /
- EHCA_PAGESIZE);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_FMR;
+ pginfo.num_kpages = list_len;
+ pginfo.hwpage_size = e_fmr->hwpage_size;
+ pginfo.num_hwpages =
+ list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
+ pginfo.u.fmr.page_list = page_list;
+ pginfo.next_hwpage =
+ (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
+ pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
- ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+ ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
list_len * e_fmr->fmr_page_size,
e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
if (ret)
@@ -825,9 +913,8 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
map_phys_fmr_exit0:
if (ret)
- ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
- "iova=%lx",
- ret, fmr, page_list, list_len, iova);
+ ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
+ "iova=%llx", ret, fmr, page_list, list_len, iova);
return ret;
} /* end ehca_map_phys_fmr() */
@@ -846,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
/* check all FMR belong to same SHCA, and check internal flag */
list_for_each_entry(ib_fmr, fmr_list, list) {
prev_shca = shca;
- if (!ib_fmr) {
- ehca_gen_err("bad fmr=%p in list", ib_fmr);
- ret = -EINVAL;
- goto unmap_fmr_exit0;
- }
shca = container_of(ib_fmr->device, struct ehca_shca,
ib_device);
e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
@@ -889,7 +971,7 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
unmap_fmr_exit0:
if (ret)
- ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+ ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
ret, fmr_list, num_fmr, unmap_fmr_cnt);
return ret;
} /* end ehca_unmap_fmr() */
@@ -913,11 +995,11 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
if (h_ret != H_SUCCESS) {
- ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
- "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+ ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
+ "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle, fmr->lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto free_fmr_exit0;
}
/* successful deregistration */
@@ -926,12 +1008,16 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
free_fmr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
+ ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
return ret;
} /* end ehca_dealloc_fmr() */
/*----------------------------------------------------------------------*/
+static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo);
+
int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_mr *e_mr,
u64 *iova_start,
@@ -940,40 +1026,48 @@ int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_pd *e_pd,
struct ehca_mr_pginfo *pginfo,
u32 *lkey, /*OUT*/
- u32 *rkey) /*OUT*/
+ u32 *rkey, /*OUT*/
+ enum ehca_reg_type reg_type)
{
int ret;
u64 h_ret;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
if (ehca_use_hp_mr == 1)
- hipz_acl |= 0x00000001;
+ hipz_acl |= 0x00000001;
h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
(u64)iova_start, size, hipz_acl,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
- "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
- ret = ehca_mrmw_map_hrc_alloc(h_ret);
+ ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
+ "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_reg_mr_exit0;
}
e_mr->ipz_mr_handle = hipzout.handle;
- ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+ if (reg_type == EHCA_REG_BUSMAP_MR)
+ ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
+ else if (reg_type == EHCA_REG_MR)
+ ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+ else
+ ret = -EINVAL;
+
if (ret)
goto ehca_reg_mr_exit1;
/* successful registration */
- e_mr->num_pages = pginfo->num_pages;
- e_mr->num_4k = pginfo->num_4k;
- e_mr->start = iova_start;
- e_mr->size = size;
- e_mr->acl = acl;
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
return 0;
@@ -981,22 +1075,22 @@ int ehca_reg_mr(struct ehca_shca *shca,
ehca_reg_mr_exit1:
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
- "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+ ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
+ "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
+ "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
h_ret, shca, e_mr, iova_start, size, acl, e_pd,
- hipzout.lkey, pginfo, pginfo->num_pages,
- pginfo->num_4k, ret);
+ hipzout.lkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages, ret);
ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
"not recoverable");
}
ehca_reg_mr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_pages=%lx num_4k=%lx",
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%llx num_hwpages=%llx",
ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
- pginfo->num_pages, pginfo->num_4k);
+ pginfo->num_kpages, pginfo->num_hwpages);
return ret;
} /* end ehca_reg_mr() */
@@ -1013,78 +1107,75 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
u32 i;
u64 *kpage;
- kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ if (!pginfo->num_hwpages) /* in case of fmr */
+ return 0;
+
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!kpage) {
ehca_err(&shca->ib_device, "kpage alloc failed");
ret = -ENOMEM;
goto ehca_reg_mr_rpages_exit0;
}
- /* max 512 pages per shot */
- for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
+ /* max MAX_RPAGES ehca mr pages per register call */
+ for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
- if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
- rnum = pginfo->num_4k % 512; /* last shot */
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
+ rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
if (rnum == 0)
- rnum = 512; /* last shot is full */
+ rnum = MAX_RPAGES; /* last shot is full */
} else
- rnum = 512;
+ rnum = MAX_RPAGES;
+
+ ret = ehca_set_pagebuf(pginfo, rnum, kpage);
+ if (ret) {
+ ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+ "bad rc, ret=%i rnum=%x kpage=%p",
+ ret, rnum, kpage);
+ goto ehca_reg_mr_rpages_exit1;
+ }
if (rnum > 1) {
- ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
- if (ret) {
- ehca_err(&shca->ib_device, "ehca_set_pagebuf "
- "bad rc, ret=%x rnum=%x kpage=%p",
- ret, rnum, kpage);
- ret = -EFAULT;
- goto ehca_reg_mr_rpages_exit1;
- }
- rpage = virt_to_abs(kpage);
+ rpage = __pa(kpage);
if (!rpage) {
ehca_err(&shca->ib_device, "kpage=%p i=%x",
kpage, i);
ret = -EFAULT;
goto ehca_reg_mr_rpages_exit1;
}
- } else { /* rnum==1 */
- ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
- if (ret) {
- ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
- "bad rc, ret=%x i=%x", ret, i);
- ret = -EFAULT;
- goto ehca_reg_mr_rpages_exit1;
- }
- }
+ } else
+ rpage = *kpage;
- h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
- 0, /* pagesize 4k */
- 0, rpage, rnum);
+ h_ret = hipz_h_register_rpage_mr(
+ shca->ipz_hca_handle, e_mr,
+ ehca_encode_hwpage_size(pginfo->hwpage_size),
+ 0, rpage, rnum);
- if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
/*
* check for 'registration complete'==H_SUCCESS
* and for 'page registered'==H_PAGE_REGISTERED
*/
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "last "
- "hipz_reg_rpage_mr failed, h_ret=%lx "
- "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+ "hipz_reg_rpage_mr failed, h_ret=%lli "
+ "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
" lkey=%x", h_ret, e_mr, i,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+ ret = ehca2ib_return_code(h_ret);
break;
} else
ret = 0;
} else if (h_ret != H_PAGE_REGISTERED) {
ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
- "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
- "mr_hndl=%lx", h_ret, e_mr, i,
+ "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
+ "mr_hndl=%llx", h_ret, e_mr, i,
e_mr->ib.ib_mr.lkey,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle);
- ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+ ret = ehca2ib_return_code(h_ret);
break;
} else
ret = 0;
@@ -1092,12 +1183,12 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ehca_reg_mr_rpages_exit1:
- kfree(kpage);
+ ehca_free_fw_ctrlblock(kpage);
ehca_reg_mr_rpages_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
- "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
- pginfo->num_pages, pginfo->num_4k);
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
+ "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
+ pginfo, pginfo->num_kpages, pginfo->num_hwpages);
return ret;
} /* end ehca_reg_mr_rpages() */
@@ -1119,12 +1210,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
u64 *kpage;
u64 rpage;
struct ehca_mr_pginfo pginfo_save;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
- kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!kpage) {
ehca_err(&shca->ib_device, "kpage alloc failed");
ret = -ENOMEM;
@@ -1132,15 +1223,15 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
}
pginfo_save = *pginfo;
- ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
+ ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
if (ret) {
ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
- "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
- e_mr, pginfo, pginfo->type, pginfo->num_pages,
- pginfo->num_4k,kpage);
+ "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
+ "kpage=%p", e_mr, pginfo, pginfo->type,
+ pginfo->num_kpages, pginfo->num_hwpages, kpage);
goto ehca_rereg_mr_rereg1_exit1;
}
- rpage = virt_to_abs(kpage);
+ rpage = __pa(kpage);
if (!rpage) {
ehca_err(&shca->ib_device, "kpage=%p", kpage);
ret = -EFAULT;
@@ -1156,13 +1247,13 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
* (MW bound or MR is shared)
*/
ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
- "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+ "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
*pginfo = pginfo_save;
ret = -EAGAIN;
- } else if ((u64*)hipzout.vaddr != iova_start) {
+ } else if ((u64 *)hipzout.vaddr != iova_start) {
ehca_err(&shca->ib_device, "PHYP changed iova_start in "
- "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
- "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+ "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
+ "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey, hipzout.lkey);
ret = -EFAULT;
@@ -1171,23 +1262,24 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
* successful reregistration
* note: start and start_out are identical for eServer HCAs
*/
- e_mr->num_pages = pginfo->num_pages;
- e_mr->num_4k = pginfo->num_4k;
- e_mr->start = iova_start;
- e_mr->size = size;
- e_mr->acl = acl;
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
}
ehca_rereg_mr_rereg1_exit1:
- kfree(kpage);
+ ehca_free_fw_ctrlblock(kpage);
ehca_rereg_mr_rereg1_exit0:
if ( ret && (ret != -EAGAIN) )
- ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
- "pginfo=%p num_pages=%lx num_4k=%lx",
- ret, *lkey, *rkey, pginfo, pginfo->num_pages,
- pginfo->num_4k);
+ ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
+ "pginfo=%p num_kpages=%llx num_hwpages=%llx",
+ ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages);
return ret;
} /* end ehca_rereg_mr_rereg1() */
@@ -1209,10 +1301,12 @@ int ehca_rereg_mr(struct ehca_shca *shca,
int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
/* first determine reregistration hCall(s) */
- if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
- (pginfo->num_4k > e_mr->num_4k)) {
- ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
- "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+ if ((pginfo->num_hwpages > MAX_RPAGES) ||
+ (e_mr->num_hwpages > MAX_RPAGES) ||
+ (pginfo->num_hwpages > e_mr->num_hwpages)) {
+ ehca_dbg(&shca->ib_device, "Rereg3 case, "
+ "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
+ pginfo->num_hwpages, e_mr->num_hwpages);
rereg_1_hcall = 0;
rereg_3_hcall = 1;
}
@@ -1243,12 +1337,12 @@ int ehca_rereg_mr(struct ehca_shca *shca,
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_free_mr failed, "
- "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+ "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
"mr->lkey=%x",
h_ret, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_rereg_mr_exit0;
}
/* clean ehca_mr_t, without changing struct ib_mr and lock */
@@ -1257,13 +1351,14 @@ int ehca_rereg_mr(struct ehca_shca *shca,
/* set some MR values */
e_mr->flags = save_mr.flags;
+ e_mr->hwpage_size = save_mr.hwpage_size;
e_mr->fmr_page_size = save_mr.fmr_page_size;
e_mr->fmr_max_pages = save_mr.fmr_max_pages;
e_mr->fmr_max_maps = save_mr.fmr_max_maps;
e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
- e_pd, pginfo, lkey, rkey);
+ e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
if (ret) {
u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
memcpy(&e_mr->flags, &(save_mr.flags),
@@ -1274,11 +1369,11 @@ int ehca_rereg_mr(struct ehca_shca *shca,
ehca_rereg_mr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
"rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
- acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+ acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
rereg_1_hcall, rereg_3_hcall);
return ret;
} /* end ehca_rereg_mr() */
@@ -1290,97 +1385,84 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
{
int ret = 0;
u64 h_ret;
- int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
- int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
struct ehca_pd *e_pd =
container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
struct ehca_mr save_fmr;
u32 tmp_lkey, tmp_rkey;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_pginfo pginfo;
+ struct ehca_mr_hipzout_parms hipzout;
- /* first check if reregistration hCall can be used for unmap */
- if (e_fmr->fmr_max_pages > 512) {
- rereg_1_hcall = 0;
- rereg_3_hcall = 1;
- }
-
- if (rereg_1_hcall) {
+ if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
/*
* note: after using rereg hcall with len=0,
* rereg hcall must be used again for registering pages
*/
h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
0, 0, e_pd->fw_pd, 0, &hipzout);
- if (h_ret != H_SUCCESS) {
- /*
- * should not happen, because length checked above,
- * FMRs are not shared and no MW bound to FMRs
- */
- ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
- "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
- "mr_hndl=%lx lkey=%x lkey_out=%x",
- h_ret, e_fmr, shca->ipz_hca_handle.handle,
- e_fmr->ipz_mr_handle.handle,
- e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
- rereg_3_hcall = 1;
- } else {
+ if (h_ret == H_SUCCESS) {
/* successful reregistration */
e_fmr->start = NULL;
e_fmr->size = 0;
tmp_lkey = hipzout.lkey;
tmp_rkey = hipzout.rkey;
+ return 0;
}
+ /*
+ * should not happen, because length checked above,
+ * FMRs are not shared and no MW bound to FMRs
+ */
+ ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+ "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
+ "mr_hndl=%llx lkey=%x lkey_out=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+ /* try free and rereg */
}
- if (rereg_3_hcall) {
- struct ehca_mr save_mr;
-
- /* first free old FMR */
- h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
- if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_free_mr failed, "
- "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
- "lkey=%x",
- h_ret, e_fmr, shca->ipz_hca_handle.handle,
- e_fmr->ipz_mr_handle.handle,
- e_fmr->ib.ib_fmr.lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
- goto ehca_unmap_one_fmr_exit0;
- }
- /* clean ehca_mr_t, without changing lock */
- save_fmr = *e_fmr;
- ehca_mr_deletenew(e_fmr);
-
- /* set some MR values */
- e_fmr->flags = save_fmr.flags;
- e_fmr->fmr_page_size = save_fmr.fmr_page_size;
- e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
- e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
- e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
- e_fmr->acl = save_fmr.acl;
-
- pginfo.type = EHCA_MR_PGI_FMR;
- pginfo.num_pages = 0;
- pginfo.num_4k = 0;
- ret = ehca_reg_mr(shca, e_fmr, NULL,
- (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
- e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
- &tmp_rkey);
- if (ret) {
- u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
- memcpy(&e_fmr->flags, &(save_mr.flags),
- sizeof(struct ehca_mr) - offset);
- goto ehca_unmap_one_fmr_exit0;
- }
+ /* first free old FMR */
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+ "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
+ "lkey=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_unmap_one_fmr_exit0;
+ }
+ /* clean ehca_mr_t, without changing lock */
+ save_fmr = *e_fmr;
+ ehca_mr_deletenew(e_fmr);
+
+ /* set some MR values */
+ e_fmr->flags = save_fmr.flags;
+ e_fmr->hwpage_size = save_fmr.hwpage_size;
+ e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+ e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+ e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+ e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+ e_fmr->acl = save_fmr.acl;
+
+ memset(&pginfo, 0, sizeof(pginfo)); /* all page counters start at zero */
+ pginfo.type = EHCA_MR_PGI_FMR;
+ ret = ehca_reg_mr(shca, e_fmr, NULL,
+ (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+ e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+ &tmp_rkey, EHCA_REG_MR);
+ if (ret) {
+ /* registration failed: restore the fields captured in save_fmr */
+ u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+ memcpy(&e_fmr->flags, &(save_fmr.flags),
+ sizeof(struct ehca_mr) - offset);
}
ehca_unmap_one_fmr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
- "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
- ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
- rereg_1_hcall, rereg_3_hcall);
+ ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
+ "fmr_max_pages=%x",
+ ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
return ret;
} /* end ehca_unmap_one_fmr() */
@@ -1398,31 +1480,32 @@ int ehca_reg_smr(struct ehca_shca *shca,
int ret = 0;
u64 h_ret;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
"shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
- "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
e_origmr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_reg_smr_exit0;
}
/* successful registration */
- e_newmr->num_pages = e_origmr->num_pages;
- e_newmr->num_4k = e_origmr->num_4k;
- e_newmr->start = iova_start;
- e_newmr->size = e_origmr->size;
- e_newmr->acl = acl;
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
e_newmr->ipz_mr_handle = hipzout.handle;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
@@ -1430,13 +1513,97 @@ int ehca_reg_smr(struct ehca_shca *shca,
ehca_reg_smr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
"e_newmr=%p iova_start=%p acl=%x e_pd=%p",
ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
return ret;
} /* end ehca_reg_smr() */
/*----------------------------------------------------------------------*/
+static inline void *ehca_calc_sectbase(int top, int dir, int idx)
+{ /* pack (top, dir, idx) into one number and return its address via __va() */
+ unsigned long ret = idx; /* idx occupies the lowest bits */
+ ret |= dir << EHCA_DIR_INDEX_SHIFT;
+ ret |= top << EHCA_TOP_INDEX_SHIFT;
+ return __va(ret << SECTION_SIZE_BITS); /* scale by SECTION_SIZE_BITS, then convert */
+}
+
+#define ehca_bmap_valid(entry) \
+ ((u64)(entry) != (u64)EHCA_INVAL_ADDR) /* nonzero unless entry equals EHCA_INVAL_ADDR */
+
+static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
+ struct ehca_shca *shca, struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{ /* register all hw pages of section (top, dir, idx) with mr; returns the last hCall status */
+ u64 h_ret = 0;
+ unsigned long page = 0; /* index of the next hw page inside the section */
+ u64 rpage = __pa(kpage); /* address of the page-list buffer as passed to the hCall */
+ int page_count;
+
+ void *sectbase = ehca_calc_sectbase(top, dir, idx);
+ if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
+ ehca_err(&shca->ib_device, "reg_mr_section will probably fail: "
+ "hwpage_size does not fit to "
+ "section start address");
+ }
+ page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
+
+ while (page < page_count) {
+ u64 rnum;
+ for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
+ rnum++) { /* fill kpage[] with at most MAX_RPAGES page addresses */
+ void *pg = sectbase + ((page++) * pginfo->hwpage_size);
+ kpage[rnum] = __pa(pg);
+ }
+
+ h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
+ ehca_encode_hwpage_size(pginfo->hwpage_size),
+ 0, rpage, rnum);
+
+ if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { /* both codes count as success here */
+ ehca_err(&shca->ib_device, "register_rpage_mr failed");
+ return h_ret;
+ }
+ }
+ return h_ret;
+}
+
+static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
+ struct ehca_shca *shca, struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{ /* register every valid entry of ehca_bmap->top[top]->dir[dir] with mr */
+ u64 hret = H_SUCCESS;
+ int idx;
+
+ for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
+ continue; /* entry marked invalid: nothing to register */
+
+ hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
+ pginfo);
+ if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+ return hret; /* stop at the first failing section */
+ }
+ return hret;
+}
+
+static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
+ struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{ /* walk every valid directory of ehca_bmap->top[top] and register its sections */
+ u64 hret = H_SUCCESS;
+ int dir;
+
+ for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ continue; /* directory marked invalid: skip it */
+
+ hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
+ if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+ return hret; /* propagate the first failure */
+ }
+ return hret;
+}
/* register internal max-MR to internal SHCA */
int ehca_reg_internal_maxmr(
@@ -1448,10 +1615,16 @@ int ehca_reg_internal_maxmr(
struct ehca_mr *e_mr;
u64 *iova_start;
u64 size_maxmr;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
struct ib_phys_buf ib_pbuf;
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
+
+ if (!ehca_bmap) {
+ ret = -EFAULT;
+ goto ehca_reg_internal_maxmr_exit0;
+ }
e_mr = ehca_mr_new();
if (!e_mr) {
@@ -1462,29 +1635,32 @@ int ehca_reg_internal_maxmr(
e_mr->flags |= EHCA_MR_FLAG_MAXMR;
/* register internal max-MR on HCA */
- size_maxmr = (u64)high_memory - PAGE_OFFSET;
- iova_start = (u64*)KERNELBASE;
+ size_maxmr = ehca_mr_len;
+ iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
ib_pbuf.addr = 0;
ib_pbuf.size = size_maxmr;
- num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
-
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = 1;
- pginfo.phys_buf_array = &ib_pbuf;
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
+ PAGE_SIZE);
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
+ hw_pgsize);
+
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.u.phy.num_phys_buf = 1;
+ pginfo.u.phy.phys_buf_array = &ib_pbuf;
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
if (ret) {
ehca_err(&shca->ib_device, "reg of internal max MR failed, "
- "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
- "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
- num_pages_mr, num_pages_4k);
+ "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
+ "num_hwpages=%x", e_mr, iova_start, size_maxmr,
+ num_kpages, num_hwpages);
goto ehca_reg_internal_maxmr_exit1;
}
@@ -1501,7 +1677,7 @@ ehca_reg_internal_maxmr_exit1:
ehca_mr_delete(e_mr);
ehca_reg_internal_maxmr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
ret, shca, e_pd, e_maxmr);
return ret;
} /* end ehca_reg_internal_maxmr() */
@@ -1519,28 +1695,29 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
u64 h_ret;
struct ehca_mr *e_origmr = shca->maxmr;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
- "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
+ "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, e_origmr, shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
e_origmr->ib.ib_mr.lkey);
- return ehca_mrmw_map_hrc_reg_smr(h_ret);
+ return ehca2ib_return_code(h_ret);
}
/* successful registration */
- e_newmr->num_pages = e_origmr->num_pages;
- e_newmr->num_4k = e_origmr->num_4k;
- e_newmr->start = iova_start;
- e_newmr->size = e_origmr->size;
- e_newmr->acl = acl;
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
e_newmr->ipz_mr_handle = hipzout.handle;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
@@ -1568,7 +1745,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
if (ret) {
ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
- "ret=%x e_maxmr=%p shca=%p lkey=%x",
+ "ret=%i e_maxmr=%p shca=%p lkey=%x",
ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
shca->maxmr = e_maxmr;
goto ehca_dereg_internal_maxmr_exit0;
@@ -1578,7 +1755,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
ehca_dereg_internal_maxmr_exit0:
if (ret)
- ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+ ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
ret, shca, shca->maxmr);
return ret;
} /* end ehca_dereg_internal_maxmr() */
@@ -1605,28 +1782,28 @@ int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
/* check first buffer */
if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
- "pbuf->addr=%lx pbuf->size=%lx",
+ "pbuf->addr=%llx pbuf->size=%llx",
iova_start, pbuf->addr, pbuf->size);
return -EINVAL;
}
if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
(num_phys_buf > 1)) {
- ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
- "pbuf->size=%lx", pbuf->addr, pbuf->size);
+ ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
+ "pbuf->size=%llx", pbuf->addr, pbuf->size);
return -EINVAL;
}
for (i = 0; i < num_phys_buf; i++) {
if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
- ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
- "pbuf->size=%lx",
+ ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
+ "pbuf->size=%llx",
i, pbuf->addr, pbuf->size);
return -EINVAL;
}
if (((i > 0) && /* not 1st */
(i < (num_phys_buf - 1)) && /* not last */
(pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
- ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+ ehca_gen_err("bad size, i=%x pbuf->size=%llx",
i, pbuf->size);
return -EINVAL;
}
@@ -1659,7 +1836,7 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
page = page_list;
for (i = 0; i < list_len; i++) {
if (*page % e_fmr->fmr_page_size) {
- ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+ ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
"fmr_page_size=%x", i, *page, page, e_fmr,
e_fmr->fmr_page_size);
return -EINVAL;
@@ -1672,299 +1849,295 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
/*----------------------------------------------------------------------*/
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u32 number,
- u64 *kpage)
+/* user MR, PAGE_SIZE >= pginfo->hwpage_size: write up to 'number' hw page addresses to kpage[] */
+static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
- struct ib_phys_buf *pbuf;
- u64 *fmrlist;
- u64 num4k, pgaddr, offs4k;
- u32 i = 0;
+ u64 pgaddr;
u32 j = 0;
+ int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg; /* cursor kept in pginfo across calls */
+
+ while (*sg != NULL) {
+ pgaddr = page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT;
+ *kpage = pgaddr + (pginfo->next_hwpage *
+ pginfo->hwpage_size);
+ if (!(*kpage)) { /* a zero address is rejected */
+ ehca_gen_err("pgaddr=%llx "
+ "sg_dma_address=%llx "
+ "entry=%llx next_hwpage=%llx",
+ pgaddr, (u64)sg_dma_address(*sg),
+ pginfo->u.usr.next_nmap,
+ pginfo->next_hwpage);
+ return -EFAULT;
+ }
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) { /* kernel page fully consumed */
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ *sg = sg_next(*sg);
+ }
+ j++;
+ if (j >= number)
+ break;
+ }
- if (pginfo->type == EHCA_MR_PGI_PHYS) {
- /* loop over desired phys_buf_array entries */
- while (i < number) {
- pbuf = pginfo->phys_buf_array + pginfo->next_buf;
- num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
- offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
- while (pginfo->next_4k < offs4k + num4k) {
- /* sanity check */
- if ((pginfo->page_cnt >= pginfo->num_pages) ||
- (pginfo->page_4k_cnt >= pginfo->num_4k)) {
- ehca_gen_err("page_cnt >= num_pages, "
- "page_cnt=%lx "
- "num_pages=%lx "
- "page_4k_cnt=%lx "
- "num_4k=%lx i=%x",
- pginfo->page_cnt,
- pginfo->num_pages,
- pginfo->page_4k_cnt,
- pginfo->num_4k, i);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
- }
- *kpage = phys_to_abs(
- (pbuf->addr & EHCA_PAGEMASK)
- + (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%lx "
- "pbuf->size=%lx "
- "next_4k=%lx", pbuf->addr,
- pbuf->size,
- pginfo->next_4k);
+ return ret;
+}
+
+/*
+ * check that up to num_pages pages starting at *sg follow *prev_pgaddr contiguously;
+ * *sg is advanced past checked pages, the last page addr is left in *prev_pgaddr
+ */
+static int ehca_check_kpages_per_ate(struct scatterlist **sg,
+ int num_pages,
+ u64 *prev_pgaddr)
+{
+ for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { /* also stops when the list ends */
+ u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
+ if (ehca_debug_level >= 3)
+ ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
+ *(u64 *)__va(pgaddr));
+ if (pgaddr - PAGE_SIZE != *prev_pgaddr) { /* must start exactly PAGE_SIZE after the previous page */
+ ehca_gen_err("uncontiguous page found pgaddr=%llx "
+ "prev_pgaddr=%llx entries_left_in_hwpage=%x",
+ pgaddr, *prev_pgaddr, num_pages);
+ return -EINVAL;
+ }
+ *prev_pgaddr = pgaddr;
+ }
+ return 0;
+}
+
+/* user MR, PAGE_SIZE < pginfo->hwpage_size: one kpage[] entry per group of kernel pages */
+static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret = 0;
+ u64 pgaddr, prev_pgaddr;
+ u32 j = 0;
+ int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
+ int nr_kpages = kpages_per_hwpage; /* kernel pages still missing from the current hw page */
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
+
+ if (nr_kpages == kpages_per_hwpage) { /* at the first kernel page of a hw page */
+ pgaddr = (page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT);
+ *kpage = pgaddr;
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx entry=%llx",
+ pgaddr, pginfo->u.usr.next_nmap);
+ ret = -EFAULT;
+ return ret;
+ }
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%llx entry=%llx "
+ "mr_pgsize=%llx",
+ pgaddr, pginfo->u.usr.next_nmap,
+ pginfo->hwpage_size);
ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
+ return ret;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0)
- (pginfo->page_cnt)++;
- kpage++;
- i++;
- if (i >= number) break;
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = pgaddr &
+ ~(pginfo->hwpage_size - 1); /* round down to the hw page boundary */
}
- if (pginfo->next_4k >= offs4k + num4k) {
- (pginfo->next_buf)++;
- pginfo->next_4k = 0;
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)__va(pgaddr);
+ ehca_gen_dbg("kpage=%llx page=%llx "
+ "value=%016llx",
+ *kpage, pgaddr, val);
}
+ prev_pgaddr = pgaddr;
+ *sg = sg_next(*sg);
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
}
- } else if (pginfo->type == EHCA_MR_PGI_USER) {
- /* loop over desired chunk entries */
- chunk = pginfo->next_chunk;
- prev_chunk = pginfo->next_chunk;
- list_for_each_entry_continue(chunk,
- (&(pginfo->region->chunk_list)),
- list) {
- for (i = pginfo->next_nmap; i < chunk->nmap; ) {
- pgaddr = ( page_to_pfn(chunk->page_list[i].page)
- << PAGE_SHIFT );
- *kpage = phys_to_abs(pgaddr +
- (pginfo->next_4k *
- EHCA_PAGESIZE));
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%lx "
- "chunk->page_list[i]=%lx "
- "i=%x next_4k=%lx mr=%p",
- pgaddr,
- (u64)sg_dma_address(
- &chunk->
- page_list[i]),
- i, pginfo->next_4k, e_mr);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- kpage++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_nmap)++;
- pginfo->next_4k = 0;
- i++;
- }
- j++;
- if (j >= number) break;
+
+ ret = ehca_check_kpages_per_ate(sg, nr_kpages,
+ &prev_pgaddr);
+ if (ret)
+ return ret;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
+
+next_kpage:
+ nr_kpages = kpages_per_hwpage; /* start a fresh hw page */
+ (pginfo->hwpage_cnt)++;
+ kpage++;
+ j++;
+ if (j >= number)
+ break;
+ }
+
+ return ret;
+}
+
+static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
+ u32 number, u64 *kpage)
+{ /* physical-buffer MR: write up to 'number' hw page addresses to kpage[] */
+ int ret = 0;
+ struct ib_phys_buf *pbuf;
+ u64 num_hw, offs_hw;
+ u32 i = 0;
+
+ /* loop over desired phys_buf_array entries */
+ while (i < number) {
+ pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
+ num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
+ pbuf->size, pginfo->hwpage_size);
+ offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
+ pginfo->hwpage_size; /* NOTE(review): hw page number of addr; removed code used an in-page offset - confirm */
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%llx num_kpages=%llx "
+ "hwpage_cnt=%llx "
+ "num_hwpages=%llx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
}
- if ((pginfo->next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->next_nmap >= chunk->nmap) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
- break;
- else
- prev_chunk = chunk;
- }
- pginfo->next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->region->chunk_list)),
- list);
- } else if (pginfo->type == EHCA_MR_PGI_FMR) {
- /* loop over desired page_list entries */
- fmrlist = pginfo->page_list + pginfo->next_listelem;
- for (i = 0; i < number; i++) {
- *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
- pginfo->next_4k * EHCA_PAGESIZE);
- if ( !(*kpage) ) {
- ehca_gen_err("*fmrlist=%lx fmrlist=%p "
- "next_listelem=%lx next_4k=%lx",
- *fmrlist, fmrlist,
- pginfo->next_listelem,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
+ *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) && pbuf->addr ) { /* zero is accepted only when the buffer itself starts at 0 */
+ ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
+ "next_hwpage=%llx", pbuf->addr,
+ pbuf->size, pginfo->next_hwpage);
+ return -EFAULT;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE; /* one hw page spans several kernel pages */
kpage++;
- if (pginfo->next_4k %
- (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_listelem)++;
- fmrlist++;
- pginfo->next_4k = 0;
- }
+ i++;
+ if (i >= number) break;
+ }
+ if (pginfo->next_hwpage >= offs_hw + num_hw) { /* buffer exhausted: move on to the next one */
+ (pginfo->u.phy.next_buf)++;
+ pginfo->next_hwpage = 0;
}
- } else {
- ehca_gen_err("bad pginfo->type=%x", pginfo->type);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
}
-
-ehca_set_pagebuf_exit0:
- if (ret)
- ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
- "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
- "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
- "next_listelem=%lx region=%p next_chunk=%p "
- "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
- pginfo->num_pages, pginfo->num_4k,
- pginfo->next_buf, pginfo->next_4k, number, kpage,
- pginfo->page_cnt, pginfo->page_4k_cnt, i,
- pginfo->next_listelem, pginfo->region,
- pginfo->next_chunk, pginfo->next_nmap);
return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
+}
-/* setup 1 page from page info page buffer */
-int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u64 *rpage)
+static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
+ u32 number, u64 *kpage)
{
int ret = 0;
- struct ib_phys_buf *tmp_pbuf;
u64 *fmrlist;
- struct ib_umem_chunk *chunk;
- struct ib_umem_chunk *prev_chunk;
- u64 pgaddr, num4k, offs4k;
-
- if (pginfo->type == EHCA_MR_PGI_PHYS) {
- /* sanity check */
- if ((pginfo->page_cnt >= pginfo->num_pages) ||
- (pginfo->page_4k_cnt >= pginfo->num_4k)) {
- ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
- "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
- pginfo->page_cnt, pginfo->num_pages,
- pginfo->page_4k_cnt, pginfo->num_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
- num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
- offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
- *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
- (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*rpage) && tmp_pbuf->addr ) {
- ehca_gen_err("tmp_pbuf->addr=%lx"
- " tmp_pbuf->size=%lx next_4k=%lx",
- tmp_pbuf->addr, tmp_pbuf->size,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
- (pginfo->page_cnt)++;
- if (pginfo->next_4k >= offs4k + num4k) {
- (pginfo->next_buf)++;
- pginfo->next_4k = 0;
+ u32 i;
+
+ /* loop over desired page_list entries */
+ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
+ for (i = 0; i < number; i++) {
+ *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
+ pginfo->next_hwpage * pginfo->hwpage_size;
+ if ( !(*kpage) ) { /* a zero address is rejected */
+ ehca_gen_err("*fmrlist=%llx fmrlist=%p "
+ "next_listelem=%llx next_hwpage=%llx",
+ *fmrlist, fmrlist,
+ pginfo->u.fmr.next_listelem,
+ pginfo->next_hwpage);
+ return -EFAULT;
}
- } else if (pginfo->type == EHCA_MR_PGI_USER) {
- chunk = pginfo->next_chunk;
- prev_chunk = pginfo->next_chunk;
- list_for_each_entry_continue(chunk,
- (&(pginfo->region->chunk_list)),
- list) {
- pgaddr = ( page_to_pfn(chunk->page_list[
- pginfo->next_nmap].page)
- << PAGE_SHIFT);
- *rpage = phys_to_abs(pgaddr +
- (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*rpage) ) {
- ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
- " next_nmap=%lx next_4k=%lx mr=%p",
- pgaddr, (u64)sg_dma_address(
- &chunk->page_list[
- pginfo->
- next_nmap]),
- pginfo->next_nmap, pginfo->next_4k,
- e_mr);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_nmap)++;
- pginfo->next_4k = 0;
- }
- if (pginfo->next_nmap >= chunk->nmap) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
+ (pginfo->hwpage_cnt)++;
+ if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (pginfo->u.fmr.fmr_pgsize /
+ pginfo->hwpage_size) == 0) { /* NOTE(review): tested before next_hwpage is incremented, unlike the removed code - confirm */
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.fmr.next_listelem)++;
+ fmrlist++;
+ pginfo->next_hwpage = 0;
+ } else
+ (pginfo->next_hwpage)++;
+ } else {
+ unsigned int cnt_per_hwpage = pginfo->hwpage_size /
+ pginfo->u.fmr.fmr_pgsize;
+ unsigned int j;
+ u64 prev = *kpage;
+ /* check if adrs are contiguous */
+ for (j = 1; j < cnt_per_hwpage; j++) {
+ u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); /* NOTE(review): masked with hwpage_size but compared in fmr_pgsize steps - confirm */
+ if (prev + pginfo->u.fmr.fmr_pgsize != p) {
+ ehca_gen_err("uncontiguous fmr pages "
+ "found prev=%llx p=%llx "
+ "idx=%x", prev, p, i + j);
+ return -EINVAL;
+ }
+ prev = p;
}
- break;
- }
- pginfo->next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->region->chunk_list)),
- list);
- } else if (pginfo->type == EHCA_MR_PGI_FMR) {
- fmrlist = pginfo->page_list + pginfo->next_listelem;
- *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
- pginfo->next_4k * EHCA_PAGESIZE);
- if ( !(*rpage) ) {
- ehca_gen_err("*fmrlist=%lx fmrlist=%p "
- "next_listelem=%lx next_4k=%lx",
- *fmrlist, fmrlist, pginfo->next_listelem,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
+ pginfo->kpage_cnt += cnt_per_hwpage; /* one hw page consumed several list entries */
+ pginfo->u.fmr.next_listelem += cnt_per_hwpage;
+ fmrlist += cnt_per_hwpage;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_listelem)++;
- pginfo->next_4k = 0;
- }
- } else {
+ kpage++;
+ }
+ return ret;
+}
+
+/* setup page buffer from page info: dispatch on pginfo->type to the matching helper */
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret;
+
+ switch (pginfo->type) {
+ case EHCA_MR_PGI_PHYS:
+ ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_USER:
+ ret = PAGE_SIZE >= pginfo->hwpage_size ? /* helper depends on kernel vs hw page size */
+ ehca_set_pagebuf_user1(pginfo, number, kpage) :
+ ehca_set_pagebuf_user2(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_FMR:
+ ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
+ break;
+ default:
ehca_gen_err("bad pginfo->type=%x", pginfo->type);
ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
+ break;
}
-
-ehca_set_pagebuf_1_exit0:
- if (ret)
- ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
- "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
- "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
- "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
- pginfo, pginfo->type, pginfo->num_pages,
- pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
- rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
- pginfo->next_listelem, pginfo->region,
- pginfo->next_chunk, pginfo->next_nmap);
return ret;
-} /* end ehca_set_pagebuf_1() */
+} /* end ehca_set_pagebuf() */
/*----------------------------------------------------------------------*/
@@ -1976,8 +2149,8 @@ int ehca_mr_is_maxmr(u64 size,
u64 *iova_start)
{
/* a MR is treated as max-MR only if it fits following: */
- if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
- (iova_start == (void*)KERNELBASE)) {
+ if ((size == ehca_mr_len) &&
+ (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
ehca_gen_dbg("this is a max-MR");
return 1;
} else
@@ -2006,9 +2179,9 @@ void ehca_mrmw_map_acl(int ib_acl,
/*----------------------------------------------------------------------*/
/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
{
- return; /* HCA supports only 4k */
+ *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
} /* end ehca_mrmw_set_pgsize_hipz_acl() */
/*----------------------------------------------------------------------*/
@@ -2037,225 +2210,384 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
/*----------------------------------------------------------------------*/
/*
- * map HIPZ rc to IB retcodes for MR/MW allocations
- * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
+ * MR destructor and constructor
+ * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
+ * except struct ib_mr and spinlock
*/
-int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
+void ehca_mr_deletenew(struct ehca_mr *mr)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RT_PARM: /* invalid resource type */
- case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
- case H_MLENGTH_PARM: /* invalid memory length */
- case H_MEM_ACCESS_PARM: /* invalid access controls */
- case H_CONSTRAINED: /* resource constraint */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+ mr->flags = 0;
+ mr->num_kpages = 0;
+ mr->num_hwpages = 0;
+ mr->acl = 0;
+ mr->start = NULL;
+ mr->fmr_page_size = 0;
+ mr->fmr_max_pages = 0;
+ mr->fmr_max_maps = 0;
+ mr->fmr_map_cnt = 0;
+ memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+ memset(&mr->galpas, 0, sizeof(mr->galpas));
+} /* end ehca_mr_deletenew() */
+
+int ehca_init_mrmw_cache(void)
+{
+ mr_cache = kmem_cache_create("ehca_cache_mr",
+ sizeof(struct ehca_mr), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mr_cache)
+ return -ENOMEM;
+ mw_cache = kmem_cache_create("ehca_cache_mw",
+ sizeof(struct ehca_mw), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mw_cache) {
+ kmem_cache_destroy(mr_cache);
+ mr_cache = NULL;
+ return -ENOMEM;
}
-} /* end ehca_mrmw_map_hrc_alloc() */
+ return 0;
+}
-/*----------------------------------------------------------------------*/
+void ehca_cleanup_mrmw_cache(void)
+{
+ if (mr_cache)
+ kmem_cache_destroy(mr_cache);
+ if (mw_cache)
+ kmem_cache_destroy(mw_cache);
+}
-/*
- * map HIPZ rc to IB retcodes for MR register rpage
- * Used for hipz_h_register_rpage_mr at registering last page
- */
-int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
+static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
+ int dir)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* registration complete */
- return 0;
- case H_PAGE_REGISTERED: /* page registered */
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
-/* case H_QT_PARM: invalid queue type */
- case H_PARAMETER: /*
- * invalid logical address,
- * or count zero or greater 512
- */
- case H_TABLE_FULL: /* page table full */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+ if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
+ ehca_top_bmap->dir[dir] =
+ kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
+ if (!ehca_top_bmap->dir[dir])
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
}
-} /* end ehca_mrmw_map_hrc_rrpg_last() */
+ return 0;
+}
-/*----------------------------------------------------------------------*/
+static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
+{
+ if (!ehca_bmap_valid(ehca_bmap->top[top])) {
+ ehca_bmap->top[top] =
+ kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
+ if (!ehca_bmap->top[top])
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
+ }
+ return ehca_init_top_bmap(ehca_bmap->top[top], dir);
+}
-/*
- * map HIPZ rc to IB retcodes for MR register rpage
- * Used for hipz_h_register_rpage_mr at registering one page, but not last page
- */
-int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
+static inline int ehca_calc_index(unsigned long i, unsigned long s)
{
- switch (hipz_rc) {
- case H_PAGE_REGISTERED: /* page registered */
- return 0;
- case H_SUCCESS: /* registration complete */
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
-/* case H_QT_PARM: invalid queue type */
- case H_PARAMETER: /*
- * invalid logical address,
- * or count zero or greater 512
- */
- case H_TABLE_FULL: /* page table full */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+ return (i >> s) & EHCA_INDEX_MASK;
+}
+
+void ehca_destroy_busmap(void)
+{
+ int top, dir;
+
+ if (!ehca_bmap)
+ return;
+
+ for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ continue;
+ for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ continue;
+
+ kfree(ehca_bmap->top[top]->dir[dir]);
+ }
+
+ kfree(ehca_bmap->top[top]);
}
-} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
-/*----------------------------------------------------------------------*/
+ kfree(ehca_bmap);
+ ehca_bmap = NULL;
+}
-/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
-int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
+static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
+ unsigned long i, start_section, end_section;
+ int top, dir, idx;
+
+ if (!nr_pages)
return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+
+ if (!ehca_bmap) {
+ ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
+ if (!ehca_bmap)
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
}
-} /* end ehca_mrmw_map_hrc_query_mr() */
-/*----------------------------------------------------------------------*/
-/*----------------------------------------------------------------------*/
+ start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
+ end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
+ for (i = start_section; i < end_section; i++) {
+ int ret;
+ top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
+ dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
+ idx = i & EHCA_INDEX_MASK;
-/*
- * map HIPZ rc to IB retcodes for freeing MR resource
- * Used for hipz_h_free_resource_mr
- */
-int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
+ ret = ehca_init_bmap(ehca_bmap, top, dir);
+ if (ret) {
+ ehca_destroy_busmap();
+ return ret;
+ }
+ ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
+ ehca_mr_len += EHCA_SECTSIZE;
+ }
+ return 0;
+}
+
+static int ehca_is_hugepage(unsigned long pfn)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* resource freed */
+ int page_order;
+
+ if (pfn & EHCA_HUGEPAGE_PFN_MASK)
return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_R_STATE: /* invalid resource state */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_RESOURCE: /* Resource in use */
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+
+ page_order = compound_order(pfn_to_page(pfn));
+ if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
+ return 0;
+
+ return 1;
+}
+
+static int ehca_create_busmap_callback(unsigned long initial_pfn,
+ unsigned long total_nr_pages, void *arg)
+{
+ int ret;
+ unsigned long pfn, start_pfn, end_pfn, nr_pages;
+
+ if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
+ return ehca_update_busmap(initial_pfn, total_nr_pages);
+
+ /* Given chunk is >= 16GB -> check for hugepages */
+ start_pfn = initial_pfn;
+ end_pfn = initial_pfn + total_nr_pages;
+ pfn = start_pfn;
+
+ while (pfn < end_pfn) {
+ if (ehca_is_hugepage(pfn)) {
+ /* Add mem found in front of the hugepage */
+ nr_pages = pfn - start_pfn;
+ ret = ehca_update_busmap(start_pfn, nr_pages);
+ if (ret)
+ return ret;
+ /* Skip the hugepage */
+ pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
+ start_pfn = pfn;
+ } else
+ pfn += (EHCA_SECTSIZE / PAGE_SIZE);
}
-} /* end ehca_mrmw_map_hrc_free_mr() */
-/*----------------------------------------------------------------------*/
+ /* Add mem found behind the hugepage(s) */
+ nr_pages = pfn - start_pfn;
+ return ehca_update_busmap(start_pfn, nr_pages);
+}
-/*
- * map HIPZ rc to IB retcodes for freeing MW resource
- * Used for hipz_h_free_resource_mw
- */
-int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
+int ehca_create_busmap(void)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* resource freed */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_R_STATE: /* invalid resource state */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_RESOURCE: /* Resource in use */
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+ int ret;
+
+ ehca_mr_len = 0;
+ ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
+ ehca_create_busmap_callback);
+ return ret;
+}
+
+static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ int top;
+ u64 hret, *kpage;
+
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!kpage) {
+ ehca_err(&shca->ib_device, "kpage alloc failed");
+ return -ENOMEM;
+ }
+ for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ continue;
+ hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
+ if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
+ break;
}
-} /* end ehca_mrmw_map_hrc_free_mw() */
-/*----------------------------------------------------------------------*/
+ ehca_free_fw_ctrlblock(kpage);
-/*
- * map HIPZ rc to IB retcodes for SMR registrations
- * Used for hipz_h_register_smr.
- */
-int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
+ if (hret == H_SUCCESS)
+ return 0; /* Everything is fine */
+ else {
+ ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
+ "h_ret=%lli e_mr=%p top=%x lkey=%x "
+ "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
+ e_mr->ib.ib_mr.lkey,
+ shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle);
+ return ehca2ib_return_code(hret);
+ }
+}
+
+static u64 ehca_map_vaddr(void *caddr)
{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_MEM_PARM: /* invalid MR virtual address */
- case H_MEM_ACCESS_PARM: /* invalid access controls */
- case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
+ int top, dir, idx;
+ unsigned long abs_addr, offset;
+ u64 entry;
+
+ if (!ehca_bmap)
+ return EHCA_INVAL_ADDR;
+
+ abs_addr = __pa(caddr);
+ top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ return EHCA_INVAL_ADDR;
+
+ dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ return EHCA_INVAL_ADDR;
+
+ idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
+
+ entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
+ if (ehca_bmap_valid(entry)) {
+ offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
+ return entry | offset;
+ } else
+ return EHCA_INVAL_ADDR;
+}
+
+static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == EHCA_INVAL_ADDR;
+}
+
+static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
+ size_t size, enum dma_data_direction direction)
+{
+ if (cpu_addr)
+ return ehca_map_vaddr(cpu_addr);
+ else
+ return EHCA_INVAL_ADDR;
+}
+
+static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
+
+static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ if (offset + size > PAGE_SIZE)
+ return EHCA_INVAL_ADDR;
+
+ addr = ehca_map_vaddr(page_address(page));
+ if (!ehca_dma_mapping_error(dev, addr))
+ addr += offset;
+
+ return addr;
+}
+
+static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
+
+static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ u64 addr;
+ addr = ehca_map_vaddr(sg_virt(sg));
+ if (ehca_dma_mapping_error(dev, addr))
+ return 0;
+
+ sg->dma_address = addr;
+ sg->dma_length = sg->length;
}
-} /* end ehca_mrmw_map_hrc_reg_smr() */
+ return nents;
+}
-/*----------------------------------------------------------------------*/
+static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
-/*
- * MR destructor and constructor
- * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
- * except struct ib_mr and spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
+static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
{
- mr->flags = 0;
- mr->num_pages = 0;
- mr->num_4k = 0;
- mr->acl = 0;
- mr->start = NULL;
- mr->fmr_page_size = 0;
- mr->fmr_max_pages = 0;
- mr->fmr_max_maps = 0;
- mr->fmr_map_cnt = 0;
- memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
- memset(&mr->galpas, 0, sizeof(mr->galpas));
- mr->nr_of_pages = 0;
- mr->pagearray = NULL;
-} /* end ehca_mr_deletenew() */
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+}
-int ehca_init_mrmw_cache(void)
+static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
{
- mr_cache = kmem_cache_create("ehca_cache_mr",
- sizeof(struct ehca_mr), 0,
- SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!mr_cache)
- return -ENOMEM;
- mw_cache = kmem_cache_create("ehca_cache_mw",
- sizeof(struct ehca_mw), 0,
- SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!mw_cache) {
- kmem_cache_destroy(mr_cache);
- mr_cache = NULL;
- return -ENOMEM;
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+}
+
+static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+ u64 dma_addr;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p) {
+ addr = page_address(p);
+ dma_addr = ehca_map_vaddr(addr);
+ if (ehca_dma_mapping_error(dev, dma_addr)) {
+ free_pages((unsigned long)addr, get_order(size));
+ return NULL;
+ }
+ if (dma_handle)
+ *dma_handle = dma_addr;
+ return addr;
}
- return 0;
+ return NULL;
}
-void ehca_cleanup_mrmw_cache(void)
+static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
{
- if (mr_cache)
- kmem_cache_destroy(mr_cache);
- if (mw_cache)
- kmem_cache_destroy(mw_cache);
+ if (cpu_addr && size)
+ free_pages((unsigned long)cpu_addr, get_order(size));
}
+
+
+struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
+ .mapping_error = ehca_dma_mapping_error,
+ .map_single = ehca_dma_map_single,
+ .unmap_single = ehca_dma_unmap_single,
+ .map_page = ehca_dma_map_page,
+ .unmap_page = ehca_dma_unmap_page,
+ .map_sg = ehca_dma_map_sg,
+ .unmap_sg = ehca_dma_unmap_sg,
+ .sync_single_for_cpu = ehca_dma_sync_single_for_cpu,
+ .sync_single_for_device = ehca_dma_sync_single_for_device,
+ .alloc_coherent = ehca_dma_alloc_coherent,
+ .free_coherent = ehca_dma_free_coherent,
+};
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
index d936e40a574..50d8b51306d 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.h
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -42,6 +42,11 @@
#ifndef _EHCA_MRMW_H_
#define _EHCA_MRMW_H_
+enum ehca_reg_type {
+ EHCA_REG_MR,
+ EHCA_REG_BUSMAP_MR
+};
+
int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_mr *e_mr,
u64 *iova_start,
@@ -50,7 +55,8 @@ int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_pd *e_pd,
struct ehca_mr_pginfo *pginfo,
u32 *lkey,
- u32 *rkey);
+ u32 *rkey,
+ enum ehca_reg_type reg_type);
int ehca_reg_mr_rpages(struct ehca_shca *shca,
struct ehca_mr *e_mr,
@@ -101,40 +107,26 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
int list_len);
-int ehca_set_pagebuf(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
u32 number,
u64 *kpage);
-int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u64 *rpage);
-
int ehca_mr_is_maxmr(u64 size,
u64 *iova_start);
void ehca_mrmw_map_acl(int ib_acl,
u32 *hipz_acl);
-void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
int *ib_acl);
-int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+void ehca_mr_deletenew(struct ehca_mr *mr);
-int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+int ehca_create_busmap(void);
-void ehca_mr_deletenew(struct ehca_mr *mr);
+void ehca_destroy_busmap(void);
+extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index 2c3cdc6f7b3..351577a6670 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -38,7 +38,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <asm/current.h>
+#include <linux/slab.h>
#include "ehca_tools.h"
#include "ehca_iverbs.h"
@@ -49,16 +49,20 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
struct ib_ucontext *context, struct ib_udata *udata)
{
struct ehca_pd *pd;
+ int i;
- pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+ pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
if (!pd) {
ehca_err(device, "device=%p context=%p out of memory",
device, context);
return ERR_PTR(-ENOMEM);
}
- memset(pd, 0, sizeof(struct ehca_pd));
- pd->ownpid = current->tgid;
+ for (i = 0; i < 2; i++) {
+ INIT_LIST_HEAD(&pd->free[i]);
+ INIT_LIST_HEAD(&pd->full[i]);
+ }
+ mutex_init(&pd->lock);
/*
* Kernel PD: when device = -1, 0
@@ -80,18 +84,24 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
int ehca_dealloc_pd(struct ib_pd *pd)
{
- u32 cur_pid = current->tgid;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+ int i, leftovers = 0;
+ struct ipz_small_queue_page *page, *tmp;
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
+ for (i = 0; i < 2; i++) {
+ list_splice(&my_pd->full[i], &my_pd->free[i]);
+ list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
+ leftovers = 1;
+ free_page(page->page);
+ kmem_cache_free(small_qp_cache, page);
+ }
}
- kmem_cache_free(pd_cache,
- container_of(pd, struct ehca_pd, ib_pd));
+ if (leftovers)
+ ehca_warn(pd->device,
+ "Some small queue pages were not freed");
+
+ kmem_cache_free(pd_cache, my_pd);
return 0;
}
@@ -101,7 +111,7 @@ int ehca_init_pd_cache(void)
pd_cache = kmem_cache_create("ehca_cache_pd",
sizeof(struct ehca_pd), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!pd_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
index 8707d297ce4..90c4efa6758 100644
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -46,20 +46,20 @@
#include "ehca_tools.h"
-/* virtual scatter gather entry to specify remote adresses with length */
+/* virtual scatter gather entry to specify remote addresses with length */
struct ehca_vsgentry {
u64 vaddr;
u32 lkey;
u32 length;
};
-#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7)
-#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3)
-#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12)
-#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31)
-#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47)
-#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55)
-#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63)
+#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7)
+#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3)
+#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12)
+#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31)
+#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47)
+#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55)
+#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63)
/*
* Unreliable Datagram Address Vector Format
@@ -148,7 +148,7 @@ struct ehca_wqe {
u32 immediate_data;
union {
struct {
- u64 remote_virtual_adress;
+ u64 remote_virtual_address;
u32 rkey;
u32 reserved;
u64 atomic_1st_op_dma_len;
@@ -206,13 +206,14 @@ struct ehca_wqe {
};
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
-#define WC_IMM_DATA EHCA_BMASK_IBM(1,1)
-#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2)
-#define WC_SE_BIT EHCA_BMASK_IBM(3,3)
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
+#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1)
+#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2)
+#define WC_SE_BIT EHCA_BMASK_IBM(3, 3)
#define WC_STATUS_ERROR_BIT 0x80000000
#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
#define WC_STATUS_PURGE_BIT 0x10
+#define WC_SEND_RECEIVE_BIT 0x80
struct ehca_cqe {
u64 work_request_id;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 4394123cdbd..2e89356c46f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -3,7 +3,9 @@
*
* QP functions
*
- * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Authors: Joachim Fenkes <fenkes@de.ibm.com>
+ * Stefan Roscher <stefan.roscher@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
* Reinhard Ernst <rernst@de.ibm.com>
* Heiko J Schick <schickhj@de.ibm.com>
@@ -41,8 +43,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-
-#include <asm/current.h>
+#include <linux/slab.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
@@ -56,9 +57,7 @@ static struct kmem_cache *qp_cache;
/*
* attributes not supported by query qp
*/
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
- IB_QP_MAX_QP_RD_ATOMIC | \
- IB_QP_ACCESS_FLAGS | \
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \
IB_QP_EN_SQD_ASYNC_NOTIFY)
/*
@@ -234,13 +233,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
return index;
}
-enum ehca_service_type {
- ST_RC = 0,
- ST_UC = 1,
- ST_RD = 2,
- ST_UD = 3
-};
-
/*
* ibqptype2servicetype returns hcp service type corresponding to given
* ib qp type used by create_qp()
@@ -259,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
return ST_UD;
case IB_QPT_RAW_IPV6:
return -EINVAL;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
return -EINVAL;
default:
ehca_gen_err("Invalid ibqptype=%x", ibqptype);
@@ -268,309 +260,493 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
}
/*
- * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ * init userspace queue info from ipz_queue data
+ */
+static inline void queue2resp(struct ipzu_queue_resp *resp,
+ struct ipz_queue *queue)
+{
+ resp->qe_size = queue->qe_size;
+ resp->act_nr_of_sg = queue->act_nr_of_sg;
+ resp->queue_length = queue->queue_length;
+ resp->pagesize = queue->pagesize;
+ resp->toggle_state = queue->toggle_state;
+ resp->offset = queue->offset;
+}
+
+/*
+ * init_qp_queue initializes/constructs r/squeue and registers queue pages.
*/
-static inline int init_qp_queues(struct ehca_shca *shca,
- struct ehca_qp *my_qp,
- int nr_sq_pages,
- int nr_rq_pages,
- int swqe_size,
- int rwqe_size,
- int nr_send_sges, int nr_receive_sges)
+static inline int init_qp_queue(struct ehca_shca *shca,
+ struct ehca_pd *pd,
+ struct ehca_qp *my_qp,
+ struct ipz_queue *queue,
+ int q_type,
+ u64 expected_hret,
+ struct ehca_alloc_queue_parms *parms,
+ int wqe_size)
{
- int ret, cnt, ipz_rc;
+ int ret, cnt, ipz_rc, nr_q_pages;
void *vpage;
u64 rpage, h_ret;
struct ib_device *ib_dev = &shca->ib_device;
struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
- ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
- nr_sq_pages,
- EHCA_PAGESIZE, swqe_size, nr_send_sges);
- if (!ipz_rc) {
- ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
- ipz_rc);
- return -EBUSY;
+ if (!parms->queue_size)
+ return 0;
+
+ if (parms->is_small) {
+ nr_q_pages = 1;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ 128 << parms->page_size,
+ wqe_size, parms->act_nr_sges, 1);
+ } else {
+ nr_q_pages = parms->queue_size;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ EHCA_PAGESIZE, wqe_size,
+ parms->act_nr_sges, 0);
}
- ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
- nr_rq_pages,
- EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
if (!ipz_rc) {
- ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+ ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
ipz_rc);
- ret = -EBUSY;
- goto init_qp_queues0;
+ return -EBUSY;
}
- /* register SQ pages */
- for (cnt = 0; cnt < nr_sq_pages; cnt++) {
- vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+
+ /* register queue pages */
+ for (cnt = 0; cnt < nr_q_pages; cnt++) {
+ vpage = ipz_qpageit_get_inc(queue);
if (!vpage) {
- ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+ ehca_err(ib_dev, "ipz_qpageit_get_inc() "
"failed p_vpage= %p", vpage);
ret = -EINVAL;
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
- rpage = virt_to_abs(vpage);
+ rpage = __pa(vpage);
h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
my_qp->ipz_qp_handle,
- &my_qp->pf, 0, 0,
- rpage, 1,
+ NULL, 0, q_type,
+ rpage, parms->is_small ? 0 : 1,
my_qp->galpas.kernel);
- if (h_ret < H_SUCCESS) {
- ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
- " failed rc=%lx", h_ret);
- ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
- }
- }
-
- ipz_qeit_reset(&my_qp->ipz_squeue);
-
- /* register RQ pages */
- for (cnt = 0; cnt < nr_rq_pages; cnt++) {
- vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
- if (!vpage) {
- ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
- "failed p_vpage = %p", vpage);
- ret = -EINVAL;
- goto init_qp_queues1;
- }
-
- rpage = virt_to_abs(vpage);
-
- h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
- my_qp->ipz_qp_handle,
- &my_qp->pf, 0, 1,
- rpage, 1,my_qp->galpas.kernel);
- if (h_ret < H_SUCCESS) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
- "rc=%lx", h_ret);
- ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
- }
- if (cnt == (nr_rq_pages - 1)) { /* last page! */
- if (h_ret != H_SUCCESS) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
- "h_ret= %lx ", h_ret);
+ if (cnt == (nr_q_pages - 1)) { /* last page! */
+ if (h_ret != expected_hret) {
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
+ "h_ret=%lli", h_ret);
ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
if (vpage) {
ehca_err(ib_dev, "ipz_qpageit_get_inc() "
"should not succeed vpage=%p", vpage);
ret = -EINVAL;
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
} else {
if (h_ret != H_PAGE_REGISTERED) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
- "h_ret= %lx ", h_ret);
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
+ "h_ret=%lli", h_ret);
ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
}
}
- ipz_qeit_reset(&my_qp->ipz_rqueue);
+ ipz_qeit_reset(queue);
return 0;
-init_qp_queues1:
- ipz_queue_dtor(&my_qp->ipz_rqueue);
-init_qp_queues0:
- ipz_queue_dtor(&my_qp->ipz_squeue);
+init_qp_queue1:
+ ipz_queue_dtor(pd, queue);
return ret;
}
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
{
- static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
- static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
- struct ehca_qp *my_qp;
+ if (is_llqp)
+ return 128 << act_nr_sge;
+ else
+ return offsetof(struct ehca_wqe,
+ u.nud.sg_list[act_nr_sge]);
+}
+
+static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
+ int req_nr_sge, int is_llqp)
+{
+ u32 wqe_size, q_size;
+ int act_nr_sge = req_nr_sge;
+
+ if (!is_llqp)
+ /* round up #SGEs so WQE size is a power of 2 */
+ for (act_nr_sge = 4; act_nr_sge <= 252;
+ act_nr_sge = 4 + 2 * act_nr_sge)
+ if (act_nr_sge >= req_nr_sge)
+ break;
+
+ wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
+ q_size = wqe_size * (queue->max_wr + 1);
+
+ if (q_size <= 512)
+ queue->page_size = 2;
+ else if (q_size <= 1024)
+ queue->page_size = 3;
+ else
+ queue->page_size = 0;
+
+ queue->is_small = (queue->page_size != 0);
+}
+
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+ struct list_head *list, *node;
+
+ /* TODO: support low latency QPs */
+ if (qp->ext_type == EQPT_LLQP)
+ return;
+
+ if (on_sq) {
+ list = &qp->send_cq->sqp_err_list;
+ node = &qp->sq_err_node;
+ } else {
+ list = &qp->recv_cq->rqp_err_list;
+ node = &qp->rq_err_node;
+ }
+
+ if (list_empty(node))
+ list_add_tail(node, list);
+
+ return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+
+ if (!list_empty(node))
+ list_del_init(node);
+
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+ int i;
+
+ qmap->tail = qmap->entries - 1;
+ qmap->left_to_poll = 0;
+ qmap->next_wqe_idx = 0;
+ for (i = 0; i < qmap->entries; i++) {
+ qmap->map[i].reported = 1;
+ qmap->map[i].cqe_req = 0;
+ }
+}
+
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the field out of init_attr is used.
+ */
+static struct ehca_qp *internal_create_qp(
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata, int is_srq)
+{
+ struct ehca_qp *my_qp, *my_srq = NULL;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
struct ib_ucontext *context = NULL;
u64 h_ret;
- int max_send_sge, max_recv_sge, ret;
+ int is_llqp = 0, has_srq = 0, is_user = 0;
+ int qp_type, max_send_sge, max_recv_sge, ret;
/* h_call's out parameters */
struct ehca_alloc_qp_parms parms;
- u32 swqe_size = 0, rwqe_size = 0;
- u8 daqp_completion, isdaqp;
+ u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
+ if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
+ ehca_err(pd->device, "Unable to create QP, max number of %i "
+ "QPs reached.", shca->max_num_qps);
+ ehca_err(pd->device, "To increase the maximum number of QPs "
+ "use the number_of_qps module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ if (init_attr->create_flags) {
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ memset(&parms, 0, sizeof(parms));
+ qp_type = init_attr->qp_type;
+
if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
init_attr->sq_sig_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
- /* save daqp completion bits */
- daqp_completion = init_attr->qp_type & 0x60;
- /* save daqp bit */
- isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
- init_attr->qp_type = init_attr->qp_type & 0x1F;
+ /* save LLQP info */
+ if (qp_type & 0x80) {
+ is_llqp = 1;
+ parms.ext_type = EQPT_LLQP;
+ parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+ }
+ qp_type &= 0x1F;
+ init_attr->qp_type &= 0x1F;
- if (init_attr->qp_type != IB_QPT_UD &&
- init_attr->qp_type != IB_QPT_SMI &&
- init_attr->qp_type != IB_QPT_GSI &&
- init_attr->qp_type != IB_QPT_UC &&
- init_attr->qp_type != IB_QPT_RC) {
- ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
+ /* handle SRQ base QPs */
+ if (init_attr->srq) {
+ my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+ if (qp_type == IB_QPT_UC) {
+ ehca_err(pd->device, "UC with SRQ not supported");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ has_srq = 1;
+ parms.ext_type = EQPT_SRQBASE;
+ parms.srq_qpn = my_srq->real_qp_num;
}
- if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
- && isdaqp) {
- ehca_err(pd->device, "unsupported LL QP Type=%x",
- init_attr->qp_type);
+
+ if (is_llqp && has_srq) {
+ ehca_err(pd->device, "LLQPs can't have an SRQ");
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
- } else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
- (init_attr->cap.max_send_wr > 255 ||
- init_attr->cap.max_recv_wr > 255 )) {
- ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
- "or max_rq_wr=%x for QP Type=%x",
- init_attr->cap.max_send_wr,
- init_attr->cap.max_recv_wr,init_attr->qp_type);
- return ERR_PTR(-EINVAL);
- } else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
- init_attr->cap.max_send_wr > 255) {
- ehca_err(pd->device,
- "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
- init_attr->cap.max_send_wr, init_attr->qp_type);
+ }
+
+ /* handle SRQs */
+ if (is_srq) {
+ parms.ext_type = EQPT_SRQ;
+ parms.srq_limit = srq_init_attr->attr.srq_limit;
+ if (init_attr->cap.max_recv_sge > 3) {
+ ehca_err(pd->device, "no more than three SGEs "
+ "supported for SRQ pd=%p max_sge=%x",
+ pd, init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /* check QP type */
+ if (qp_type != IB_QPT_UD &&
+ qp_type != IB_QPT_UC &&
+ qp_type != IB_QPT_RC &&
+ qp_type != IB_QPT_SMI &&
+ qp_type != IB_QPT_GSI) {
+ ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
- if (pd->uobject && udata)
- context = pd->uobject->context;
+ if (is_llqp) {
+ switch (qp_type) {
+ case IB_QPT_RC:
+ if ((init_attr->cap.max_send_wr > 255) ||
+ (init_attr->cap.max_recv_wr > 255)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_sq_wr=%x "
+ "or max_rq_wr=%x for RC LLQP",
+ init_attr->cap.max_send_wr,
+ init_attr->cap.max_recv_wr);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ case IB_QPT_UD:
+ if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+ ehca_err(pd->device, "UD LLQP not supported "
+ "by this adapter");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-ENOSYS);
+ }
+ if (!(init_attr->cap.max_send_sge <= 5
+ && init_attr->cap.max_send_sge >= 1
+ && init_attr->cap.max_recv_sge <= 5
+ && init_attr->cap.max_recv_sge >= 1)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_send_sge=%x "
+ "or max_recv_sge=%x for UD LLQP",
+ init_attr->cap.max_send_sge,
+ init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ } else if (init_attr->cap.max_send_wr > 255) {
+ ehca_err(pd->device,
+ "Invalid Number of "
+ "max_send_wr=%x for UD QP_TYPE=%x",
+ init_attr->cap.max_send_wr, qp_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ default:
+ ehca_err(pd->device, "unsupported LL QP Type=%x",
+ qp_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
+ || qp_type == IB_QPT_GSI) ? 250 : 252;
+
+ if (init_attr->cap.max_send_sge > max_sge
+ || init_attr->cap.max_recv_sge > max_sge) {
+ ehca_err(pd->device, "Invalid number of SGEs requested "
+ "send_sge=%x recv_sge=%x max_sge=%x",
+ init_attr->cap.max_send_sge,
+ init_attr->cap.max_recv_sge, max_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ }
- my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+ my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
if (!my_qp) {
ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-ENOMEM);
}
- memset(my_qp, 0, sizeof(struct ehca_qp));
- memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
+ if (pd->uobject && udata) {
+ is_user = 1;
+ context = pd->uobject->context;
+ }
+
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
-
- my_qp->recv_cq =
- container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
- my_qp->send_cq =
- container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
- my_qp->init_attr = *init_attr;
-
- do {
- if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+ my_qp->qp_type = qp_type;
+ my_qp->ext_type = parms.ext_type;
+ my_qp->state = IB_QPS_RESET;
+
+ if (init_attr->recv_cq)
+ my_qp->recv_cq =
+ container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+ if (init_attr->send_cq)
+ my_qp->send_cq =
+ container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+ idr_preload(GFP_KERNEL);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
+
+ ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
+ if (ret >= 0)
+ my_qp->token = ret;
+
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ idr_preload_end();
+ if (ret < 0) {
+ if (ret == -ENOSPC) {
+ ret = -EINVAL;
+ ehca_err(pd->device, "Invalid number of qp");
+ } else {
ret = -ENOMEM;
- ehca_err(pd->device, "Can't reserve idr resources.");
- goto create_qp_exit0;
+ ehca_err(pd->device, "Can't allocate new idr entry.");
}
-
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
- ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
- } while (ret == -EAGAIN);
-
- if (ret) {
- ret = -ENOMEM;
- ehca_err(pd->device, "Can't allocate new idr entry.");
goto create_qp_exit0;
}
- parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+ if (has_srq)
+ parms.srq_token = my_qp->token;
+
+ parms.servicetype = ibqptype2servicetype(qp_type);
if (parms.servicetype < 0) {
ret = -EINVAL;
- ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
- goto create_qp_exit0;
+ ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
+ goto create_qp_exit1;
}
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- parms.sigtype = HCALL_SIGT_EVERY;
- else
- parms.sigtype = HCALL_SIGT_BY_WQE;
+ /* Always signal by WQE so we can hide circ. WQEs */
+ parms.sigtype = HCALL_SIGT_BY_WQE;
/* UD_AV CIRCUMVENTION */
max_send_sge = init_attr->cap.max_send_sge;
max_recv_sge = init_attr->cap.max_recv_sge;
- if (IB_QPT_UD == init_attr->qp_type ||
- IB_QPT_GSI == init_attr->qp_type ||
- IB_QPT_SMI == init_attr->qp_type) {
+ if (parms.servicetype == ST_UD && !is_llqp) {
max_send_sge += 2;
max_recv_sge += 2;
}
- parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
- parms.daqp_ctrl = isdaqp | daqp_completion;
+ parms.token = my_qp->token;
+ parms.eq_handle = shca->eq.ipz_eq_handle;
parms.pd = my_pd->fw_pd;
- parms.max_recv_sge = max_recv_sge;
- parms.max_send_sge = max_send_sge;
-
- h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
-
+ if (my_qp->send_cq)
+ parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+ if (my_qp->recv_cq)
+ parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
+
+ parms.squeue.max_wr = init_attr->cap.max_send_wr;
+ parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
+ parms.squeue.max_sge = max_send_sge;
+ parms.rqueue.max_sge = max_recv_sge;
+
+ /* RC QPs need one more SWQE for unsolicited ack circumvention */
+ if (qp_type == IB_QPT_RC)
+ parms.squeue.max_wr++;
+
+ if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
+ if (HAS_SQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.squeue, max_send_sge, is_llqp);
+ if (HAS_RQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.rqueue, max_recv_sge, is_llqp);
+ parms.qp_storage =
+ (parms.squeue.is_small || parms.rqueue.is_small);
+ }
+
+ h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
if (h_ret != H_SUCCESS) {
- ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+ ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
h_ret);
ret = ehca2ib_return_code(h_ret);
goto create_qp_exit1;
}
- switch (init_attr->qp_type) {
+ ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+ my_qp->ipz_qp_handle = parms.qp_handle;
+ my_qp->galpas = parms.galpas;
+
+ swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
+ rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
+
+ switch (qp_type) {
case IB_QPT_RC:
- if (isdaqp == 0) {
- swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
- (parms.act_nr_send_sges)]);
- rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
- (parms.act_nr_recv_sges)]);
- } else { /* for daqp we need to use msg size, not wqe size */
- swqe_size = da_rc_msg_size[max_send_sge];
- rwqe_size = da_rc_msg_size[max_recv_sge];
- parms.act_nr_send_sges = 1;
- parms.act_nr_recv_sges = 1;
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
}
+ /* hide the extra WQE */
+ parms.squeue.act_nr_wqes--;
break;
- case IB_QPT_UC:
- swqe_size = offsetof(struct ehca_wqe,
- u.nud.sg_list[parms.act_nr_send_sges]);
- rwqe_size = offsetof(struct ehca_wqe,
- u.nud.sg_list[parms.act_nr_recv_sges]);
- break;
-
case IB_QPT_UD:
case IB_QPT_GSI:
case IB_QPT_SMI:
/* UD circumvention */
- parms.act_nr_recv_sges -= 2;
- parms.act_nr_send_sges -= 2;
- if (isdaqp) {
- swqe_size = da_ud_sq_msg_size[max_send_sge];
- rwqe_size = da_rc_msg_size[max_recv_sge];
- parms.act_nr_send_sges = 1;
- parms.act_nr_recv_sges = 1;
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
} else {
- swqe_size = offsetof(struct ehca_wqe,
- u.ud_av.sg_list[parms.act_nr_send_sges]);
- rwqe_size = offsetof(struct ehca_wqe,
- u.ud_av.sg_list[parms.act_nr_recv_sges]);
+ parms.squeue.act_nr_sges -= 2;
+ parms.rqueue.act_nr_sges -= 2;
}
- if (IB_QPT_GSI == init_attr->qp_type ||
- IB_QPT_SMI == init_attr->qp_type) {
- parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
- parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
- parms.act_nr_send_sges = init_attr->cap.max_send_sge;
- parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
- my_qp->real_qp_num =
- (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+ if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
+ parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
+ parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
+ parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
+ parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
+ ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
}
break;
@@ -579,148 +755,312 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
break;
}
- /* initializes r/squeue and registers queue pages */
- ret = init_qp_queues(shca, my_qp,
- parms.nr_sq_pages, parms.nr_rq_pages,
- swqe_size, rwqe_size,
- parms.act_nr_send_sges, parms.act_nr_recv_sges);
- if (ret) {
- ehca_err(pd->device,
- "Couldn't initialize r/squeue and pages ret=%x", ret);
- goto create_qp_exit2;
+	/*
+	 * initialize r/squeue and register queue pages
+	 *
+	 * Kernel QPs additionally get a queue map per queue. A failed map
+	 * allocation must set ret itself: init_qp_queue() has just succeeded,
+	 * so ret is 0 and the cleanup path would otherwise return ERR_PTR(0),
+	 * i.e. NULL, which callers do not recognise as an error.
+	 */
+	if (HAS_SQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
+			HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+			&parms.squeue, swqe_size);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize squeue "
+				 "and pages ret=%i", ret);
+			goto create_qp_exit2;
+		}
+
+		if (!is_user) {
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+					    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->sq_map.map) {
+				ret = -ENOMEM;
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			}
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
+		}
+	}
+
+	if (HAS_RQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
+			H_SUCCESS, &parms.rqueue, rwqe_size);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize rqueue "
+				 "and pages ret=%i", ret);
+			goto create_qp_exit4;
+		}
+		if (!is_user) {
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+					    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->rq_map.map) {
+				ret = -ENOMEM;
+				ehca_err(pd->device, "Couldn't allocate rqueue "
+					 "map ret=%i", ret);
+				goto create_qp_exit5;
+			}
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
+		}
+	} else if (init_attr->srq && !is_user) {
+		/* this is a base QP, use the queue map of the SRQ */
+		my_qp->rq_map = my_srq->rq_map;
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
+	}
- my_qp->ib_qp.pd = &my_pd->ib_pd;
- my_qp->ib_qp.device = my_pd->ib_pd.device;
+ if (is_srq) {
+ my_qp->ib_srq.pd = &my_pd->ib_pd;
+ my_qp->ib_srq.device = my_pd->ib_pd.device;
- my_qp->ib_qp.recv_cq = init_attr->recv_cq;
- my_qp->ib_qp.send_cq = init_attr->send_cq;
+ my_qp->ib_srq.srq_context = init_attr->qp_context;
+ my_qp->ib_srq.event_handler = init_attr->event_handler;
+ } else {
+ my_qp->ib_qp.qp_num = ib_qp_num;
+ my_qp->ib_qp.pd = &my_pd->ib_pd;
+ my_qp->ib_qp.device = my_pd->ib_pd.device;
- my_qp->ib_qp.qp_num = my_qp->real_qp_num;
- my_qp->ib_qp.qp_type = init_attr->qp_type;
+ my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+ my_qp->ib_qp.send_cq = init_attr->send_cq;
- my_qp->qp_type = init_attr->qp_type;
- my_qp->ib_qp.srq = init_attr->srq;
+ my_qp->ib_qp.qp_type = qp_type;
+ my_qp->ib_qp.srq = init_attr->srq;
- my_qp->ib_qp.qp_context = init_attr->qp_context;
- my_qp->ib_qp.event_handler = init_attr->event_handler;
+ my_qp->ib_qp.qp_context = init_attr->qp_context;
+ my_qp->ib_qp.event_handler = init_attr->event_handler;
+ }
init_attr->cap.max_inline_data = 0; /* not supported yet */
- init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
- init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
- init_attr->cap.max_send_sge = parms.act_nr_send_sges;
- init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+ init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
+ init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
+ init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
+ init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+ my_qp->init_attr = *init_attr;
+
+	/* remember special QPs (SMI/GSI) in the per-port table */
+	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+			&my_qp->ib_qp;
+		if (ehca_nr_ports < 0) {
+			/* alloc array to cache subsequent modify qp parms
+			 * for autodetect mode
+			 */
+			my_qp->mod_qp_parm =
+				kzalloc(EHCA_MOD_QP_PARM_MAX *
+					sizeof(*my_qp->mod_qp_parm),
+					GFP_KERNEL);
+			if (!my_qp->mod_qp_parm) {
+				ehca_err(pd->device,
+					 "Could not alloc mod_qp_parm");
+				/* the QP pointer is no longer valid */
+				shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+					NULL;
+				/*
+				 * ret is still 0 here; without an explicit
+				 * error code the function would return
+				 * ERR_PTR(0), i.e. NULL.
+				 */
+				ret = -ENOMEM;
+				/*
+				 * create_qp_exit6 also frees rq_map.map;
+				 * jumping to create_qp_exit5 would leak it.
+				 */
+				goto create_qp_exit6;
+			}
+		}
+	}
/* NOTE: define_apq0() not supported yet */
- if (init_attr->qp_type == IB_QPT_GSI) {
+ if (qp_type == IB_QPT_GSI) {
h_ret = ehca_define_sqp(shca, my_qp, init_attr);
if (h_ret != H_SUCCESS) {
- ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
- h_ret);
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ /* the QP pointer is no longer valid */
+ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+ NULL;
ret = ehca2ib_return_code(h_ret);
- goto create_qp_exit3;
+ goto create_qp_exit6;
}
}
- if (init_attr->send_cq) {
- struct ehca_cq *cq = container_of(init_attr->send_cq,
- struct ehca_cq, ib_cq);
- ret = ehca_cq_assign_qp(cq, my_qp);
+
+ if (my_qp->send_cq) {
+ ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
if (ret) {
- ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
- ret);
- goto create_qp_exit3;
+ ehca_err(pd->device,
+ "Couldn't assign qp to send_cq ret=%i", ret);
+ goto create_qp_exit7;
}
- my_qp->send_cq = cq;
}
+
/* copy queues, galpa data to user space */
if (context && udata) {
- struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
- struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
struct ehca_create_qp_resp resp;
- struct vm_area_struct * vma;
memset(&resp, 0, sizeof(resp));
resp.qp_num = my_qp->real_qp_num;
resp.token = my_qp->token;
resp.qp_type = my_qp->qp_type;
+ resp.ext_type = my_qp->ext_type;
resp.qkey = my_qp->qkey;
resp.real_qp_num = my_qp->real_qp_num;
- /* rqueue properties */
- resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
- resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
- resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
- resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
- resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
- ipz_rqueue->queue_length,
- (void**)&resp.ipz_rqueue.queue,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap rqueue pages");
- goto create_qp_exit3;
- }
- my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
- /* squeue properties */
- resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
- resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
- resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
- resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
- resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
- ipz_squeue->queue_length,
- (void**)&resp.ipz_squeue.queue,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap squeue pages");
- goto create_qp_exit4;
- }
- my_qp->uspace_squeue = resp.ipz_squeue.queue;
- /* fw_handle */
- resp.galpas = my_qp->galpas;
- ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap fw_handle");
- goto create_qp_exit5;
- }
- my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+
+ if (HAS_SQ(my_qp))
+ queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+ if (HAS_RQ(my_qp))
+ queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+ resp.fw_handle_ofs = (u32)
+ (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
- goto create_qp_exit6;
+ goto create_qp_exit8;
}
}
- return &my_qp->ib_qp;
+ return my_qp;
+
+create_qp_exit8:
+ ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+
+create_qp_exit7:
+ kfree(my_qp->mod_qp_parm);
create_qp_exit6:
- ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+ if (HAS_RQ(my_qp) && !is_user)
+ vfree(my_qp->rq_map.map);
create_qp_exit5:
- ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
create_qp_exit4:
- ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
+ if (HAS_SQ(my_qp) && !is_user)
+ vfree(my_qp->sq_map.map);
create_qp_exit3:
- ipz_queue_dtor(&my_qp->ipz_rqueue);
- ipz_queue_dtor(&my_qp->ipz_squeue);
+ if (HAS_SQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
create_qp_exit2:
hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
create_qp_exit1:
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
idr_remove(&ehca_qp_idr, my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
create_qp_exit0:
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(ret);
}
+/*
+ * Create a regular QP: thin wrapper around internal_create_qp() with
+ * no SRQ attributes and is_srq == 0. Error pointers are passed through
+ * unchanged; on success the embedded ib_qp is returned.
+ */
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *qp_init_attr,
+			     struct ib_udata *udata)
+{
+	struct ehca_qp *qp = internal_create_qp(pd, qp_init_attr, NULL,
+						udata, 0);
+
+	if (IS_ERR(qp))
+		return (struct ib_qp *)qp;
+
+	return &qp->ib_qp;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject);
+
+/*
+ * Create an SRQ. The SRQ is built by internal_create_qp() (is_srq == 1)
+ * from a QP init attribute filled with the common SRQ attributes, and is
+ * then moved through INIT -> enabled -> RTR with three modify-QP hcalls.
+ *
+ * Only IB_SRQT_BASIC is accepted; other types yield ERR_PTR(-ENOSYS).
+ * On success srq_init_attr->attr.max_wr is updated from the created
+ * queue and attr.max_sge is set to 3. If bringing the SRQ up fails, the
+ * SRQ is destroyed again and an error pointer is returned.
+ */
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *srq_init_attr,
+			       struct ib_udata *udata)
+{
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct hcp_modify_qp_control_block *mqpcb;
+	struct ib_qp_init_attr qp_attr;
+	struct ehca_qp *srq_qp;
+	struct ib_srq *err;
+	u64 h_rc, mask;
+
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
+
+	/* For common attributes, internal_create_qp() takes its info
+	 * out of the QP init attribute, so copy all common attrs there.
+	 */
+	memset(&qp_attr, 0, sizeof(qp_attr));
+	qp_attr.qp_type = IB_QPT_RC;
+	qp_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_attr.qp_context = srq_init_attr->srq_context;
+	qp_attr.event_handler = srq_init_attr->event_handler;
+	qp_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+	qp_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+
+	srq_qp = internal_create_qp(pd, &qp_attr, srq_init_attr, udata, 1);
+	if (IS_ERR(srq_qp))
+		return (struct ib_srq *)srq_qp;
+
+	/* copy back return values */
+	srq_init_attr->attr.max_sge = 3;
+	srq_init_attr->attr.max_wr = qp_attr.cap.max_recv_wr;
+
+	/* drive SRQ into RTR state */
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!mqpcb) {
+		ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", srq_qp, srq_qp->real_qp_num);
+		err = ERR_PTR(-ENOMEM);
+		goto destroy_srq;
+	}
+
+	/* step 1: RESET -> INIT */
+	mqpcb->qp_state = EHCA_QPS_INIT;
+	mqpcb->prim_phys_port = 1;
+	mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	h_rc = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
+				&srq_qp->pf, mask, mqpcb,
+				srq_qp->galpas.kernel);
+	if (h_rc != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to INIT "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 srq_qp, srq_qp->real_qp_num, h_rc);
+		goto free_ctrlblock;
+	}
+
+	/* step 2: enable the queue */
+	mqpcb->qp_enable = 1;
+	mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+	h_rc = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
+				&srq_qp->pf, mask, mqpcb,
+				srq_qp->galpas.kernel);
+	if (h_rc != H_SUCCESS) {
+		ehca_err(pd->device, "Could not enable SRQ "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 srq_qp, srq_qp->real_qp_num, h_rc);
+		goto free_ctrlblock;
+	}
+
+	/* step 3: INIT -> RTR */
+	mqpcb->qp_state = EHCA_QPS_RTR;
+	mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	h_rc = hipz_h_modify_qp(shca->ipz_hca_handle, srq_qp->ipz_qp_handle,
+				&srq_qp->pf, mask, mqpcb,
+				srq_qp->galpas.kernel);
+	if (h_rc != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to RTR "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 srq_qp, srq_qp->real_qp_num, h_rc);
+		goto free_ctrlblock;
+	}
+
+	ehca_free_fw_ctrlblock(mqpcb);
+
+	return &srq_qp->ib_srq;
+
+free_ctrlblock:
+	/* an hcall failed: translate its code before cleaning up */
+	err = ERR_PTR(ehca2ib_return_code(h_rc));
+	ehca_free_fw_ctrlblock(mqpcb);
+
+destroy_srq:
+	internal_destroy_qp(pd->device, srq_qp, srq_qp->ib_srq.uobject);
+
+	return err;
+}
+
/*
* prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
* set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
@@ -732,8 +1072,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
u64 h_ret;
struct ipz_queue *squeue;
void *bad_send_wqe_p, *bad_send_wqe_v;
- void *squeue_start_p, *squeue_end_p;
- void *squeue_start_v, *squeue_end_v;
+ u64 q_ofs;
struct ehca_wqe *wqe;
int qp_num = my_qp->ib_qp.qp_num;
@@ -743,38 +1082,35 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
&bad_send_wqe_p, NULL, 2);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
- " ehca_qp=%p qp_num=%x h_ret=%lx",
+ " ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
- bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63)));
+ bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
qp_num, bad_send_wqe_p);
/* convert wqe pointer to vadr */
- bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
- if (ehca_debug_level)
+ bad_send_wqe_v = __va((u64)bad_send_wqe_p);
+ if (ehca_debug_level >= 2)
ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
squeue = &my_qp->ipz_squeue;
- squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
- squeue_end_p = squeue_start_p+squeue->queue_length;
- squeue_start_v = abs_to_virt((u64)squeue_start_p);
- squeue_end_v = abs_to_virt((u64)squeue_end_p);
- ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
- qp_num, squeue_start_v, squeue_end_v);
+ if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
+ ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
+ " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
+ return -EFAULT;
+ }
/* loop sets wqe's purge bit */
- wqe = (struct ehca_wqe*)bad_send_wqe_v;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
*bad_wqe_cnt = 0;
while (wqe->optype != 0xff && wqe->wqef != 0xff) {
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
wqe->nr_of_data_seg = 0; /* suppress data access */
wqe->wqef = WQEF_PURGE; /* WQE to be purged */
- wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+ q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
*bad_wqe_cnt = (*bad_wqe_cnt)+1;
- if ((void*)wqe >= squeue_end_v) {
- wqe = squeue_start_v;
- }
}
/*
* bad wqe will be reprocessed and ignored when pol_cq() is called,
@@ -787,6 +1123,111 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
return 0;
}
+/*
+ * Count the CQEs that still have to be polled for one queue.
+ *
+ * @wqe_p is translated into an offset inside @ipz_queue and from there
+ * into a WQE index. Every map entry from the one after qmap->tail up to
+ * (not including) that index which requested a CQE bumps
+ * qmap->left_to_poll. The index is stored in qmap->next_wqe_idx as the
+ * place where flushing has to start.
+ *
+ * Returns 0, or -EFAULT if the address does not map into the queue.
+ */
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+			  struct ehca_queue_map *qmap)
+{
+	unsigned int idx;
+	u32 stop_idx;
+	u64 q_ofs;
+	void *wqe_v;
+
+	/* convert real to abs address */
+	wqe_p &= ~(1UL << 63);
+	wqe_v = __va(wqe_p);
+
+	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+		ehca_gen_err("Invalid offset for calculating left cqes "
+			     "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
+		return -EFAULT;
+	}
+
+	stop_idx = q_ofs / ipz_queue->qe_size;
+
+	/* check all processed wqes, whether a cqe is requested or not */
+	for (idx = next_index(qmap->tail, qmap->entries);
+	     idx != stop_idx;
+	     idx = next_index(idx, qmap->entries)) {
+		if (qmap->map[idx].cqe_req)
+			qmap->left_to_poll++;
+	}
+
+	/* save index in queue, where we have to start flushing */
+	qmap->next_wqe_idx = stop_idx;
+	return 0;
+}
+
+/*
+ * Work out how many CQEs are still outstanding on both queues of a QP
+ * and, if there are none, put the QP on the error lists of its CQs.
+ *
+ * For SRQ base QPs the disable-and-get-wqe hcall is not available, so
+ * both queue maps are simply set to "nothing left to poll" with flushing
+ * starting right after the current tail. For all other QPs the hcall
+ * supplies the send and receive WQE pointers that calc_left_cqes()
+ * evaluates.
+ *
+ * Returns 0 on success, the translated hcall error, or the error
+ * reported by calc_left_cqes().
+ */
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+	struct ehca_queue_map *sq_map = &my_qp->sq_map;
+	struct ehca_queue_map *rq_map = &my_qp->rq_map;
+	void *send_wqe_p, *recv_wqe_p;
+	unsigned long flags;
+	int qp_num = my_qp->ib_qp.qp_num;
+	u64 h_ret;
+	int rc;
+
+	if (my_qp->ext_type == EQPT_SRQBASE) {
+		/* this hcall is not supported on base QPs */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		sq_map->left_to_poll = 0;
+		sq_map->next_wqe_idx = next_index(sq_map->tail,
+						  sq_map->entries);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		rq_map->left_to_poll = 0;
+		rq_map->next_wqe_idx = next_index(rq_map->tail,
+						  rq_map->entries);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	} else {
+		/* get send and receive wqe pointer */
+		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle, &my_qp->pf,
+				&send_wqe_p, &recv_wqe_p, 4);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
+				 my_qp, qp_num, h_ret);
+			return ehca2ib_return_code(h_ret);
+		}
+
+		/*
+		 * acquire lock to ensure that nobody is polling the cq which
+		 * could mean that the qmap->tail pointer is in an
+		 * inconsistent state.
+		 */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		rc = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+				    sq_map);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+		if (rc)
+			return rc;
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		rc = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+				    rq_map);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+		if (rc)
+			return rc;
+	}
+
+	/* this assures flush cqes being generated only for pending wqes */
+	if (sq_map->left_to_poll != 0 || rq_map->left_to_poll != 0)
+		return 0;
+
+	spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+	ehca_add_to_err_list(my_qp, 1);
+	spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+	if (HAS_RQ(my_qp)) {
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		ehca_add_to_err_list(my_qp, 0);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	}
+
+	return 0;
+}
+
/*
* internal_modify_qp with circumvention to handle aqp0 properly
* smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -807,12 +1248,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
u64 update_mask;
u64 h_ret;
int bad_wqe_cnt = 0;
+ int is_user = 0;
int squeue_locked = 0;
- unsigned long spl_flags = 0;
+ unsigned long flags = 0;
/* do query_qp to obtain current attr values */
- mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
- if (mqpcb == NULL) {
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!mqpcb) {
ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
"ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
return -ENOMEM;
@@ -824,11 +1266,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
mqpcb, my_qp->galpas.kernel);
if (h_ret != H_SUCCESS) {
ehca_err(ibqp->device, "hipz_h_query_qp() failed "
- "ehca_qp=%p qp_num=%x h_ret=%lx",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, ibqp->qp_num, h_ret);
ret = ehca2ib_return_code(h_ret);
goto modify_qp_exit1;
}
+ if (ibqp->uobject)
+ is_user = 1;
qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
@@ -860,7 +1304,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ibqp, &smiqp_attr, smiqp_attr_mask, 1);
if (smirc) {
ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
- "ehca_modify_qp() rc=%x", smirc);
+ "ehca_modify_qp() rc=%i", smirc);
ret = H_PARAMETER;
goto modify_qp_exit1;
}
@@ -878,14 +1322,14 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit1;
}
- ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
+ ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
"new qp_state=%x attribute_mask=%x",
my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
if (!smi_reset2init &&
!ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
- attr_mask)) {
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
ret = -EINVAL;
ehca_err(ibqp->device,
"Invalid qp transition new_state=%x cur_state=%x "
@@ -894,7 +1338,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit1;
}
- if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+ mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
+ if (mqpcb->qp_state)
update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
else {
ret = -EINVAL;
@@ -929,19 +1374,30 @@ static int internal_modify_qp(struct ib_qp *ibqp,
"ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
my_qp, ibqp->qp_num, statetrans);
+ /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
+ * in non-LL UD QPs.
+ */
+ if ((my_qp->qp_type == IB_QPT_UD) &&
+ (my_qp->ext_type != EQPT_LLQP) &&
+ (statetrans == IB_QPST_INIT2RTR) &&
+ (shca->hw_level >= 0x22)) {
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+ mqpcb->send_grh_flag = 1;
+ }
+
/* sqe -> rts: set purge bit of bad wqe before actual trans */
if ((my_qp->qp_type == IB_QPT_UD ||
my_qp->qp_type == IB_QPT_GSI ||
my_qp->qp_type == IB_QPT_SMI) &&
statetrans == IB_QPST_SQE2RTS) {
/* mark next free wqe if kernel */
- if (my_qp->uspace_squeue == 0) {
+ if (!ibqp->uobject) {
struct ehca_wqe *wqe;
/* lock send queue */
- spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
squeue_locked = 1;
/* mark next free wqe */
- wqe = (struct ehca_wqe*)
+ wqe = (struct ehca_wqe *)
ipz_qeit_get(&my_qp->ipz_squeue);
wqe->optype = wqe->wqef = 0xff;
ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
@@ -950,7 +1406,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
if (ret) {
ehca_err(ibqp->device, "prepare_sqe_rts() failed "
- "ehca_qp=%p qp_num=%x ret=%x",
+ "ehca_qp=%p qp_num=%x ret=%i",
my_qp, ibqp->qp_num, ret);
goto modify_qp_exit2;
}
@@ -976,10 +1432,19 @@ static int internal_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (attr->pkey_index >= 16) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid pkey_index=%x. "
+ "ehca_qp=%p qp_num=%x max_pkey_index=f",
+ attr->pkey_index, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
mqpcb->prim_p_key_idx = attr->pkey_index;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
}
if (attr_mask & IB_QP_PORT) {
+ struct ehca_sport *sport;
+ struct ehca_qp *aqp1;
if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
ret = -EINVAL;
ehca_err(ibqp->device, "Invalid port=%x. "
@@ -988,6 +1453,29 @@ static int internal_modify_qp(struct ib_qp *ibqp,
shca->num_ports);
goto modify_qp_exit2;
}
+ sport = &shca->sport[attr->port_num - 1];
+ if (!sport->ibqp_sqp[IB_QPT_GSI]) {
+ /* should not occur */
+ ret = -EFAULT;
+ ehca_err(ibqp->device, "AQP1 was not created for "
+ "port=%x", attr->port_num);
+ goto modify_qp_exit2;
+ }
+ aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
+ struct ehca_qp, ib_qp);
+ if (ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_SMI &&
+ aqp1->mod_qp_parm) {
+ /*
+ * firmware will reject this modify_qp() because
+ * port is not activated/initialized fully
+ */
+ ret = -EFAULT;
+ ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
+ "either port is being activated (try again) "
+ "or cabling issue", attr->port_num);
+ goto modify_qp_exit2;
+ }
mqpcb->prim_phys_port = attr->port_num;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
}
@@ -996,10 +1484,6 @@ static int internal_modify_qp(struct ib_qp *ibqp,
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
}
if (attr_mask & IB_QP_AV) {
- int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
- int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
- init_attr.port_num].rate);
-
mqpcb->dlid = attr->ah_attr.dlid;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
@@ -1007,22 +1491,27 @@ static int internal_modify_qp(struct ib_qp *ibqp,
mqpcb->service_level = attr->ah_attr.sl;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
- if (ah_mult < ehca_mult)
- mqpcb->max_static_rate = (ah_mult > 0) ?
- ((ehca_mult - 1) / ah_mult) : 0;
- else
- mqpcb->max_static_rate = 0;
-
+ if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
+ attr->ah_attr.static_rate,
+ &mqpcb->max_static_rate)) {
+ ret = -EINVAL;
+ goto modify_qp_exit2;
+ }
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
/*
+ * Always supply the GRH flag, even if it's zero, to give the
+ * hypervisor a clear "yes" or "no" instead of a "perhaps"
+ */
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+
+ /*
* only if GRH is TRUE we might consider SOURCE_GID_IDX
* and DEST_GID otherwise phype will return H_ATTR_PARM!!!
*/
if (attr->ah_attr.ah_flags == IB_AH_GRH) {
- mqpcb->send_grh_flag = 1 << 31;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+ mqpcb->send_grh_flag = 1;
+
mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
@@ -1043,6 +1532,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PATH_MTU) {
+ /* store ld(MTU) */
+ my_qp->mtu_shift = attr->path_mtu + 7;
mqpcb->path_mtu = attr->path_mtu;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
}
@@ -1076,54 +1567,80 @@ static int internal_modify_qp(struct ib_qp *ibqp,
(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
}
if (attr_mask & IB_QP_ALT_PATH) {
- int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
- int ehca_mult = ib_rate_to_mult(
- shca->sport[my_qp->init_attr.port_num].rate);
+ if (attr->alt_port_num < 1
+ || attr->alt_port_num > shca->num_ports) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid alt_port=%x. "
+ "ehca_qp=%p qp_num=%x num_ports=%x",
+ attr->alt_port_num, my_qp, ibqp->qp_num,
+ shca->num_ports);
+ goto modify_qp_exit2;
+ }
+ mqpcb->alt_phys_port = attr->alt_port_num;
+
+ if (attr->alt_pkey_index >= 16) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
+ "ehca_qp=%p qp_num=%x max_pkey_index=f",
+ attr->pkey_index, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
+ mqpcb->alt_p_key_idx = attr->alt_pkey_index;
+ mqpcb->timeout_al = attr->alt_timeout;
mqpcb->dlid_al = attr->alt_ah_attr.dlid;
- update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
mqpcb->service_level_al = attr->alt_ah_attr.sl;
- update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
- if (ah_mult < ehca_mult)
- mqpcb->max_static_rate = (ah_mult > 0) ?
- ((ehca_mult - 1) / ah_mult) : 0;
- else
- mqpcb->max_static_rate_al = 0;
+ if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
+ attr->alt_ah_attr.static_rate,
+ &mqpcb->max_static_rate_al)) {
+ ret = -EINVAL;
+ goto modify_qp_exit2;
+ }
+
+ /* OpenIB doesn't support alternate retry counts - copy them */
+ mqpcb->retry_count_al = mqpcb->retry_count;
+ mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
+
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
- update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+ /*
+ * Always supply the GRH flag, even if it's zero, to give the
+ * hypervisor a clear "yes" or "no" instead of a "perhaps"
+ */
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
/*
* only if GRH is TRUE we might consider SOURCE_GID_IDX
* and DEST_GID otherwise phype will return H_ATTR_PARM!!!
*/
if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
- mqpcb->send_grh_flag_al = 1 << 31;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
- mqpcb->source_gid_idx_al =
- attr->alt_ah_attr.grh.sgid_index;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+ mqpcb->send_grh_flag_al = 1;
for (cnt = 0; cnt < 16; cnt++)
mqpcb->dest_gid_al.byte[cnt] =
attr->alt_ah_attr.grh.dgid.raw[cnt];
-
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+ mqpcb->source_gid_idx_al =
+ attr->alt_ah_attr.grh.sgid_index;
mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
mqpcb->traffic_class_al =
attr->alt_ah_attr.grh.traffic_class;
+
update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
}
}
@@ -1145,7 +1662,16 @@ static int internal_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PATH_MIG_STATE) {
- mqpcb->path_migration_state = attr->path_mig_state;
+ if (attr->path_mig_state != IB_MIG_REARM
+ && attr->path_mig_state != IB_MIG_MIGRATED) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid mig_state=%x",
+ attr->path_mig_state);
+ goto modify_qp_exit2;
+ }
+ mqpcb->path_migration_state = attr->path_mig_state + 1;
+ if (attr->path_mig_state == IB_MIG_REARM)
+ my_qp->mig_armed = 1;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
}
@@ -1160,7 +1686,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
/* no support for max_send/recv_sge yet */
}
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
@@ -1171,8 +1697,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
- ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
- "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
+ ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
+ "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
@@ -1204,15 +1730,34 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ret = ehca2ib_return_code(h_ret);
ehca_err(ibqp->device, "ENABLE in context of "
"RESET_2_INIT failed! Maybe you didn't get "
- "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+ "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
}
+ if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+ && !is_user) {
+ ret = check_for_left_cqes(my_qp, shca);
+ if (ret)
+ goto modify_qp_exit2;
+ }
if (statetrans == IB_QPST_ANY2RESET) {
ipz_qeit_reset(&my_qp->ipz_rqueue);
ipz_qeit_reset(&my_qp->ipz_squeue);
+
+ if (qp_cur_state == IB_QPS_ERR && !is_user) {
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ if (HAS_RQ(my_qp))
+ del_from_err_list(my_qp->recv_cq,
+ &my_qp->rq_err_node);
+ }
+ if (!is_user)
+ reset_queue_map(&my_qp->sq_map);
+
+ if (HAS_RQ(my_qp) && !is_user)
+ reset_queue_map(&my_qp->rq_map);
}
if (attr_mask & IB_QP_QKEY)
@@ -1220,12 +1765,12 @@ static int internal_modify_qp(struct ib_qp *ibqp,
modify_qp_exit2:
if (squeue_locked) { /* this means: sqe -> rts */
- spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
my_qp->sqerr_purgeflag = 1;
}
modify_qp_exit1:
- kfree(mqpcb);
+ ehca_free_fw_ctrlblock(mqpcb);
return ret;
}
@@ -1233,19 +1778,110 @@ modify_qp_exit1:
+/*
+ * ehca_modify_qp() - modify a QP, or only record the request for a GSI/SMI
+ * QP whose mod_qp_parm array is still present.
+ *
+ * For GSI and SMI QPs with a non-NULL my_qp->mod_qp_parm, attr_mask and a
+ * copy of *attr are stored in the next free array slot under
+ * sport->mod_sqp_lock and internal_modify_qp() is not called. If the array
+ * already holds EHCA_MOD_QP_PARM_MAX entries, -EINVAL is returned.
+ * All other requests are passed on to internal_modify_qp().
+ *
+ * When the result is 0 and IB_QP_STATE is set in attr_mask, my_qp->state is
+ * set to attr->qp_state; this also applies to a request that was only cached.
+ *
+ * Returns 0 for a cached request, -EINVAL on cache overflow, otherwise the
+ * return value of internal_modify_qp(). udata is not used in this function.
+ */
int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
+ int ret = 0;
+
+ struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+ ib_device);
struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
- struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
- ib_pd);
- u32 cur_pid = current->tgid;
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
+ /* The if-block below caches qp_attr to be modified for GSI and SMI
+ * qps during the initialization by ib_mad. When the respective port
+ * is activated, ie we got an event PORT_ACTIVE, we'll replay the
+ * cached modify calls sequence, see ehca_recover_sqp() below.
+ * Why that is required:
+ * 1) If one port is connected, older code requires port one
+ * to be connected and module option nr_ports=1 to be given by
+ * user, which is very inconvenient for end user.
+ * 2) Firmware accepts modify_qp() only if respective port has become
+ * active. Older code had a wait loop of 30sec in create_qp()/
+ * define_aqp1(), which is not appropriate in practice. This
+ * code now removes that wait loop, see define_aqp1(), and always
+ * reports all ports to ib_mad resp. users. Only activated ports
+ * will then be usable for the users.
+ */
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+ int port = my_qp->init_attr.port_num;
+ struct ehca_sport *sport = &shca->sport[port - 1];
+ unsigned long flags;
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+ /* cache qp_attr only during init */
+ if (my_qp->mod_qp_parm) {
+ struct ehca_mod_qp_parm *p;
+ if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
+ ehca_err(&shca->ib_device,
+ "mod_qp_parm overflow state=%x port=%x"
+ " type=%x", attr->qp_state,
+ my_qp->init_attr.port_num,
+ ibqp->qp_type);
+ spin_unlock_irqrestore(&sport->mod_sqp_lock,
+ flags);
+ return -EINVAL;
+ }
+ p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
+ p->mask = attr_mask;
+ p->attr = *attr;
+ my_qp->mod_qp_parm_idx++;
+ ehca_dbg(&shca->ib_device,
+ "Saved qp_attr for state=%x port=%x type=%x",
+ attr->qp_state, my_qp->init_attr.port_num,
+ ibqp->qp_type);
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ }
+
+ ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
+
+out:
+ /* ret is still 0 here when the request was only cached above */
+ if ((ret == 0) && (attr_mask & IB_QP_STATE))
+ my_qp->state = attr->qp_state;
+
+ return ret;
+}
+
+/*
+ * ehca_recover_sqp() - replay the modify requests recorded in
+ * my_sqp->mod_qp_parm.
+ *
+ * Returns immediately if mod_qp_parm is NULL. Otherwise each recorded
+ * attr/mask pair is passed to internal_modify_qp() in recording order; the
+ * first failure is logged and ends the replay. If every replayed call
+ * succeeds and the receive queue offset is non-zero, hipz_update_rqa() is
+ * called under spinlock_r with current_q_offset / qe_size as the count.
+ * On both paths the array is freed and mod_qp_parm is reset to NULL.
+ *
+ * NOTE(review): mod_qp_parm is read and cleared here without taking
+ * sport->mod_sqp_lock, which ehca_modify_qp() holds while caching -
+ * confirm that callers serialize the two.
+ */
+void ehca_recover_sqp(struct ib_qp *sqp)
+{
+ struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
+ int port = my_sqp->init_attr.port_num;
+ struct ib_qp_attr attr;
+ struct ehca_mod_qp_parm *qp_parm;
+ int i, qp_parm_idx, ret;
+ unsigned long flags, wr_cnt;
+
+ if (!my_sqp->mod_qp_parm)
+ return;
+ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
+
+ qp_parm = my_sqp->mod_qp_parm;
+ qp_parm_idx = my_sqp->mod_qp_parm_idx;
+ for (i = 0; i < qp_parm_idx; i++) {
+ /* work on a local copy of the recorded attributes */
+ attr = qp_parm[i].attr;
+ ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
+ if (ret) {
+ /* ret is a signed int return code, so print it with %i */
+ ehca_err(sqp->device, "Could not modify SQP port=%x "
+ "qp_num=%x ret=%i", port, sqp->qp_num, ret);
+ goto free_qp_parm;
+ }
+ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
+ port, sqp->qp_num, attr.qp_state);
+ }
+
+ /* re-trigger posted recv wrs */
+ wr_cnt = my_sqp->ipz_rqueue.current_q_offset /
+ my_sqp->ipz_rqueue.qe_size;
+ if (wr_cnt) {
+ spin_lock_irqsave(&my_sqp->spinlock_r, flags);
+ hipz_update_rqa(my_sqp, wr_cnt);
+ spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
+ ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
+ port, sqp->qp_num, wr_cnt);
}
- return internal_modify_qp(ibqp, attr, attr_mask, 0);
+free_qp_parm:
+ kfree(qp_parm);
+ /* a NULL mod_qp_parm stops later modify_qp() calls from caching qp_attr */
+ my_sqp->mod_qp_parm = NULL;
}
int ehca_query_qp(struct ib_qp *qp,
@@ -1253,33 +1889,23 @@ int ehca_query_qp(struct ib_qp *qp,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
- ib_pd);
struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
ib_device);
struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
struct hcp_modify_qp_control_block *qpcb;
- u32 cur_pid = current->tgid;
int cnt, ret = 0;
u64 h_ret;
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
- }
-
if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
- ehca_err(qp->device,"Invalid attribute mask "
+ ehca_err(qp->device, "Invalid attribute mask "
"ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
my_qp, qp->qp_num, qp_attr_mask);
return -EINVAL;
}
- qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL );
+ qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!qpcb) {
- ehca_err(qp->device,"Out of memory for qpcb "
+ ehca_err(qp->device, "Out of memory for qpcb "
"ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
return -ENOMEM;
}
@@ -1291,8 +1917,8 @@ int ehca_query_qp(struct ib_qp *qp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
- ehca_err(qp->device,"hipz_h_query_qp() failed "
- "ehca_qp=%p qp_num=%x h_ret=%lx",
+ ehca_err(qp->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, qp->qp_num, h_ret);
goto query_qp_exit1;
}
@@ -1302,7 +1928,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (qp_attr->cur_qp_state == -EINVAL) {
ret = -EINVAL;
- ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
+ ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
"ehca_qp=%p qp_num=%x",
qpcb->qp_state, my_qp, qp->qp_num);
goto query_qp_exit1;
@@ -1313,7 +1939,7 @@ int ehca_query_qp(struct ib_qp *qp,
qp_attr->qkey = qpcb->qkey;
qp_attr->path_mtu = qpcb->path_mtu;
- qp_attr->path_mig_state = qpcb->path_migration_state;
+ qp_attr->path_mig_state = qpcb->path_migration_state - 1;
qp_attr->rq_psn = qpcb->receive_psn;
qp_attr->sq_psn = qpcb->send_psn;
qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
@@ -1335,22 +1961,19 @@ int ehca_query_qp(struct ib_qp *qp,
qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
qp_attr->dest_qp_num = qpcb->dest_qp_nr;
- qp_attr->pkey_index =
- EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
-
- qp_attr->port_num =
- EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
-
+ qp_attr->pkey_index = qpcb->prim_p_key_idx;
+ qp_attr->port_num = qpcb->prim_phys_port;
qp_attr->timeout = qpcb->timeout;
qp_attr->retry_cnt = qpcb->retry_count;
qp_attr->rnr_retry = qpcb->rnr_retry_count;
- qp_attr->alt_pkey_index =
- EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
-
+ qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
qp_attr->alt_port_num = qpcb->alt_phys_port;
qp_attr->alt_timeout = qpcb->timeout_al;
+ qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
+ qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
+
/* primary av */
qp_attr->ah_attr.sl = qpcb->service_level;
@@ -1397,73 +2020,169 @@ int ehca_query_qp(struct ib_qp *qp,
if (qp_init_attr)
*qp_init_attr = my_qp->init_attr;
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
query_qp_exit1:
- kfree(qpcb);
+ ehca_free_fw_ctrlblock(qpcb);
return ret;
}
-int ehca_destroy_qp(struct ib_qp *ibqp)
+/*
+ * ehca_modify_srq() - modify an SRQ; only IB_SRQ_LIMIT is accepted.
+ *
+ * Allocates a firmware control block and, for IB_SRQ_LIMIT, stores
+ * attr->srq_limit in curr_srq_limit and sets QPX_AAELOG_RESET_SRQ_LIMIT in
+ * qp_aff_asyn_ev_log_reg, then issues hipz_h_modify_qp() with the matching
+ * update mask. Any other bit set in attr_mask is rejected.
+ *
+ * Returns 0 on success, -ENOMEM if the control block cannot be allocated,
+ * -EINVAL for unsupported attr_mask bits, or ehca2ib_return_code(h_ret) if
+ * the firmware call fails. udata is not used in this function.
+ */
+int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
- struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
- struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+ struct ehca_qp *my_qp =
+ container_of(ibsrq, struct ehca_qp, ib_srq);
+ struct ehca_shca *shca =
+ container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
+ struct hcp_modify_qp_control_block *mqpcb;
+ u64 update_mask;
+ u64 h_ret;
+ int ret = 0;
+
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!mqpcb) {
+ ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ update_mask = 0;
+ if (attr_mask & IB_SRQ_LIMIT) {
+ /* clear each handled bit so leftovers can be detected below */
+ attr_mask &= ~IB_SRQ_LIMIT;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
+ mqpcb->curr_srq_limit = attr->srq_limit;
+ mqpcb->qp_aff_asyn_ev_log_reg =
+ EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
+ }
+
+ /* by now, all bits in attr_mask should have been cleared */
+ if (attr_mask) {
+ ehca_err(ibsrq->device, "invalid attribute mask bits set "
+ "attr_mask=%x", attr_mask);
+ ret = -EINVAL;
+ goto modify_srq_exit0;
+ }
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+ h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+ NULL, update_mask, mqpcb,
+ my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
+ "ehca_qp=%p qp_num=%x",
+ h_ret, my_qp, my_qp->real_qp_num);
+ }
+
+modify_srq_exit0:
+ /* common exit: the control block is released on every path past the allocation */
+ ehca_free_fw_ctrlblock(mqpcb);
+
+ return ret;
+}
+
+/*
+ * ehca_query_srq() - fill *srq_attr for an SRQ from a firmware query.
+ *
+ * Allocates a firmware control block, calls hipz_h_query_qp() on the SRQ's
+ * QP handle and copies the result into srq_attr: max_wr is
+ * max_nr_outst_recv_wr - 1, max_sge is the fixed value 3 and srq_limit is
+ * curr_srq_limit.
+ *
+ * Returns 0 on success, -ENOMEM if the control block cannot be allocated,
+ * or ehca2ib_return_code(h_ret) if the firmware call fails (srq_attr is
+ * left untouched in both error cases).
+ */
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
+{
+ struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+ struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
ib_device);
+ struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+ struct hcp_modify_qp_control_block *qpcb;
+ int ret = 0;
+ u64 h_ret;
+
+ qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!qpcb) {
+ ehca_err(srq->device, "Out of memory for qpcb "
+ "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
+ NULL, qpcb, my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(srq->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
+ my_qp, my_qp->real_qp_num, h_ret);
+ goto query_srq_exit1;
+ }
+
+ /* NOTE(review): the -1 presumably undoes a +1 applied at create time - confirm */
+ srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+ /* constant, not read back from the control block */
+ srq_attr->max_sge = 3;
+ srq_attr->srq_limit = qpcb->curr_srq_limit;
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+query_srq_exit1:
+ ehca_free_fw_ctrlblock(qpcb);
+
+ return ret;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject)
+{
+ struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
ib_pd);
- u32 cur_pid = current->tgid;
- u32 qp_num = ibqp->qp_num;
+ struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
+ u32 qp_num = my_qp->real_qp_num;
int ret;
u64 h_ret;
u8 port_num;
+ int is_user = 0;
enum ib_qp_type qp_type;
unsigned long flags;
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
+ if (uobject) {
+ is_user = 1;
+ if (my_qp->mm_count_galpa ||
+ my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
+ ehca_err(dev, "Resources still referenced in "
+ "user space qp_num=%x", qp_num);
+ return -EINVAL;
+ }
}
if (my_qp->send_cq) {
- ret = ehca_cq_unassign_qp(my_qp->send_cq,
- my_qp->real_qp_num);
+ ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
if (ret) {
- ehca_err(ibqp->device, "Couldn't unassign qp from "
- "send_cq ret=%x qp_num=%x cq_num=%x", ret,
- my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+ ehca_err(dev, "Couldn't unassign qp from "
+ "send_cq ret=%i qp_num=%x cq_num=%x", ret,
+ qp_num, my_qp->send_cq->cq_number);
return ret;
}
}
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
idr_remove(&ehca_qp_idr, my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
- /* un-mmap if vma alloc */
- if (my_qp->uspace_rqueue) {
- ret = ehca_munmap(my_qp->uspace_rqueue,
- my_qp->ipz_rqueue.queue_length);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap rqueue "
- "qp_num=%x", qp_num);
- ret = ehca_munmap(my_qp->uspace_squeue,
- my_qp->ipz_squeue.queue_length);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap squeue "
- "qp_num=%x", qp_num);
- ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
- qp_num);
- }
+ /*
+ * SRQs will never get into an error list and do not have a recv_cq,
+ * so we need to skip them here.
+ */
+ if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+ if (HAS_SQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ /* now wait until all pending events have completed */
+ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
- ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+ ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
return ehca2ib_return_code(h_ret);
}
@@ -1471,11 +2190,19 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
port_num = my_qp->init_attr.port_num;
qp_type = my_qp->init_attr.qp_type;
+ if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ }
+
/* no support for IB_QPT_SMI yet */
if (qp_type == IB_QPT_GSI) {
struct ib_event event;
- ehca_info(ibqp->device, "device %s: port %x is inactive.",
- shca->ib_device.name, port_num);
+ ehca_info(dev, "device %s: port %x is inactive.",
+ shca->ib_device.name, port_num);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port_num;
@@ -1483,18 +2210,41 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
ib_dispatch_event(&event);
}
- ipz_queue_dtor(&my_qp->ipz_rqueue);
- ipz_queue_dtor(&my_qp->ipz_squeue);
+ if (HAS_RQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+ if (!is_user)
+ vfree(my_qp->rq_map.map);
+ }
+ if (HAS_SQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+ if (!is_user)
+ vfree(my_qp->sq_map.map);
+ }
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return 0;
}
+/*
+ * ehca_destroy_qp() - destroy a QP.
+ *
+ * Passes qp->device, the ehca_qp that embeds qp as its ib_qp member and
+ * qp->uobject to internal_destroy_qp() and returns its result.
+ */
+int ehca_destroy_qp(struct ib_qp *qp)
+{
+ return internal_destroy_qp(qp->device,
+ container_of(qp, struct ehca_qp, ib_qp),
+ qp->uobject);
+}
+
+/*
+ * ehca_destroy_srq() - destroy an SRQ.
+ *
+ * Passes srq->device, the ehca_qp that embeds srq as its ib_srq member and
+ * srq->uobject to internal_destroy_qp() and returns its result.
+ */
+int ehca_destroy_srq(struct ib_srq *srq)
+{
+ return internal_destroy_qp(srq->device,
+ container_of(srq, struct ehca_qp, ib_srq),
+ srq->uobject);
+}
+
int ehca_init_qp_cache(void)
{
qp_cache = kmem_cache_create("ehca_cache_qp",
sizeof(struct ehca_qp), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!qp_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index b46bda1bf85..47f94984353 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -3,8 +3,9 @@
*
* post_send/recv, poll_cq, req_notify
*
- * Authors: Waleri Fomin <fomin@de.ibm.com>
- * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
* Reinhard Ernst <rernst@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
@@ -41,7 +42,6 @@
*/
-#include <asm-powerpc/system.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
@@ -49,9 +49,28 @@
#include "hcp_if.h"
#include "hipz_fns.h"
+/* in RC traffic, insert an empty RDMA READ every this many packets */
+#define ACK_CIRC_THRESHOLD 2000000
+
+/*
+ * Return wr_id with the bits selected by QMAP_IDX_MASK replaced by the
+ * corresponding bits of idx; all other bits of wr_id are kept.
+ */
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+ u64 ret;
+
+ /* drop the masked bits of the original id ... */
+ ret = wr_id & ~QMAP_IDX_MASK;
+ /* ... and put the masked bits of idx in their place */
+ ret |= idx & QMAP_IDX_MASK;
+
+ return ret;
+}
+
+/*
+ * Return the bits of wr_id selected by QMAP_IDX_MASK, converted to u16.
+ * These are the bits that replace_wr_id() overwrites.
+ */
+static u16 get_app_wr_id(u64 wr_id)
+{
+ return wr_id & QMAP_IDX_MASK;
+}
+
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
struct ehca_wqe *wqe_p,
- struct ib_recv_wr *recv_wr)
+ struct ib_recv_wr *recv_wr,
+ u32 rq_map_idx)
{
u8 cnt_ds;
if (unlikely((recv_wr->num_sge < 0) ||
@@ -65,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = recv_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
wqe_p->nr_of_data_seg = recv_wr->num_sge;
for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -77,9 +96,10 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
recv_wr->sg_list[cnt_ds].length;
}
- if (ehca_debug_level) {
- ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
- ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+ if (ehca_debug_level >= 3) {
+ ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
+ ipz_rqueue);
+ ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
}
return 0;
@@ -98,7 +118,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
struct ib_sge *sge = send_wr->sg_list;
ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
- "send_flags=%x opcode=%x",idx, send_wr->wr_id,
+ "send_flags=%x opcode=%x", idx, send_wr->wr_id,
send_wr->num_sge, send_wr->send_flags,
send_wr->opcode);
if (mad_hdr) {
@@ -115,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
mad_hdr->attr_mod);
}
for (j = 0; j < send_wr->num_sge; j++) {
- u8 *data = (u8 *) abs_to_virt(sge->addr);
+ u8 *data = __va(sge->addr);
ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
"lkey=%x",
idx, j, data, sge->length, sge->lkey);
@@ -133,12 +153,15 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
static inline int ehca_write_swqe(struct ehca_qp *qp,
struct ehca_wqe *wqe_p,
- const struct ib_send_wr *send_wr)
+ const struct ib_send_wr *send_wr,
+ u32 sq_map_idx,
+ int hidden)
{
u32 idx;
u64 dma_length;
struct ehca_av *my_av;
u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -151,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = send_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
+
+ qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
@@ -174,13 +201,17 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
wqe_p->wr_flag = 0;
- if (send_wr->send_flags & IB_SEND_SIGNALED)
+ if ((send_wr->send_flags & IB_SEND_SIGNALED ||
+ qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
+ && !hidden) {
wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ qmap_entry->cqe_req = 1;
+ }
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
/* this might not work as long as HW does not support it */
- wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+ wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
}
@@ -197,10 +228,14 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
wqe_p->local_ee_context_qkey = remote_qkey;
- if (!send_wr->wr.ud.ah) {
+ if (unlikely(!send_wr->wr.ud.ah)) {
ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
return -EINVAL;
}
+ if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
+ ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
+ return -EINVAL;
+ }
my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
wqe_p->u.ud_av.ud_av = my_av->av;
@@ -233,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* no break is intentional here */
case IB_QPT_RC:
/* TODO: atomic not implemented */
- wqe_p->u.nud.remote_virtual_adress =
+ wqe_p->u.nud.remote_virtual_address =
send_wr->wr.rdma.remote_addr;
wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
@@ -253,6 +288,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
} /* eof idx */
wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+ /* unsolicited ack circumvention */
+ if (send_wr->opcode == IB_WR_RDMA_READ) {
+ /* on RDMA read, switch on and reset counters */
+ qp->message_count = qp->packet_count = 0;
+ qp->unsol_ack_circ = 1;
+ } else
+ /* else estimate #packets */
+ qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
+
break;
default:
@@ -260,7 +304,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
return -EINVAL;
}
- if (ehca_debug_level) {
+ if (ehca_debug_level >= 3) {
ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
}
@@ -353,125 +397,214 @@ static inline void map_ib_wc_status(u32 cqe_status,
*wc_status = IB_WC_SUCCESS;
}
+static inline int post_one_send(struct ehca_qp *my_qp,
+ struct ib_send_wr *cur_send_wr,
+ int hidden)
+{
+ struct ehca_wqe *wqe_p;
+ int ret;
+ u32 sq_map_idx;
+ u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+
+ /* get pointer next to free WQE */
+ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+ if (unlikely(!wqe_p)) {
+ /* too many posted work requests: queue overflow */
+ ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
+ "qp_num=%x", my_qp->ib_qp.qp_num);
+ return -ENOMEM;
+ }
+
+ /*
+ * Get the index of the WQE in the send queue. The same index is used
+ * for writing into the sq_map.
+ */
+ sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
+
+ /* write a SEND WQE into the QUEUE */
+ ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
+ /*
+ * if something failed,
+ * reset the free entry pointer to the start value
+ */
+ if (unlikely(ret)) {
+ my_qp->ipz_squeue.current_q_offset = start_offset;
+ ehca_err(my_qp->ib_qp.device, "Could not write WQE "
+ "qp_num=%x", my_qp->ib_qp.qp_num);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_send_wr *cur_send_wr;
- struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
- unsigned long spl_flags;
+ unsigned long flags;
+
+ /* Reject WR if QP is in RESET, INIT or RTR state */
+ if (unlikely(my_qp->state < IB_QPS_RTS)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
+ ret = -EINVAL;
+ goto out;
+ }
/* LOCK the QUEUE */
- spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
+
+ /* Send an empty extra RDMA read if:
+ * 1) there has been an RDMA read on this connection before
+ * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
+ * 3) we can be sure that any previous extra RDMA read has been
+ * processed so we don't overflow the SQ
+ */
+ if (unlikely(my_qp->unsol_ack_circ &&
+ my_qp->packet_count > ACK_CIRC_THRESHOLD &&
+ my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
+ /* insert an empty RDMA READ to fix up the remote QP state */
+ struct ib_send_wr circ_wr;
+ memset(&circ_wr, 0, sizeof(circ_wr));
+ circ_wr.opcode = IB_WR_RDMA_READ;
+ post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
+ wqe_cnt++;
+ ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
+ my_qp->message_count = my_qp->packet_count = 0;
+ }
/* loop processes list of send reqs */
- for (cur_send_wr = send_wr; cur_send_wr != NULL;
- cur_send_wr = cur_send_wr->next) {
- u64 start_offset = my_qp->ipz_squeue.current_q_offset;
- /* get pointer next to free WQE */
- wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
- if (unlikely(!wqe_p)) {
- /* too many posted work requests: queue overflow */
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(qp->device, "Too many posted WQEs "
- "qp_num=%x", qp->qp_num);
- }
- goto post_send_exit0;
- }
- /* write a SEND WQE into the QUEUE */
- ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
- /*
- * if something failed,
- * reset the free entry pointer to the start value
- */
+ while (send_wr) {
+ ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
- my_qp->ipz_squeue.current_q_offset = start_offset;
- *bad_send_wr = cur_send_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(qp->device, "Could not write WQE "
- "qp_num=%x", qp->qp_num);
- }
goto post_send_exit0;
}
wqe_cnt++;
- ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
- my_qp, qp->qp_num, wqe_cnt);
- } /* eof for cur_send_wr */
+ send_wr = send_wr->next;
+ }
post_send_exit0:
- /* UNLOCK the QUEUE */
- spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
iosync(); /* serialize GAL register access */
hipz_update_sqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, qp->qp_num, wqe_cnt, ret);
+ my_qp->message_count += wqe_cnt;
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+ if (ret)
+ *bad_send_wr = send_wr;
return ret;
}
-int ehca_post_recv(struct ib_qp *qp,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr)
+static int internal_post_recv(struct ehca_qp *my_qp,
+ struct ib_device *dev,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
{
- struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
- unsigned long spl_flags;
+ u32 rq_map_idx;
+ unsigned long flags;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (unlikely(!HAS_RQ(my_qp))) {
+ ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
+ my_qp, my_qp->real_qp_num, my_qp->ext_type);
+ ret = -ENODEV;
+ goto out;
+ }
/* LOCK the QUEUE */
- spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_r, flags);
- /* loop processes list of send reqs */
- for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
- cur_recv_wr = cur_recv_wr->next) {
+ /* loop processes list of recv reqs */
+ while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_recv_wr)
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(qp->device, "Too many posted WQEs "
- "qp_num=%x", qp->qp_num);
- }
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+ /*
+ * Get the index of the WQE in the recv queue. The same index
+ * is used for writing into the rq_map.
+ */
+ rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
+ rq_map_idx);
/*
* if something failed,
* reset the free entry pointer to the start value
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(qp->device, "Could not write WQE "
- "qp_num=%x", qp->qp_num);
- }
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+
+ qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+ qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 1;
+
wqe_cnt++;
- ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
- my_qp, qp->qp_num, wqe_cnt);
- } /* eof for cur_recv_wr */
+ recv_wr = recv_wr->next;
+ } /* eof for recv_wr */
post_recv_exit0:
- spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
iosync(); /* serialize GAL register access */
hipz_update_rqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, my_qp->real_qp_num, wqe_cnt, ret);
+ spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+ if (ret)
+ *bad_recv_wr = recv_wr;
+
return ret;
}
+int ehca_post_recv(struct ib_qp *qp,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+
+ /* Reject WR if QP is in RESET state */
+ if (unlikely(my_qp->state == IB_QPS_RESET)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
+ *bad_recv_wr = recv_wr;
+ return -EINVAL;
+ }
+
+ return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
+}
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
+ srq->device, recv_wr, bad_recv_wr);
+}
+
/*
* ib_wc_opcode table converts ehca wc opcode to ib
* Since we use zero to indicate invalid opcode, the actual ib opcode must
@@ -491,19 +624,23 @@ static const u8 ib_wc_opcode[255] = {
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
- int ret = 0;
+ int ret = 0, qmap_tail_idx;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
- int cqe_count = 0;
+ struct ehca_qp *my_qp;
+ struct ehca_qmap_entry *qmap_entry;
+ struct ehca_queue_map *qmap;
+ int cqe_count = 0, is_error;
-poll_cq_one_read_cqe:
+repoll:
cqe = (struct ehca_cqe *)
ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
if (!cqe) {
ret = -EAGAIN;
- ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
- "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
- goto poll_cq_one_exit0;
+ if (ehca_debug_level >= 3)
+ ehca_dbg(cq->device, "Completion queue is empty "
+ "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
+ goto poll_cq_one_exit0;
}
/* prevents loads being reordered across this point */
@@ -511,9 +648,11 @@ poll_cq_one_read_cqe:
cqe_count++;
if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
- struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+ struct ehca_qp *qp;
int purgeflag;
- unsigned long spl_flags;
+ unsigned long flags;
+
+ qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
if (!qp) {
ehca_err(cq->device, "cq_num=%x qp_num=%x "
"could not find qp -> ignore cqe",
@@ -521,17 +660,17 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
my_cq->cq_number, cqe->local_qp_number);
/* ignore this purged cqe */
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
- spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&qp->spinlock_s, flags);
purgeflag = qp->sqerr_purgeflag;
- spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+ spin_unlock_irqrestore(&qp->spinlock_s, flags);
if (purgeflag) {
- ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
- "src_qp=%x",
+ ehca_dbg(cq->device,
+ "Got CQE with purged bit qp_num=%x src_qp=%x",
cqe->local_qp_number, cqe->remote_qp_number);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
cqe->local_qp_number,
cqe->remote_qp_number);
@@ -540,15 +679,17 @@ poll_cq_one_read_cqe:
* that caused sqe and turn off purge flag
*/
qp->sqerr_purgeflag = 0;
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
}
- /* tracing cqe */
- if (ehca_debug_level) {
+ is_error = cqe->status & WC_STATUS_ERROR_BIT;
+
+ /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
+ if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
ehca_dbg(cq->device,
- "Received COMPLETION ehca_cq=%p cq_num=%x -----",
- my_cq, my_cq->cq_number);
+ "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
+ is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
my_cq, my_cq->cq_number);
ehca_dbg(cq->device,
@@ -556,8 +697,62 @@ poll_cq_one_read_cqe:
my_cq, my_cq->cq_number);
}
- /* we got a completion! */
- wc->wr_id = cqe->work_request_id;
+ read_lock(&ehca_qp_idr_lock);
+ my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+ read_unlock(&ehca_qp_idr_lock);
+ if (!my_qp)
+ goto repoll;
+ wc->qp = &my_qp->ib_qp;
+
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
+ /* We got a receive completion. */
+ qmap = &my_qp->rq_map;
+
+ /* advance the tail pointer */
+ qmap->tail = qmap_tail_idx;
+
+ if (is_error) {
+ /*
+ * set left_to_poll to 0 because in error state, we will not
+ * get any additional CQEs
+ */
+ my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+ my_qp->sq_map.entries);
+ my_qp->sq_map.left_to_poll = 0;
+ ehca_add_to_err_list(my_qp, 1);
+
+ my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+ my_qp->rq_map.entries);
+ my_qp->rq_map.left_to_poll = 0;
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+
+ qmap_entry = &qmap->map[qmap_tail_idx];
+ if (qmap_entry->reported) {
+ ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+ my_qp->real_qp_num);
+ /* found a double cqe, discard it and read next one */
+ goto repoll;
+ }
+
+ wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+ qmap_entry->reported = 1;
+
+ /* if left_to_poll is decremented to 0, add the QP to the error list */
+ if (qmap->left_to_poll > 0) {
+ qmap->left_to_poll--;
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ ehca_add_to_err_list(my_qp, 1);
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+ }
/* eval ib_wc_opcode */
wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -569,34 +764,30 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
my_cq, my_cq->cq_number);
/* update also queue adder to throw away this entry!!! */
- goto poll_cq_one_exit0;
+ goto repoll;
}
+
/* eval ib_wc_status */
- if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+ if (unlikely(is_error)) {
/* complete with errors */
map_ib_wc_status(cqe->status, &wc->status);
wc->vendor_err = wc->status;
} else
wc->status = IB_WC_SUCCESS;
- wc->qp_num = cqe->local_qp_number;
wc->byte_len = cqe->nr_bytes_transferred;
wc->pkey_index = cqe->pkey_index;
wc->slid = cqe->rlid;
wc->dlid_path_bits = cqe->dlid;
wc->src_qp = cqe->remote_qp_number;
- wc->wc_flags = cqe->w_completion_flags;
- wc->imm_data = cpu_to_be32(cqe->immediate_data);
+ /*
+ * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+ * SW defines those in bits 1 and 0, so we can just shift and mask.
+ */
+ wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
+ wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
- if (wc->status != IB_WC_SUCCESS)
- ehca_dbg(cq->device,
- "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
- "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
- "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
- cqe->status, cqe->local_qp_number,
- cqe->remote_qp_number, cqe->work_request_id, cqe);
-
poll_cq_one_exit0:
if (cqe_count > 0)
hipz_update_feca(my_cq, cqe_count);
@@ -604,13 +795,89 @@ poll_cq_one_exit0:
return ret;
}
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+ struct ib_wc *wc, int num_entries,
+ struct ipz_queue *ipz_queue, int on_sq)
+{
+ int nr = 0;
+ struct ehca_wqe *wqe;
+ u64 offset;
+ struct ehca_queue_map *qmap;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (on_sq)
+ qmap = &my_qp->sq_map;
+ else
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+ /* generate flush CQE */
+
+ memset(wc, 0, sizeof(*wc));
+
+ offset = qmap->next_wqe_idx * ipz_queue->qe_size;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+ if (!wqe) {
+ ehca_err(cq->device, "Invalid wqe offset=%#llx on "
+ "qp_num=%#x", offset, my_qp->real_qp_num);
+ return nr;
+ }
+
+ wc->wr_id = replace_wr_id(wqe->work_request_id,
+ qmap_entry->app_wr_id);
+
+ if (on_sq) {
+ switch (wqe->optype) {
+ case WQE_OPTYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case WQE_OPTYPE_RDMAWRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case WQE_OPTYPE_RDMAREAD:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ default:
+ ehca_err(cq->device, "Invalid optype=%x",
+ wqe->optype);
+ return nr;
+ }
+ } else
+ wc->opcode = IB_WC_RECV;
+
+ if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+ wc->ex.imm_data = wqe->immediate_data;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ }
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+
+ wc->qp = &my_qp->ib_qp;
+
+ /* mark as reported and advance next_wqe pointer */
+ qmap_entry->reported = 1;
+ qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+ qmap->entries);
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ wc++; nr++;
+ }
+
+ return nr;
+
+}
+
int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int nr;
+ struct ehca_qp *err_qp;
struct ib_wc *current_wc = wc;
int ret = 0;
- unsigned long spl_flags;
+ unsigned long flags;
+ int entries_left = num_entries;
if (num_entries < 1) {
ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -619,26 +886,52 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
goto poll_cq_exit0;
}
- spin_lock_irqsave(&my_cq->spinlock, spl_flags);
- for (nr = 0; nr < num_entries; nr++) {
+ spin_lock_irqsave(&my_cq->spinlock, flags);
+
+ /* generate flush cqes for send queues */
+ list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_squeue, 1);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ /* generate flush cqes for receive queues */
+ list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_rqueue, 0);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ for (nr = 0; nr < entries_left; nr++) {
ret = ehca_poll_cq_one(cq, current_wc);
if (ret)
break;
current_wc++;
} /* eof for nr */
- spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+ entries_left -= nr;
+
+ spin_unlock_irqrestore(&my_cq->spinlock, flags);
if (ret == -EAGAIN || !ret)
- ret = nr;
+ ret = num_entries - entries_left;
poll_cq_exit0:
return ret;
}
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ int ret = 0;
- switch (cq_notify) {
+ switch (notify_flags & IB_CQ_SOLICITED_MASK) {
case IB_CQ_SOLICITED:
hipz_set_cqx_n0(my_cq, 1);
break;
@@ -649,5 +942,12 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
return -EINVAL;
}
- return 0;
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ unsigned long spl_flags;
+ spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+ ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
+ spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+ }
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 9f16e9c7939..dba8f9f8b99 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -39,15 +39,18 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <rdma/ib_mad.h>
-#include <linux/module.h>
-#include <linux/err.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
-#include "ehca_qes.h"
#include "ehca_iverbs.h"
#include "hcp_if.h"
+#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002)
+#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004)
+#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008)
+
+#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
/**
* ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
@@ -82,10 +85,13 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
if (ret != H_SUCCESS) {
ehca_err(&shca->ib_device,
- "Can't define AQP1 for port %x. rc=%lx",
+ "Can't define AQP1 for port %x. h_ret=%lli",
port, ret);
return ret;
}
+ shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
+ ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
+ port, pma_qp_nr);
break;
default:
ehca_err(&shca->ib_device, "invalid qp_type=%x",
@@ -93,6 +99,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
return H_PARAMETER;
}
+ if (ehca_nr_ports < 0) /* autodetect mode */
+ return H_SUCCESS;
+
for (counter = 0;
shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
counter < ehca_port_act_time;
@@ -109,3 +118,120 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
return H_SUCCESS;
}
+
+struct ib_perf {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[40];
+ u8 data[192];
+} __attribute__ ((packed));
+
+/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
+struct tcslfl {
+ u32 tc:8;
+ u32 sl:4;
+ u32 fl:20;
+} __attribute__ ((packed));
+
+/* IP Version/TC/FL packed into 32 bits, as in GRH */
+struct vertcfl {
+ u32 ver:4;
+ u32 tc:8;
+ u32 fl:20;
+} __attribute__ ((packed));
+
+static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct ib_perf *in_perf = (struct ib_perf *)in_mad;
+ struct ib_perf *out_perf = (struct ib_perf *)out_mad;
+ struct ib_class_port_info *poi =
+ (struct ib_class_port_info *)out_perf->data;
+ struct tcslfl *tcslfl =
+ (struct tcslfl *)&poi->redirect_tcslfl;
+ struct ehca_shca *shca =
+ container_of(ibdev, struct ehca_shca, ib_device);
+ struct ehca_sport *sport = &shca->sport[port_num - 1];
+
+ ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
+
+ *out_mad = *in_mad;
+
+ if (in_perf->mad_hdr.class_version != 1) {
+ ehca_warn(ibdev, "Unsupported class_version=%x",
+ in_perf->mad_hdr.class_version);
+ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
+ goto perf_reply;
+ }
+
+ switch (in_perf->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ case IB_MGMT_METHOD_SET:
+ /* set class port info for redirection */
+ out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
+ out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
+ memset(poi, 0, sizeof(*poi));
+ poi->base_version = 1;
+ poi->class_version = 1;
+ poi->resp_time_value = 18;
+
+ /* copy local routing information from WC where applicable */
+ tcslfl->sl = in_wc->sl;
+ poi->redirect_lid =
+ sport->saved_attr.lid | in_wc->dlid_path_bits;
+ poi->redirect_qp = sport->pma_qp_nr;
+ poi->redirect_qkey = IB_QP1_QKEY;
+
+ ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
+ &poi->redirect_pkey);
+
+ /* if request was globally routed, copy route info */
+ if (in_grh) {
+ struct vertcfl *vertcfl =
+ (struct vertcfl *)&in_grh->version_tclass_flow;
+ memcpy(poi->redirect_gid, in_grh->dgid.raw,
+ sizeof(poi->redirect_gid));
+ tcslfl->tc = vertcfl->tc;
+ tcslfl->fl = vertcfl->fl;
+ } else
+ /* else only fill in default GID */
+ ehca_query_gid(ibdev, port_num, 0,
+ (union ib_gid *)&poi->redirect_gid);
+
+ ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
+ sport->saved_attr.lid, sport->pma_qp_nr);
+ break;
+
+ case IB_MGMT_METHOD_GET_RESP:
+ return IB_MAD_RESULT_FAILURE;
+
+ default:
+ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
+ break;
+ }
+
+perf_reply:
+ out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ int ret;
+
+ if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* accept only pma request */
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return IB_MAD_RESULT_SUCCESS;
+
+ ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
+ ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
+ in_mad, out_mad);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 9f56bb846d9..d280b12aae6 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -54,15 +54,15 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
-#include <linux/version.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/device.h>
-#include <asm/abs_addr.h>
+#include <linux/atomic.h>
#include <asm/ibmebus.h>
#include <asm/io.h>
#include <asm/pgtable.h>
+#include <asm/hvcall.h>
extern int ehca_debug_level;
@@ -71,40 +71,37 @@ extern int ehca_debug_level;
if (unlikely(ehca_debug_level)) \
dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
"PU%04x EHCA_DBG:%s " format "\n", \
- get_paca()->paca_index, __FUNCTION__, \
+ raw_smp_processor_id(), __func__, \
## arg); \
} while (0)
#define ehca_info(ib_dev, format, arg...) \
dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
- get_paca()->paca_index, __FUNCTION__, ## arg)
+ raw_smp_processor_id(), __func__, ## arg)
#define ehca_warn(ib_dev, format, arg...) \
dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
- get_paca()->paca_index, __FUNCTION__, ## arg)
+ raw_smp_processor_id(), __func__, ## arg)
#define ehca_err(ib_dev, format, arg...) \
dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
- get_paca()->paca_index, __FUNCTION__, ## arg)
+ raw_smp_processor_id(), __func__, ## arg)
/* use this one only if no ib_dev available */
#define ehca_gen_dbg(format, arg...) \
do { \
if (unlikely(ehca_debug_level)) \
- printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
- get_paca()->paca_index, __FUNCTION__, ## arg); \
+ printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg); \
} while (0)
#define ehca_gen_warn(format, arg...) \
- do { \
- if (unlikely(ehca_debug_level)) \
- printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
- get_paca()->paca_index, __FUNCTION__, ## arg); \
- } while (0)
+ printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
#define ehca_gen_err(format, arg...) \
printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
- get_paca()->paca_index, __FUNCTION__, ## arg)
+ raw_smp_processor_id(), __func__, ## arg)
/**
* ehca_dmp - printk a memory block, whose length is n*8 bytes.
@@ -112,30 +109,30 @@ extern int ehca_debug_level;
* <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
*/
#define ehca_dmp(adr, len, format, args...) \
- do { \
- unsigned int x; \
+ do { \
+ unsigned int x; \
unsigned int l = (unsigned int)(len); \
- unsigned char *deb = (unsigned char*)(adr); \
+ unsigned char *deb = (unsigned char *)(adr); \
for (x = 0; x < l; x += 16) { \
- printk("EHCA_DMP:%s" format \
- " adr=%p ofs=%04x %016lx %016lx\n", \
- __FUNCTION__, ##args, deb, x, \
+ printk(KERN_INFO "EHCA_DMP:%s " format \
+ " adr=%p ofs=%04x %016llx %016llx\n", \
+ __func__, ##args, deb, x, \
*((u64 *)&deb[0]), *((u64 *)&deb[8])); \
deb += 16; \
} \
} while (0)
/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
+#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
/**
* EHCA_BMASK_SET - return value shifted and masked by mask
@@ -143,30 +140,16 @@ extern int ehca_debug_level;
* variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
* in variable
*/
-#define EHCA_BMASK_SET(mask,value) \
- ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+#define EHCA_BMASK_SET(mask, value) \
+ ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
/**
* EHCA_BMASK_GET - extract a parameter from value by mask
*/
-#define EHCA_BMASK_GET(mask,value) \
- (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
-
+#define EHCA_BMASK_GET(mask, value) \
+ (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
/* Converts ehca to ib return code */
-static inline int ehca2ib_return_code(u64 ehca_rc)
-{
- switch (ehca_rc) {
- case H_SUCCESS:
- return 0;
- case H_BUSY:
- return -EBUSY;
- case H_NO_MEM:
- return -ENOMEM;
- default:
- return -EINVAL;
- }
-}
-
+int ehca2ib_return_code(u64 ehca_rc);
#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index e08764e4aef..1a1d5d99fcf 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -40,7 +40,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <asm/current.h>
+#include <linux/slab.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
@@ -68,244 +68,235 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context)
return 0;
}
-struct page *ehca_nopage(struct vm_area_struct *vma,
- unsigned long address, int *type)
+static void ehca_mm_open(struct vm_area_struct *vma)
{
- struct page *mypage = NULL;
- u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
- u32 idr_handle = fileoffset >> 32;
- u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
- u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
- u32 cur_pid = current->tgid;
- unsigned long flags;
- struct ehca_cq *cq;
- struct ehca_qp *qp;
- struct ehca_pd *pd;
- u64 offset;
- void *vaddr;
+ u32 *count = (u32 *)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)++;
+ if (!(*count))
+ ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
- switch (q_type) {
- case 1: /* CQ */
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- cq = idr_find(&ehca_cq_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+static void ehca_mm_close(struct vm_area_struct *vma)
+{
+ u32 *count = (u32 *)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)--;
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
- /* make sure this mmap really belongs to the authorized user */
- if (!cq) {
- ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+static const struct vm_operations_struct vm_ops = {
+ .open = ehca_mm_open,
+ .close = ehca_mm_close,
+};
+
+static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
+ u32 *mm_count)
+{
+ int ret;
+ u64 vsize, physical;
+
+ vsize = vma->vm_end - vma->vm_start;
+ if (vsize < EHCA_PAGESIZE) {
+ ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ physical = galpas->user.fw_handle;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
+ /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
+ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+ vma->vm_page_prot);
+ if (unlikely(ret)) {
+ ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
+ return -ENOMEM;
+ }
+
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
+
+ return 0;
+}
+
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+ u32 *mm_count)
+{
+ int ret;
+ u64 start, ofs;
+ struct page *page;
+
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ start = vma->vm_start;
+ for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
+ u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
+ page = virt_to_page(virt_addr);
+ ret = vm_insert_page(vma, start, page);
+ if (unlikely(ret)) {
+ ehca_gen_err("vm_insert_page() failed rc=%i", ret);
+ return ret;
}
+ start += PAGE_SIZE;
+ }
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
- if (cq->ownpid != cur_pid) {
+ return 0;
+}
+
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 0: /* galpa fw handle */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+ ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+ if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, cq->ownpid);
- return NOPAGE_SIGBUS;
+ "ehca_mmap_fw() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
+ break;
- if (rsrc_type == 2) {
- ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
- ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
+ case 1: /* cq queue_addr */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
+ ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_queue() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
break;
- case 2: /* QP */
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
- qp = idr_find(&ehca_qp_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ default:
+ ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
+ rsrc_type, cq->cq_number);
+ return -EINVAL;
+ }
- /* make sure this mmap really belongs to the authorized user */
- if (!qp) {
- ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+ return 0;
+}
+
+static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 0: /* galpa fw handle */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
+ ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "remap_pfn_range() failed ret=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return -ENOMEM;
}
+ break;
- pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
- if (pd->ownpid != cur_pid) {
+ case 1: /* qp rqueue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
+ &qp->mm_count_rqueue);
+ if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, pd->ownpid);
- return NOPAGE_SIGBUS;
+ "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
+ break;
- if (rsrc_type == 2) { /* rqueue */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
- ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
- } else if (rsrc_type == 3) { /* squeue */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
- ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
+ case 2: /* qp squeue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
+ &qp->mm_count_squeue);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
break;
default:
- ehca_gen_err("bad queue type %x", q_type);
- return NOPAGE_SIGBUS;
- }
-
- if (!mypage) {
- ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+ ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
+ rsrc_type, qp->ib_qp.qp_num);
+ return -EINVAL;
}
- get_page(mypage);
- return mypage;
+ return 0;
}
-static struct vm_operations_struct ehcau_vm_ops = {
- .nopage = ehca_nopage,
-};
-
int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
- u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
- u32 idr_handle = fileoffset >> 32;
- u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
- u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
- u32 cur_pid = current->tgid;
+ u64 fileoffset = vma->vm_pgoff;
+ u32 idr_handle = fileoffset & 0x1FFFFFF;
+ u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
+ u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
u32 ret;
- u64 vsize, physical;
- unsigned long flags;
struct ehca_cq *cq;
struct ehca_qp *qp;
- struct ehca_pd *pd;
+ struct ib_uobject *uobject;
switch (q_type) {
- case 1: /* CQ */
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ case 0: /* CQ */
+ read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ read_unlock(&ehca_cq_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!cq)
return -EINVAL;
- if (cq->ownpid != cur_pid) {
- ehca_err(cq->ib_cq.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, cq->ownpid);
- return -ENOMEM;
- }
-
if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
return -EINVAL;
- switch (rsrc_type) {
- case 1: /* galpa fw handle */
- ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
- vma->vm_flags |= VM_RESERVED;
- vsize = vma->vm_end - vma->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
- vma->vm_end - vma->vm_start);
- return -EINVAL;
- }
-
- physical = cq->galpas.user.fw_handle;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_IO | VM_RESERVED;
-
- ehca_dbg(cq->ib_cq.device,
- "vsize=%lx physical=%lx", vsize, physical);
- ret = remap_pfn_range(vma, vma->vm_start,
- physical >> PAGE_SHIFT, vsize,
- vma->vm_page_prot);
- if (ret) {
- ehca_err(cq->ib_cq.device,
- "remap_pfn_range() failed ret=%x",
- ret);
- return -ENOMEM;
- }
- break;
-
- case 2: /* cq queue_addr */
- ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- default:
- ehca_err(cq->ib_cq.device, "bad resource type %x",
- rsrc_type);
- return -EINVAL;
+ ret = ehca_mmap_cq(vma, cq, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_cq() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
break;
- case 2: /* QP */
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ case 1: /* QP */
+ read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ read_unlock(&ehca_qp_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!qp)
return -EINVAL;
- pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
- if (pd->ownpid != cur_pid) {
- ehca_err(qp->ib_qp.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, pd->ownpid);
- return -ENOMEM;
- }
-
- if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+ uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+ if (!uobject || uobject->context != context)
return -EINVAL;
- switch (rsrc_type) {
- case 1: /* galpa fw handle */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
- vma->vm_flags |= VM_RESERVED;
- vsize = vma->vm_end - vma->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
- vma->vm_end - vma->vm_start);
- return -EINVAL;
- }
-
- physical = qp->galpas.user.fw_handle;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_IO | VM_RESERVED;
-
- ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
- vsize, physical);
- ret = remap_pfn_range(vma, vma->vm_start,
- physical >> PAGE_SHIFT, vsize,
- vma->vm_page_prot);
- if (ret) {
- ehca_err(qp->ib_qp.device,
- "remap_pfn_range() failed ret=%x",
- ret);
- return -ENOMEM;
- }
- break;
-
- case 2: /* qp rqueue_addr */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- case 3: /* qp squeue_addr */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- default:
- ehca_err(qp->ib_qp.device, "bad resource type %x",
- rsrc_type);
- return -EINVAL;
+ ret = ehca_mmap_qp(vma, qp, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_qp() failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
break;
@@ -316,77 +307,3 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return 0;
}
-
-int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
- struct vm_area_struct **vma)
-{
- down_write(&current->mm->mmap_sem);
- *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS,
- foffset);
- up_write(&current->mm->mmap_sem);
- if (!(*mapped)) {
- ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
- foffset, length);
- return -EINVAL;
- }
-
- *vma = find_vma(current->mm, (u64)*mapped);
- if (!(*vma)) {
- down_write(&current->mm->mmap_sem);
- do_munmap(current->mm, 0, length);
- up_write(&current->mm->mmap_sem);
- ehca_gen_err("couldn't find vma queue=%p", *mapped);
- return -EINVAL;
- }
- (*vma)->vm_flags |= VM_RESERVED;
- (*vma)->vm_ops = &ehcau_vm_ops;
-
- return 0;
-}
-
-int ehca_mmap_register(u64 physical, void **mapped,
- struct vm_area_struct **vma)
-{
- int ret;
- unsigned long vsize;
- /* ehca hw supports only 4k page */
- ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
- if (ret) {
- ehca_gen_err("could'nt mmap physical=%lx", physical);
- return ret;
- }
-
- (*vma)->vm_flags |= VM_RESERVED;
- vsize = (*vma)->vm_end - (*vma)->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_gen_err("invalid vsize=%lx",
- (*vma)->vm_end - (*vma)->vm_start);
- return -EINVAL;
- }
-
- (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
- (*vma)->vm_flags |= VM_IO | VM_RESERVED;
-
- ret = remap_pfn_range((*vma), (*vma)->vm_start,
- physical >> PAGE_SHIFT, vsize,
- (*vma)->vm_page_prot);
- if (ret) {
- ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
- return -ENOMEM;
- }
-
- return 0;
-
-}
-
-int ehca_munmap(unsigned long addr, size_t len) {
- int ret = 0;
- struct mm_struct *mm = current->mm;
- if (mm) {
- down_write(&mm->mmap_sem);
- ret = do_munmap(mm, addr, len);
- up_write(&mm->mmap_sem);
- }
- return ret;
-}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df8..89517ffb438 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -5,6 +5,7 @@
*
* Authors: Christoph Raisch <raisch@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
* Gerd Bayer <gerd.bayer@de.ibm.com>
* Waleri Fomin <fomin@de.ibm.com>
*
@@ -51,10 +52,13 @@
#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11)
#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12)
#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17)
#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18)
#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21)
#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23)
#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
+#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63)
#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15)
@@ -62,6 +66,12 @@
#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39)
#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47)
+#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 63) /* whole register: bits 0..63 */
+#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63)
+
#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63)
#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15)
@@ -70,30 +80,15 @@
#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
-/* direct access qp controls */
-#define DAQP_CTRL_ENABLE 0x01
-#define DAQP_CTRL_SEND_COMP 0x20
-#define DAQP_CTRL_RECV_COMP 0x40
-
-static u32 get_longbusy_msecs(int longbusy_rc)
-{
- switch (longbusy_rc) {
- case H_LONG_BUSY_ORDER_1_MSEC:
- return 1;
- case H_LONG_BUSY_ORDER_10_MSEC:
- return 10;
- case H_LONG_BUSY_ORDER_100_MSEC:
- return 100;
- case H_LONG_BUSY_ORDER_1_SEC:
- return 1000;
- case H_LONG_BUSY_ORDER_10_SEC:
- return 10000;
- case H_LONG_BUSY_ORDER_100_SEC:
- return 100000;
- default:
- return 1;
- }
-}
+#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
+#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
+#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
+
+#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
+#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
+#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
+
+static DEFINE_SPINLOCK(hcall_lock);
static long ehca_plpar_hcall_norets(unsigned long opcode,
unsigned long arg1,
@@ -106,15 +101,23 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
{
long ret;
int i, sleep_msecs;
+ unsigned long flags = 0;
- ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
- "arg5=%lx arg6=%lx arg7=%lx",
- opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
+ opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
for (i = 0; i < 5; i++) {
+ /* serialize hCalls to work around firmware issue */
+ if (ehca_lock_hcalls)
+ spin_lock_irqsave(&hcall_lock, flags);
+
ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
arg5, arg6, arg7);
+ if (ehca_lock_hcalls)
+ spin_unlock_irqrestore(&hcall_lock, flags);
+
if (H_IS_LONG_BUSY(ret)) {
sleep_msecs = get_longbusy_msecs(ret);
msleep_interruptible(sleep_msecs);
@@ -122,16 +125,14 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
}
if (ret < H_SUCCESS)
- ehca_gen_err("opcode=%lx ret=%lx"
- " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
- " arg5=%lx arg6=%lx arg7=%lx ",
- opcode, ret,
- arg1, arg2, arg3, arg4, arg5,
- arg6, arg7);
-
- ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
- return ret;
+ ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
+ opcode, ret, arg1, arg2, arg3,
+ arg4, arg5, arg6, arg7);
+ else
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
+ return ret;
}
return H_BUSY;
@@ -151,49 +152,50 @@ static long ehca_plpar_hcall9(unsigned long opcode,
{
long ret;
int i, sleep_msecs;
+ unsigned long flags = 0;
- ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
- "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
- opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
- arg8, arg9);
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
+ arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
for (i = 0; i < 5; i++) {
+ /* serialize hCalls to work around firmware issue */
+ if (ehca_lock_hcalls)
+ spin_lock_irqsave(&hcall_lock, flags);
+
ret = plpar_hcall9(opcode, outs,
arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9);
+ if (ehca_lock_hcalls)
+ spin_unlock_irqrestore(&hcall_lock, flags);
+
if (H_IS_LONG_BUSY(ret)) {
sleep_msecs = get_longbusy_msecs(ret);
msleep_interruptible(sleep_msecs);
continue;
}
- if (ret < H_SUCCESS)
- ehca_gen_err("opcode=%lx ret=%lx"
- " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
- " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
- " arg9=%lx"
- " out1=%lx out2=%lx out3=%lx out4=%lx"
- " out5=%lx out6=%lx out7=%lx out8=%lx"
- " out9=%lx",
- opcode, ret,
- arg1, arg2, arg3, arg4, arg5,
- arg6, arg7, arg8, arg9,
- outs[0], outs[1], outs[2], outs[3],
+ if (ret < H_SUCCESS) {
+ ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
+ opcode, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
+ ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+ ret, outs[0], outs[1], outs[2], outs[3],
+ outs[4], outs[5], outs[6], outs[7],
+ outs[8]);
+ } else if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+ ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7],
outs[8]);
-
- ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
- "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
- "out9=%lx",
- opcode, ret, outs[0], outs[1], outs[2], outs[3],
- outs[4], outs[5], outs[6], outs[7], outs[8]);
return ret;
-
}
return H_BUSY;
}
+
u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
struct ehca_pfeq *pfeq,
const u32 neq_control,
@@ -204,7 +206,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
u32 *eq_ist)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
u64 allocate_controls;
/* resource type */
@@ -227,7 +229,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
*eq_ist = (u32)outs[5];
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resource - ret=%lx ", ret);
+ ehca_gen_err("Not enough resource - ret=%lli ", ret);
return ret;
}
@@ -247,8 +249,9 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
struct ehca_cq *cq,
struct ehca_alloc_cq_parms *param)
{
+ int rc;
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -261,81 +264,109 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
param->act_nr_of_entries = (u32)outs[3];
param->act_pages = (u32)outs[4];
- if (ret == H_SUCCESS)
- hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+ if (ret == H_SUCCESS) {
+ rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
+ if (rc) {
+ ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+ rc, outs[5]);
+
+ ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ cq->ipz_cq_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+ ret = H_NO_MEM;
+ }
+ }
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%lx", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_qp *qp,
- struct ehca_alloc_qp_parms *parms)
+ struct ehca_alloc_qp_parms *parms, int is_user)
{
+ int rc;
u64 ret;
- u64 allocate_controls;
- u64 max_r10_reg;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
- u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
- u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
- int daqp_ctrl = parms->daqp_ctrl;
+ u64 allocate_controls, max_r10_reg, r11, r12;
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
allocate_controls =
- EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
- (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+ EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
+ parms->squeue.page_size)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
+ parms->rqueue.page_size)
| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
- (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+ !!(parms->ll_comp_flags & LLQP_RECV_COMP))
| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
- (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+ !!(parms->ll_comp_flags & LLQP_SEND_COMP))
| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
parms->ud_av_l_key_ctl)
| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
max_r10_reg =
EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
- max_nr_send_wqes)
+ parms->squeue.max_wr + 1)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
- max_nr_receive_wqes)
+ parms->rqueue.max_wr + 1)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
- parms->max_send_sge)
+ parms->squeue.max_sge)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
- parms->max_recv_sge);
+ parms->rqueue.max_sge);
+
+ r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+ if (parms->ext_type == EQPT_SRQ)
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+ else
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
allocate_controls, /* r5 */
- qp->send_cq->ipz_cq_handle.handle,
- qp->recv_cq->ipz_cq_handle.handle,
- parms->ipz_eq_handle.handle,
- ((u64)qp->token << 32) | parms->pd.value,
- max_r10_reg, /* r10 */
- parms->ud_av_l_key_ctl, /* r11 */
- 0);
- qp->ipz_qp_handle.handle = outs[0];
- qp->real_qp_num = (u32)outs[1];
- parms->act_nr_send_sges =
+ parms->send_cq_handle.handle,
+ parms->recv_cq_handle.handle,
+ parms->eq_handle.handle,
+ ((u64)parms->token << 32) | parms->pd.value,
+ max_r10_reg, r11, r12);
+
+ parms->qp_handle.handle = outs[0];
+ parms->real_qp_num = (u32)outs[1];
+ parms->squeue.act_nr_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
- parms->act_nr_recv_wqes =
+ parms->rqueue.act_nr_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
- parms->act_nr_send_sges =
+ parms->squeue.act_nr_sges =
(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
- parms->act_nr_recv_sges =
+ parms->rqueue.act_nr_sges =
(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
- parms->nr_sq_pages =
+ parms->squeue.queue_size =
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
- parms->nr_rq_pages =
+ parms->rqueue.queue_size =
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
- if (ret == H_SUCCESS)
- hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+ if (ret == H_SUCCESS) {
+ rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
+ if (rc) {
+ ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+ rc, outs[6]);
+
+ ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ parms->qp_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+ ret = H_NO_MEM;
+ }
+ }
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%lx", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
@@ -345,7 +376,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_port *query_port_response_block)
{
u64 ret;
- u64 r_cb = virt_to_abs(query_port_response_block);
+ u64 r_cb = __pa(query_port_response_block);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("response block not page aligned");
@@ -358,16 +389,36 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
r_cb, /* r6 */
0, 0, 0, 0);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(query_port_response_block, 64, "response_block");
return ret;
}
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask)
+{
+ u64 port_attributes = port_cap;
+
+ if (modify_mask & IB_PORT_SHUTDOWN)
+ port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
+ if (modify_mask & IB_PORT_INIT_TYPE)
+ port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
+ if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
+
+ return ehca_plpar_hcall_norets(H_MODIFY_PORT,
+ adapter_handle.handle, /* r4 */
+ port_id, /* r5 */
+ port_attributes, /* r6 */
+ 0, 0, 0, 0);
+}
+
u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_hca *query_hca_rblock)
{
- u64 r_cb = virt_to_abs(query_hca_rblock);
+ u64 r_cb = __pa(query_hca_rblock);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("response_block=%p not page aligned",
@@ -390,7 +441,8 @@ u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
{
return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
adapter_handle.handle, /* r4 */
- queue_type | pagesize << 8, /* r5 */
+ (u64)queue_type | ((u64)pagesize) << 8,
+ /* r5 */
resource_handle, /* r6 */
logical_address_of_page, /* r7 */
count, /* r8 */
@@ -406,7 +458,7 @@ u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
const u64 count)
{
if (count != 1) {
- ehca_gen_err("Ppage counter=%lx", count);
+ ehca_gen_err("Ppage counter=%llx", count);
return H_PARAMETER;
}
return hipz_h_register_rpage(adapter_handle,
@@ -441,7 +493,7 @@ u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
const struct h_galpa gal)
{
if (count != 1) {
- ehca_gen_err("Page counter=%lx", count);
+ ehca_gen_err("Page counter=%llx", count);
return H_PARAMETER;
}
@@ -459,13 +511,13 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
const u64 count,
const struct h_galpa galpa)
{
- if (count != 1) {
- ehca_gen_err("Page counter=%lx", count);
+ if (count > 1) {
+ ehca_gen_err("Page counter=%llx", count);
return H_PARAMETER;
}
- return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
- qp_handle.handle,logical_address_of_page,
+ return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+ qp_handle.handle, logical_address_of_page,
count);
}
@@ -477,7 +529,7 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
int dis_and_get_function_code)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
adapter_handle.handle, /* r4 */
@@ -485,9 +537,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
qp_handle.handle, /* r6 */
0, 0, 0, 0, 0, 0);
if (log_addr_next_sq_wqe2processed)
- *log_addr_next_sq_wqe2processed = (void*)outs[0];
+ *log_addr_next_sq_wqe2processed = (void *)outs[0];
if (log_addr_next_rq_wqe2processed)
- *log_addr_next_rq_wqe2processed = (void*)outs[1];
+ *log_addr_next_rq_wqe2processed = (void *)outs[1];
return ret;
}
@@ -500,16 +552,16 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
struct h_galpa gal)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
adapter_handle.handle, /* r4 */
qp_handle.handle, /* r5 */
update_mask, /* r6 */
- virt_to_abs(mqpcb), /* r7 */
+ __pa(mqpcb), /* r7 */
0, 0, 0, 0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Insufficient resources ret=%lx", ret);
+ ehca_gen_err("Insufficient resources ret=%lli", ret);
return ret;
}
@@ -523,7 +575,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
return ehca_plpar_hcall_norets(H_QUERY_QP,
adapter_handle.handle, /* r4 */
qp_handle.handle, /* r5 */
- virt_to_abs(qqpcb), /* r6 */
+ __pa(qqpcb), /* r6 */
0, 0, 0, 0);
}
@@ -531,7 +583,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
struct ehca_qp *qp)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = hcp_galpas_dtor(&qp->galpas);
if (ret) {
@@ -545,7 +597,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
qp->ipz_qp_handle.handle, /* r6 */
0, 0, 0, 0, 0, 0);
if (ret == H_HARDWARE)
- ehca_gen_err("HCA not operational. ret=%lx", ret);
+ ehca_gen_err("HCA not operational. ret=%lli", ret);
ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
adapter_handle.handle, /* r4 */
@@ -553,7 +605,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("Resource still in use. ret=%lx", ret);
+ ehca_gen_err("Resource still in use. ret=%lli", ret);
return ret;
}
@@ -577,7 +629,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
u32 * bma_qp_nr)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
adapter_handle.handle, /* r4 */
@@ -588,7 +640,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
*bma_qp_nr = (u32)outs[1];
if (ret == H_ALIAS_EXIST)
- ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+ ehca_gen_err("AQP1 already exists. ret=%lli", ret);
return ret;
}
@@ -610,7 +662,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%lx", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
@@ -649,7 +701,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+ ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
return ret;
}
@@ -671,7 +723,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("Resource in use. ret=%lx ", ret);
+ ehca_gen_err("Resource in use. ret=%lli ", ret);
return ret;
}
@@ -685,7 +737,7 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -711,11 +763,24 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
{
u64 ret;
+ if (unlikely(ehca_debug_level >= 3)) {
+ if (count > 1) {
+ u64 *kpage;
+ int i;
+ kpage = __va(logical_address_of_page);
+ for (i = 0; i < count; i++)
+ ehca_gen_dbg("kpage[%d]=%p",
+ i, (void *)kpage[i]);
+ } else
+ ehca_gen_dbg("kpage=%p",
+ (void *)logical_address_of_page);
+ }
+
if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
ehca_gen_err("logical_address_of_page not on a 4k boundary "
- "adapter_handle=%lx mr=%p mr_handle=%lx "
+ "adapter_handle=%llx mr=%p mr_handle=%llx "
"pagesize=%x queue_type=%x "
- "logical_address_of_page=%lx count=%lx",
+ "logical_address_of_page=%llx count=%llx",
adapter_handle.handle, mr,
mr->ipz_mr_handle.handle, pagesize, queue_type,
logical_address_of_page, count);
@@ -733,7 +798,7 @@ u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
adapter_handle.handle, /* r4 */
@@ -767,7 +832,7 @@ u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
adapter_handle.handle, /* r4 */
@@ -794,7 +859,7 @@ u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
adapter_handle.handle, /* r4 */
@@ -816,7 +881,7 @@ u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
struct ehca_mw_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -834,7 +899,7 @@ u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
struct ehca_mw_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
adapter_handle.handle, /* r4 */
@@ -859,7 +924,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
void *rblock,
unsigned long *byte_count)
{
- u64 r_cb = virt_to_abs(rblock);
+ u64 r_cb = __pa(rblock);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("rblock not page aligned.");
@@ -872,3 +937,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
r_cb,
0, 0, 0, 0);
}
+
+u64 hipz_h_eoi(int irq)
+{
+ unsigned long xirr;
+
+ iosync();
+ xirr = (0xffULL << 24) | irq;
+
+ return plpar_hcall_norets(H_EOI, xirr);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd47095..a46e514c367 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -49,7 +49,7 @@
#include "hipz_hw.h"
/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
* resources, create the empty EQPT (ring).
*/
u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
@@ -78,13 +78,16 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
* initialize resources, create empty QPPTs (2 rings).
*/
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_qp *qp,
- struct ehca_alloc_qp_parms *parms);
+ struct ehca_alloc_qp_parms *parms, int is_user);
u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
const u8 port_id,
struct hipz_query_port *query_port_response_block);
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask);
+
u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_hca *query_hca_rblock);
@@ -257,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
const u64 ressource_handle,
void *rblock,
unsigned long *byte_count);
+u64 hipz_h_eoi(int irq);
#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 0b1a4772c78..077376ff3d2 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -42,24 +42,26 @@
#include "ehca_classes.h"
#include "hipz_hw.h"
-int hcall_map_page(u64 physaddr, u64 *mapaddr)
+u64 hcall_map_page(u64 physaddr)
{
- *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
- return 0;
+ return (u64)ioremap(physaddr, EHCA_PAGESIZE);
}
int hcall_unmap_page(u64 mapaddr)
{
- iounmap((volatile void __iomem*)mapaddr);
+ iounmap((volatile void __iomem *) mapaddr);
return 0;
}
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
u64 paddr_kernel, u64 paddr_user)
{
- int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
- if (ret)
- return ret;
+ if (!is_user) {
+ galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
+ if (!galpas->kernel.fw_handle)
+ return -ENOMEM;
+ } else
+ galpas->kernel.fw_handle = 0;
galpas->user.fw_handle = paddr_user;
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
index 5305c2a3ed9..d1b02991024 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,12 +78,12 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
*(volatile u64 __force *)addr = value;
}
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
u64 paddr_kernel, u64 paddr_user);
int hcp_galpas_dtor(struct h_galpas *galpas);
-int hcall_map_page(u64 physaddr, u64 * mapaddr);
+u64 hcall_map_page(u64 physaddr);
int hcall_unmap_page(u64 mapaddr);
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
index 20898a15344..868735fd318 100644
--- a/drivers/infiniband/hw/ehca/hipz_fns_core.h
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -53,10 +53,10 @@
#define hipz_galpa_load_cq(gal, offset) \
hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-#define hipz_galpa_store_qp(gal,offset, value) \
+#define hipz_galpa_store_qp(gal, offset, value) \
hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
#define hipz_galpa_load_qp(gal, offset) \
- hipz_galpa_load(gal,QPTEMM_OFFSET(offset))
+ hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
{
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
index 3fc92b031c5..bf996c7acc4 100644
--- a/drivers/infiniband/hw/ehca/hipz_hw.h
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -45,6 +45,8 @@
#include "ehca_tools.h"
+#define EHCA_MAX_MTU 4
+
/* QP Table Entry Memory Map */
struct hipz_qptemm {
u64 qpx_hcr;
@@ -159,10 +161,11 @@ struct hipz_qptemm {
/* 0x1000 */
};
-#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
/* MRMWPT Entry Memory Map */
struct hipz_mrmwmm {
@@ -184,7 +187,7 @@ struct hipz_mrmwmm {
};
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
struct hipz_qpedmm {
/* 0x00 */
@@ -235,7 +238,7 @@ struct hipz_qpedmm {
u64 qpedx_rrva3;
};
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
/* CQ Table Entry Memory Map */
struct hipz_cqtemm {
@@ -260,12 +263,12 @@ struct hipz_cqtemm {
/* 0x1000 */
};
-#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63)
-#define CQX_FECADDER EHCA_BMASK_IBM(32,63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63)
+#define CQX_FECADDER EHCA_BMASK_IBM(32, 63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
/* EQ Table Entry Memory Map */
struct hipz_eqtemm {
@@ -290,7 +293,7 @@ struct hipz_eqtemm {
};
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
/* access control defines for MR/MW */
#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
@@ -358,6 +361,25 @@ struct hipz_query_hca {
u32 max_neq;
} __attribute__ ((packed));
+/*
+ * HCA capability masks; each one names a single bit position (from == to)
+ * NOTE(review): presumably tested against a capability word reported in
+ * struct hipz_query_hca - confirm against the users of these masks.
+ * bit positions 14 and 15 have no mask defined here
+ */
+#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0)
+#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1)
+#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2)
+#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4)
+#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5)
+#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6)
+#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7)
+#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8)
+#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9)
+#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18)
+#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19)
+
/* query port response block */
struct hipz_query_port {
u32 state;
@@ -381,7 +403,11 @@ struct hipz_query_port {
u64 max_msg_sz;
u32 max_mtu;
u32 vl_cap;
- u8 reserved2[1900];
+ u32 phys_pstate;
+ u32 phys_state;
+ u32 phys_speed;
+ u32 phys_width;
+ u8 reserved2[1884];
u64 guid_entries[255];
} __attribute__ ((packed));
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index e028ff1588c..8d594517cd2 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -38,8 +38,15 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_tools.h"
#include "ipz_pt_fn.h"
+#include "ehca_classes.h"
+
+#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
+
+struct kmem_cache *small_qp_cache;
void *ipz_qpageit_get_inc(struct ipz_queue *queue)
{
@@ -49,7 +56,7 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue)
queue->current_q_offset -= queue->pagesize;
ret = NULL;
}
- if (((u64)ret) % EHCA_PAGESIZE) {
+ if (((u64)ret) % queue->pagesize) {
ehca_gen_err("ERROR!! not at PAGE-Boundary");
return NULL;
}
@@ -70,80 +77,219 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
return ret;
}
-int ipz_queue_ctor(struct ipz_queue *queue,
- const u32 nr_of_pages,
- const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
{
- int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
- int f;
-
- if (pagesize > PAGE_SIZE) {
- ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
- "than kernel page size", pagesize);
- return 0;
- }
- if (!pages_per_kpage) {
- ehca_gen_err("FATAL ERROR: invalid kernel page size. "
- "pages_per_kpage=%x", pages_per_kpage);
- return 0;
- }
- queue->queue_length = nr_of_pages * pagesize;
- queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
- if (!queue->queue_pages) {
- ehca_gen_err("ERROR!! didn't get the memory");
- return 0;
+ int i;
+ for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
+ u64 page = __pa(queue->queue_pages[i]);
+ if (addr >= page && addr < page + queue->pagesize) {
+ *q_offset = addr - page + i * queue->pagesize;
+ return 0;
+ }
}
- memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
- /*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
- f = 0;
+ return -EINVAL;
+}
+
+/* an eHCA page must fit into a kernel page, refuse to build otherwise */
+#if PAGE_SHIFT < EHCA_PAGESHIFT
+#error Kernel pages must be at least as large as eHCA pages (4K) !
+#endif
+
+/*
+ * allocate pages for queue:
+ * outer loop allocates whole kernel pages (page aligned) and
+ * inner loop divides a kernel page into smaller hca queue pages
+ */
+static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
+{
+ int k, f = 0;
+ u8 *kpage;
+
while (f < nr_of_pages) {
- u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
- int k;
+ kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
if (!kpage)
- goto ipz_queue_ctor_exit0; /*NOMEM*/
- for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
- (queue->queue_pages)[f] = (struct ipz_page *)kpage;
+ goto out;
+
+ for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
+ queue->queue_pages[f] = (struct ipz_page *)kpage;
kpage += EHCA_PAGESIZE;
f++;
}
}
+ return 1;
- queue->current_q_offset = 0;
+out:
+ for (f = 0; f < nr_of_pages && queue->queue_pages[f];
+ f += PAGES_PER_KPAGE)
+ free_page((unsigned long)(queue->queue_pages)[f]);
+ return 0;
+}
+
+/*
+ * hand out one chunk of a shared kernel page to a small queue:
+ * the chunk is taken from the first page on the pd's free list for this
+ * chunk size; if that list is empty a fresh zeroed page is set up first.
+ * a page whose chunks are all in use is moved to the pd's full list.
+ * order 0 corresponds to 512 byte chunks (order = ilog2(pagesize) - 9)
+ * returns 1 if ok, 0 if out of memory
+ */
+static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+	int order = ilog2(queue->pagesize) - 9;
+	unsigned long slots = IPZ_SPAGE_PER_KPAGE >> order;
+	struct ipz_small_queue_page *spage;
+	unsigned long slot;
+
+	mutex_lock(&pd->lock);
+
+	if (list_empty(&pd->free[order])) {
+		/* nothing to share yet: set up a new tracked page */
+		spage = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
+		if (!spage)
+			goto nomem;
+
+		spage->page = get_zeroed_page(GFP_KERNEL);
+		if (!spage->page) {
+			kmem_cache_free(small_qp_cache, spage);
+			goto nomem;
+		}
+
+		list_add(&spage->list, &pd->free[order]);
+	} else {
+		spage = list_entry(pd->free[order].next,
+				   struct ipz_small_queue_page, list);
+	}
+
+	/* claim the first unused chunk of that page */
+	slot = find_first_zero_bit(spage->bitmap, slots);
+	__set_bit(slot, spage->bitmap);
+	spage->fill++;
+
+	if (spage->fill == slots)
+		list_move(&spage->list, &pd->full[order]);
+
+	mutex_unlock(&pd->lock);
+
+	/* the queue's single page pointer addresses the chunk itself */
+	queue->offset = slot << (order + 9);
+	queue->queue_pages[0] = (void *)(spage->page | (slot << (order + 9)));
+	queue->small_page = spage;
+	return 1;
+
+nomem:
+	ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+	mutex_unlock(&pd->lock);
+	return 0;
+}
+
+/*
+ * give the chunk used by a small queue back to its shared kernel page:
+ * - if this was the last chunk in use, the page is unlinked from its list
+ *   and released together with its bookkeeping structure
+ * - otherwise, if the page was completely full before, it is put back on
+ *   the pd's free list so that its chunks can be handed out again
+ * the two cases are mutually exclusive: a page that has just been unlinked
+ * must never be moved to another list, which the former pair of independent
+ * tests would have done for a page holding exactly one chunk
+ */
+static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+	int order = ilog2(queue->pagesize) - 9;
+	struct ipz_small_queue_page *page = queue->small_page;
+	unsigned long bit;
+	/* not named free_page, which would shadow the free_page() helper */
+	int release_kpage = 0;
+
+	/* chunk index = offset of the chunk within its page / chunk size */
+	bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
+		>> (order + 9);
+
+	mutex_lock(&pd->lock);
+
+	__clear_bit(bit, page->bitmap);
+	page->fill--;
+
+	if (page->fill == 0) {
+		list_del(&page->list);
+		release_kpage = 1;
+	} else if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) {
+		/* the page was full until we freed the chunk */
+		list_move_tail(&page->list, &pd->free[order]);
+	}
+
+	mutex_unlock(&pd->lock);
+
+	/* release outside of the lock, nobody else can reach the page now */
+	if (release_kpage) {
+		free_page(page->page);
+		kmem_cache_free(small_qp_cache, page);
+	}
+}
+
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small)
+{
+ if (pagesize > PAGE_SIZE) {
+ ehca_gen_err("FATAL ERROR: pagesize=%x "
+ "is greater than kernel page size", pagesize);
+ return 0;
+ }
+
+ /* init queue fields */
+ queue->queue_length = nr_of_pages * pagesize;
+ queue->pagesize = pagesize;
queue->qe_size = qe_size;
queue->act_nr_of_sg = nr_of_sg;
- queue->pagesize = pagesize;
+ queue->current_q_offset = 0;
queue->toggle_state = 1;
- return 1;
+ queue->small_page = NULL;
- ipz_queue_ctor_exit0:
- ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
- queue, f, nr_of_pages);
- for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
- if (!(queue->queue_pages)[f])
- break;
- free_page((unsigned long)(queue->queue_pages)[f]);
+ /* allocate queue page pointers */
+ queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!queue->queue_pages) {
+ queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
+ if (!queue->queue_pages) {
+ ehca_gen_err("Couldn't allocate queue page list");
+ return 0;
+ }
}
+
+ /* allocate actual queue pages */
+ if (is_small) {
+ if (!alloc_small_queue_page(queue, pd))
+ goto ipz_queue_ctor_exit0;
+ } else
+ if (!alloc_queue_pages(queue, nr_of_pages))
+ goto ipz_queue_ctor_exit0;
+
+ return 1;
+
+ipz_queue_ctor_exit0:
+ ehca_gen_err("Couldn't alloc pages queue=%p "
+ "nr_of_pages=%x", queue, nr_of_pages);
+ if (is_vmalloc_addr(queue->queue_pages))
+ vfree(queue->queue_pages);
+ else
+ kfree(queue->queue_pages);
+
return 0;
}
-int ipz_queue_dtor(struct ipz_queue *queue)
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
{
- int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
- int g;
- int nr_pages;
+ int i, nr_pages;
if (!queue || !queue->queue_pages) {
ehca_gen_dbg("queue or queue_pages is NULL");
return 0;
}
- nr_pages = queue->queue_length / queue->pagesize;
- for (g = 0; g < nr_pages; g += pages_per_kpage)
- free_page((unsigned long)(queue->queue_pages)[g]);
- vfree(queue->queue_pages);
+
+ if (queue->small_page)
+ free_small_queue_page(queue, pd);
+ else {
+ nr_pages = queue->queue_length / queue->pagesize;
+ for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
+ free_page((unsigned long)queue->queue_pages[i]);
+ }
+
+ if (is_vmalloc_addr(queue->queue_pages))
+ vfree(queue->queue_pages);
+ else
+ kfree(queue->queue_pages);
return 1;
}
+
+/*
+ * create the slab cache that provides struct ipz_small_queue_page objects
+ * returns 0 if ok, -ENOMEM if the cache could not be created
+ */
+int ehca_init_small_qp_cache(void)
+{
+	small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
+					   sizeof(struct ipz_small_queue_page),
+					   0, SLAB_HWCACHE_ALIGN, NULL);
+
+	return small_qp_cache ? 0 : -ENOMEM;
+}
+
+/*
+ * destroy the slab cache referenced by small_qp_cache
+ */
+void ehca_cleanup_small_qp_cache(void)
+{
+ kmem_cache_destroy(small_qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 2f13509d525..a801274ea33 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -51,11 +51,27 @@
#include "ehca_tools.h"
#include "ehca_qes.h"
+struct ehca_pd;
+struct ipz_small_queue_page;
+
+extern struct kmem_cache *small_qp_cache;
+
/* struct generic ehca page */
struct ipz_page {
u8 entries[EHCA_PAGESIZE];
};
+#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
+
+/*
+ * bookkeeping for one kernel page that is shared by several small queues
+ */
+struct ipz_small_queue_page {
+	unsigned long page;	/* address obtained from get_zeroed_page() */
+	/*
+	 * one bit per 512 byte chunk; the length is rounded up, because a
+	 * plain division yields a zero-sized array as soon as a kernel page
+	 * holds fewer chunks than a long has bits (e.g. 4K pages on 64 bit)
+	 */
+	unsigned long bitmap[(IPZ_SPAGE_PER_KPAGE + BITS_PER_LONG - 1)
+			     / BITS_PER_LONG];
+	int fill;		/* number of chunks currently handed out */
+	void *mapped_addr;
+	u32 mmap_count;
+	struct list_head list;	/* entry in a free[] or full[] list of a pd */
+};
+
/* struct generic queue in linux kernel virtual memory (kv) */
struct ipz_queue {
u64 current_q_offset; /* current queue entry */
@@ -66,7 +82,8 @@ struct ipz_queue {
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state; /* toggle flag - per page */
- u32 dummy3; /* 64 bit alignment */
+ u32 offset; /* save offset within page for small_qp */
+ struct ipz_small_queue_page *small_page;
};
/*
@@ -79,7 +96,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
if (q_offset >= queue->queue_length)
return NULL;
current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
- return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+ return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
}
/*
@@ -105,7 +122,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue);
* step in struct ipz_queue, will wrap in ringbuffer
* returns address (kv) of Queue Entry BEFORE increment
* warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
*/
static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
{
@@ -121,23 +137,24 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
}
/*
+ * return a bool indicating whether current Queue Entry is valid
+ */
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *entry = ipz_qeit_get(queue);
+	/* top bit of the flags byte must match the queue's toggle bit */
+	u32 valid_bit = entry->cqe_flags >> 7;
+
+	return valid_bit == (queue->toggle_state & 1);
+}
+
+/*
* return current Queue Entry, increment Queue Entry iterator by one
* step in struct ipz_queue, will wrap in ringbuffer
* returns address (kv) of Queue Entry BEFORE increment
* returns 0 and does not increment, if wrong valid state
* warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
*/
static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
{
- struct ehca_cqe *cqe = ipz_qeit_get(queue);
- u32 cqe_flags = cqe->cqe_flags;
-
- if ((cqe_flags >> 7) != (queue->toggle_state & 1))
- return NULL;
-
- ipz_qeit_get_inc(queue);
- return cqe;
+ return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
}
/*
@@ -150,6 +167,21 @@ static inline void *ipz_qeit_reset(struct ipz_queue *queue)
return ipz_qeit_get(queue);
}
+/*
+ * return the q_offset corresponding to an absolute address
+ */
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
+
+/*
+ * return the next queue offset. don't modify the queue.
+ */
+static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
+{
+	u64 next = offset + queue->qe_size;
+
+	/* wrap to the start once the end of the queue is reached */
+	return (next >= queue->queue_length) ? 0 : next;
+}
+
/* struct generic page table */
struct ipz_pt {
u64 entries[EHCA_PT_ENTRIES];
@@ -173,9 +205,10 @@ struct ipz_qpt {
* see ipz_qpt_ctor()
* returns true if ok, false if out of memory
*/
-int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
- const u32 pagesize, const u32 qe_size,
- const u32 nr_of_sg);
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small);
/*
* destructor for a ipz_queue_t
@@ -183,7 +216,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
* see ipz_queue_ctor()
* returns true if ok, false if queue was NULL-ptr of free failed
*/
-int ipz_queue_dtor(struct ipz_queue *queue);
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
/*
* constructor for a ipz_qpt_t,
@@ -225,13 +258,22 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
{
void *ret = ipz_qeit_get(queue);
- u32 qe = *(u8 *) ret;
+ u32 qe = *(u8 *)ret;
if ((qe >> 7) != (queue->toggle_state & 1))
return NULL;
ipz_qeit_eq_get_inc(queue); /* this is a good one */
return ret;
}
+/*
+ * return the current event queue entry without advancing the iterator,
+ * returns NULL if the entry's valid bit does not match the toggle state
+ */
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
+	u8 *entry = ipz_qeit_get(queue);
+	u32 valid_bit = *entry >> 7;
+
+	return (valid_bit == (queue->toggle_state & 1)) ? entry : NULL;
+}
+
/* returns address (GX) of first queue entry */
static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
{