diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca')
26 files changed, 2770 insertions, 1360 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 97d108634c5..465926319f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -1,7 +1,7 @@ /* * IBM eServer eHCA Infiniband device driver for Linux on POWER * - * adress vector functions + * address vector functions * * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> * Khadija Souissi <souissik@de.ibm.com> @@ -41,8 +41,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ - -#include <asm/current.h> +#include <linux/slab.h> #include "ehca_tools.h" #include "ehca_iverbs.h" @@ -50,6 +49,42 @@ static struct kmem_cache *av_cache; +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int ret; @@ -69,15 +104,13 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.slid_path_bits = ah_attr->src_path_bits; if (ehca_static_rate < 0) { - int ah_mult = ib_rate_to_mult(ah_attr->static_rate); - int ehca_mult = - ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); - - if (ah_mult >= ehca_mult) - av->av.ipd = 0; - else - av->av.ipd = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; } else av->av.ipd = ehca_static_rate; @@ -136,17 +169,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { struct ehca_av *av; struct ehca_ud_av new_ehca_av; - struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, ib_device); - u32 cur_pid = current->tgid; - - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } memset(&new_ehca_av, 0, sizeof(new_ehca_av)); new_ehca_av.sl = ah_attr->sl; @@ -208,15 +232,6 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); - struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); - u32 cur_pid = current->tgid; - - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, sizeof(ah_attr->grh.dgid)); @@ -239,16 +254,6 @@ int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) int ehca_destroy_ah(struct ib_ah *ah) { - struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); - u32 cur_pid = current->tgid; - - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } - kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index b5e96030531..bd45e0f3923 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -53,6 +53,7 @@ struct ehca_pd; struct ehca_av; #include <linux/wait.h> +#include <linux/mutex.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> @@ -65,6 +66,7 @@ struct ehca_av; #include "ehca_irq.h" #define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff struct ehca_eqe_cache_entry { struct ehca_eqe *eqe; @@ -93,20 +95,24 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_aqp1; - enum ib_rate rate; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialze modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; }; -#define HCA_CAP_MR_PGSIZE_4K 1 -#define HCA_CAP_MR_PGSIZE_64K 2 -#define HCA_CAP_MR_PGSIZE_1M 4 -#define HCA_CAP_MR_PGSIZE_16M 8 +#define HCA_CAP_MR_PGSIZE_4K 0x80000000 +#define HCA_CAP_MR_PGSIZE_64K 0x40000000 +#define HCA_CAP_MR_PGSIZE_1M 0x20000000 +#define HCA_CAP_MR_PGSIZE_16M 0x10000000 struct ehca_shca { struct ib_device ib_device; - struct ibmebus_dev *ibmebus_dev; + struct platform_device *ofdev; u8 num_ports; int hw_level; struct list_head shca_list; @@ -122,12 +128,15 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + int max_num_qps; + int max_num_cqs; + atomic_t num_cqs; + atomic_t num_qps; }; struct ehca_pd { struct ib_pd ib_pd; struct ipz_pd fw_pd; - u32 ownpid; /* small queue mgmt */ struct mutex lock; struct list_head free[2]; @@ -141,6 +150,38 @@ enum ehca_ext_qp_type { EQPT_SRQ = 3, }; +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 0 : temp; +} + struct ehca_qp { union { struct ib_qp ib_qp; @@ -148,8 +189,11 @@ struct ehca_qp { }; u32 qp_type; enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -164,10 +208,23 @@ struct ehca_qp { struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; /* mmap counter for resources mapped into user space */ u32 mm_count_squeue; u32 mm_count_rqueue; u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -194,10 +251,11 @@ struct ehca_cq { atomic_t nr_events; /* #events seen */ wait_queue_head_t wait_completion; spinlock_t task_lock; - u32 ownpid; /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { @@ -264,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ @@ -317,12 +375,16 @@ extern rwlock_t ehca_qp_idr_lock; extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; -extern int ehca_mr_largepage; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ @@ -337,6 +399,8 @@ struct ehca_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; + u32 fw_handle_ofs; + u32 dummy; }; struct ehca_create_qp_resp { @@ -347,7 +411,8 @@ struct ehca_create_qp_resp { u32 qkey; /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ u32 real_qp_num; - u32 dummy; /* padding for 8 byte alignment */ + u32 fw_handle_ofs; + u32 dummy; struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; }; diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 1798e6466bd..689c35786dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) #define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) #define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) #define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) @@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) #define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) #define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) #define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) #define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) #define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) #define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 81aff36101b..8cc83753776 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -43,7 +43,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <asm/current.h> +#include <linux/slab.h> #include "ehca_iverbs.h" #include "ehca_classes.h" @@ -128,16 +128,25 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, void *vpage; u32 counter; u64 rpage, cqx_fec, h_ret; - int ipz_rc, ret, i; + int ipz_rc, i; unsigned long flags; if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", shca->max_num_cqs); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); + atomic_dec(&shca->num_cqs); return ERR_PTR(-ENOMEM); } @@ -148,28 +157,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, spin_lock_init(&my_cq->task_lock); atomic_set(&my_cq->nr_events, 0); init_waitqueue_head(&my_cq->wait_completion); - my_cq->ownpid = current->tgid; cq = &my_cq->ib_cq; adapter_handle = shca->ipz_hca_handle; param.eq_handle = shca->eq.ipz_eq_handle; - do { - if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't reserve idr nr. device=%p", - device); - goto create_cq_exit1; - } - - write_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - } while (ret == -EAGAIN); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); - if (ret) { + if (my_cq->token < 0) { cq = ERR_PTR(-ENOMEM); ehca_err(device, "Can't allocate new idr entry. device=%p", device); @@ -185,7 +185,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (h_ret != H_SUCCESS) { ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%lx device=%p", h_ret, device); + "h_ret=%lli device=%p", h_ret, device); cq = ERR_PTR(ehca2ib_return_code(h_ret)); goto create_cq_exit2; } @@ -193,7 +193,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); if (!ipz_rc) { - ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", ipz_rc, device); cq = ERR_PTR(-EINVAL); goto create_cq_exit3; @@ -207,7 +207,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_cq(adapter_handle, my_cq->ipz_cq_handle, @@ -221,7 +221,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (h_ret < H_SUCCESS) { ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " "act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); cq = ERR_PTR(-EINVAL); @@ -233,7 +233,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if ((h_ret != H_SUCCESS) || vpage) { ehca_err(device, "Registration of pages not " "complete ehca_cq=%p cq_num=%x " - "h_ret=%lx", my_cq, my_cq->cq_number, + "h_ret=%lli", my_cq, my_cq->cq_number, h_ret); cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; @@ -241,7 +241,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%lx" + "ehca_cq=%p cq_num=%x h_ret=%lli " "counter=%i act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); @@ -255,7 +255,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, gal = my_cq->galpas.kernel; cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx", + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", my_cq, my_cq->cq_number, cqx_fec); my_cq->ib_cq.cqe = my_cq->nr_of_entries = @@ -265,6 +265,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; @@ -276,8 +279,11 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, resp.ipz_queue.queue_length = ipz_queue->queue_length; resp.ipz_queue.pagesize = ipz_queue->pagesize; resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + resp.fw_handle_ofs = (u32) + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } @@ -291,7 +297,7 @@ create_cq_exit3: h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); if (h_ret != H_SUCCESS) ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); create_cq_exit2: write_lock_irqsave(&ehca_cq_idr_lock, flags); @@ -301,6 +307,7 @@ create_cq_exit2: create_cq_exit1: kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return cq; } @@ -313,7 +320,6 @@ int ehca_destroy_cq(struct ib_cq *cq) struct ehca_shca *shca = container_of(device, struct ehca_shca, ib_device); struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - u32 cur_pid = current->tgid; unsigned long flags; if (cq->uobject) { @@ -322,12 +328,6 @@ int ehca_destroy_cq(struct ib_cq *cq) "user space cq_num=%x", my_cq->cq_number); return -EINVAL; } - if (my_cq->ownpid != cur_pid) { - ehca_err(device, "Invalid caller pid=%x ownpid=%x " - "cq_num=%x", - cur_pid, my_cq->ownpid, my_cq->cq_number); - return -EINVAL; - } } /* @@ -345,7 +345,7 @@ int ehca_destroy_cq(struct ib_cq *cq) h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err " + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " "state. Try to delete it forcibly.", my_cq, cq_num, my_cq->ipz_cq_handle.handle); ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); @@ -355,27 +355,19 @@ int ehca_destroy_cq(struct ib_cq *cq) cq_num); } if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx " + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); return ehca2ib_return_code(h_ret); } ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return 0; } int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) { - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - u32 cur_pid = current->tgid; - - if (cq->uobject && my_cq->ownpid != cur_pid) { - ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_cq->ownpid); - return -EINVAL; - } - /* TODO: proper resize needs to be done */ ehca_err(cq->device, "not implemented yet"); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 1d41faa7a33..90da6747d39 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, const enum ehca_eq_type type, const u32 length) { - u64 ret; + int ret; + u64 h_ret; u32 nr_pages; u32 i; void *vpage; @@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca, return -EINVAL; } - ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); - if (ret != H_SUCCESS) { + if (h_ret != H_SUCCESS) { ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); return -EINVAL; } @@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca, u64 rpage; vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) { - ret = H_RESOURCE; + if (!vpage) goto create_eq_exit2; - } - rpage = virt_to_abs(vpage); - ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); + rpage = __pa(vpage); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); if (i == (nr_pages - 1)) { /* last page */ vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (ret != H_SUCCESS || vpage) + if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED) goto create_eq_exit2; } } @@ -123,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca, /* register interrupt handlers and initialize work queues */ if (type == EHCA_EQ) { - ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq, - IRQF_DISABLED, "ehca_eq", + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, + 0, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); } else if (type == EHCA_NEQ) { - ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq, - IRQF_DISABLED, "ehca_neq", + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, + 0, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); } eq->is_initialized = 1; @@ -170,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) unsigned long flags; u64 h_ret; - spin_lock_irqsave(&eq->spinlock, flags); - ibmebus_free_irq(NULL, eq->ist, (void *)shca); + ibmebus_free_irq(eq->ist, (void *)shca); - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); - spin_unlock_irqrestore(&eq->spinlock, flags); + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't free EQ resources."); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index cf22472d941..9ed4d258830 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -39,10 +39,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/gfp.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" #include "hcp_if.h" +static unsigned int limit_uint(unsigned int value) +{ + return min_t(unsigned int, value, INT_MAX); +} + int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { int i, ret = 0; @@ -77,42 +84,47 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } memset(props, 0, sizeof(struct ib_device_attr)); + props->page_size_cap = shca->hca_cap_mr_pgsize; props->fw_ver = rblock->hw_ver; props->max_mr_size = rblock->max_mr_size; props->vendor_id = rblock->vendor_id >> 8; props->vendor_part_id = rblock->vendor_part_id >> 16; props->hw_ver = rblock->hw_ver; - props->max_qp = min_t(int, rblock->max_qp, INT_MAX); - props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX); - props->max_sge = min_t(int, rblock->max_sge, INT_MAX); - props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX); - props->max_cq = min_t(int, rblock->max_cq, INT_MAX); - props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX); - props->max_mr = min_t(int, rblock->max_mr, INT_MAX); - props->max_mw = min_t(int, rblock->max_mw, INT_MAX); - props->max_pd = min_t(int, rblock->max_pd, INT_MAX); - props->max_ah = min_t(int, rblock->max_ah, INT_MAX); - props->max_fmr = min_t(int, rblock->max_mr, INT_MAX); + props->max_qp = limit_uint(rblock->max_qp); + props->max_qp_wr = limit_uint(rblock->max_wqes_wq); + props->max_sge = limit_uint(rblock->max_sge); + props->max_sge_rd = limit_uint(rblock->max_sge_rd); + props->max_cq = limit_uint(rblock->max_cq); + props->max_cqe = limit_uint(rblock->max_cqe); + props->max_mr = limit_uint(rblock->max_mr); + props->max_mw = limit_uint(rblock->max_mw); + props->max_pd = limit_uint(rblock->max_pd); + props->max_ah = limit_uint(rblock->max_ah); + props->max_ee = limit_uint(rblock->max_rd_ee_context); + props->max_rdd = limit_uint(rblock->max_rd_domain); + props->max_fmr = limit_uint(rblock->max_mr); + props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); + props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); + props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); + props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); + props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - props->max_srq = props->max_qp; - props->max_srq_wr = props->max_qp_wr; + props->max_srq = limit_uint(props->max_qp); + props->max_srq_wr = limit_uint(props->max_qp_wr); props->max_srq_sge = 3; } - props->max_pkeys = 16; - props->local_ca_ack_delay - = rblock->local_ca_ack_delay; - props->max_raw_ipv6_qp - = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX); - props->max_raw_ethy_qp - = min_t(int, rblock->max_raw_ethy_qp, INT_MAX); - props->max_mcast_grp - = min_t(int, rblock->max_mcast_grp, INT_MAX); - props->max_mcast_qp_attach - = min_t(int, rblock->max_mcast_qp_attach, INT_MAX); + props->max_pkeys = 16; + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; + props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); + props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); + props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); + props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); props->max_total_mcast_qp_attach - = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + = limit_uint(rblock->max_total_mcast_qp_attach); /* translate device capabilities */ props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | @@ -127,6 +139,46 @@ query_device1: return ret; } +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) +{ + switch (fw_mtu) { + case 0x1: + return IB_MTU_256; + case 0x2: + return IB_MTU_512; + case 0x3: + return IB_MTU_1024; + case 0x4: + return IB_MTU_2048; + case 0x5: + return IB_MTU_4096; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + fw_mtu); + return 0; + } +} + +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +{ + switch (vl_cap) { + case 0x1: + return 1; + case 0x2: + return 2; + case 0x3: + return 4; + case 0x4: + return 8; + case 0x5: + return 15; + default: + ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", + vl_cap); + return 0; + } +} + int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -150,33 +202,14 @@ int ehca_query_port(struct ib_device *ibdev, } memset(props, 0, sizeof(struct ib_port_attr)); - props->state = rblock->state; - - switch (rblock->max_mtu) { - case 0x1: - props->active_mtu = props->max_mtu = IB_MTU_256; - break; - case 0x2: - props->active_mtu = props->max_mtu = IB_MTU_512; - break; - case 0x3: - props->active_mtu = props->max_mtu = IB_MTU_1024; - break; - case 0x4: - props->active_mtu = props->max_mtu = IB_MTU_2048; - break; - case 0x5: - props->active_mtu = props->max_mtu = IB_MTU_4096; - break; - default: - ehca_err(&shca->ib_device, "Unknown MTU size: %x.", - rblock->max_mtu); - break; - } + props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); props->port_cap_flags = rblock->capability_mask; props->gid_tbl_len = rblock->gid_tbl_len; - props->max_msg_sz = rblock->max_msg_sz; + if (rblock->max_msg_sz) + props->max_msg_sz = rblock->max_msg_sz; + else + props->max_msg_sz = 0x1 << 31; props->bad_pkey_cntr = rblock->bad_pkey_cntr; props->qkey_viol_cntr = rblock->qkey_viol_cntr; props->pkey_tbl_len = rblock->pkey_tbl_len; @@ -186,12 +219,22 @@ int ehca_query_port(struct ib_device *ibdev, props->sm_sl = rblock->sm_sl; props->subnet_timeout = rblock->subnet_timeout; props->init_type_reply = rblock->init_type_reply; - - props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; - - /* at the moment (logical) link state is always LINK_UP */ - props->phys_state = 0x5; + props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); + + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = IB_SPEED_SDR; + } query_port1: ehca_free_fw_ctrlblock(rblock); @@ -278,7 +321,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, ib_device); struct hipz_query_port *rblock; - if (index > 255) { + if (index < 0 || index > 255) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; } @@ -305,7 +348,7 @@ query_gid1: return ret; } -const u32 allowed_port_caps = ( +static const u32 allowed_port_caps = ( IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP); @@ -352,7 +395,7 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed hret=%lx", + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", hret); ret = -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index a925ea52443..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -41,6 +41,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> +#include <linux/smpboot.h> + #include "ehca_classes.h" #include "ehca_irq.h" #include "ehca_iverbs.h" @@ -62,6 +65,7 @@ #define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) #define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) @@ -69,9 +73,6 @@ static void queue_comp_task(struct ehca_cq *__cq); static struct ehca_comp_pool *pool; -#ifdef CONFIG_HOTPLUG_CPU -static struct notifier_block comp_pool_callback_nb; -#endif static inline void comp_event_callback(struct ehca_cq *cq) { @@ -101,7 +102,7 @@ static void print_error_data(struct ehca_shca *shca, void *data, return; ehca_err(&shca->ib_device, - "QP 0x%x (resource=%lx) has errors.", + "QP 0x%x (resource=%llx) has errors.", qp->ib_qp.qp_num, resource); break; } @@ -110,21 +111,21 @@ static void print_error_data(struct ehca_shca *shca, void *data, struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%lx) has errors.", + "CQ 0x%x (resource=%llx) has errors.", cq->cq_number, resource); break; } default: ehca_err(&shca->ib_device, - "Unknown error type: %lx on %s.", + "Unknown error type: %llx on %s.", type, shca->ib_device.name); break; } - ehca_err(&shca->ib_device, "Error data is available: %lx.", resource); + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); ehca_err(&shca->ib_device, "EHCA ----- error data begin " "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%lx", resource); + ehca_dmp(rblock, length, "resource=%llx", resource); ehca_err(&shca->ib_device, "EHCA ----- error data end " "----------------------------------------------------"); @@ -154,7 +155,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, if (ret == H_R_STATE) ehca_err(&shca->ib_device, - "No error data is available: %lx.", resource); + "No error data is available: %llx.", resource); else if (ret == H_SUCCESS) { int length; @@ -166,7 +167,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, print_error_data(shca, data, rblock, length); } else ehca_err(&shca->ib_device, - "Error data could not be fetched: %lx", resource); + "Error data could not be fetched: %llx", resource); ehca_free_fw_ctrlblock(rblock); @@ -180,6 +181,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, { struct ib_event event; + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + event.device = &shca->ib_device; event.event = event_type; @@ -206,6 +211,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -225,6 +232,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, if (fatal && qp->ext_type == EQPT_SRQBASE) dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); return; } @@ -294,8 +303,8 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) case 0x11: /* unaffiliated access error */ ehca_err(&shca->ib_device, "Unaffiliated access error."); break; - case 0x12: /* path migrating error */ - ehca_err(&shca->ib_device, "Path migration error."); + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); break; case 0x13: /* interface trace stopped */ ehca_err(&shca->ib_device, "Interface trace stopped."); @@ -353,21 +362,50 @@ static void notify_port_conf_change(struct ehca_shca *shca, int port_num) *old_attr = new_attr; } +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + static void parse_ec(struct ehca_shca *shca, u64 eqe) { u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. + */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); - ehca_query_sma_attr(shca, port, - &shca->sport[port - 1].saved_attr); + ehca_query_sma_attr(shca, port, &sport->saved_attr); } else { - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); } @@ -381,13 +419,15 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) ehca_warn(&shca->ib_device, "disruptive port " "%d configuration change", port); - shca->sport[port - 1].port_state = IB_PORT_DOWN; + sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, "is inactive"); - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); } else notify_port_conf_change(shca, port); break; @@ -397,6 +437,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) case 0x33: /* trace stopped */ ehca_err(&shca->ib_device, "Traced stopped."); break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; default: ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", ec, shca->ib_device.name); @@ -432,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -467,7 +517,7 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) struct ehca_cq *cq; eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); @@ -500,12 +550,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; - unsigned long flags; + u64 eqe_value, ret; int eqe_cnt, i; int eq_empty = 0; - spin_lock_irqsave(&eq->irq_spinlock, flags); + spin_lock(&eq->irq_spinlock); if (is_irq) { const int max_query_cnt = 100; int query_cnt = 0; @@ -525,8 +574,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; @@ -552,8 +600,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ @@ -585,14 +638,14 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); } while (1); unlock_irq_spinlock: - spin_unlock_irqrestore(&eq->irq_spinlock, flags); + spin_unlock(&eq->irq_spinlock); } void ehca_tasklet_eq(unsigned long data) @@ -600,27 +653,30 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level) - ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) - cpu = first_cpu(cpu_online_map); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -631,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -643,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -650,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -658,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -688,157 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); - - return cct->task; + spin_unlock_irq(&target->task_lock); } -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -#ifdef CONFIG_HOTPLUG_CPU -static void take_over_work(struct ehca_comp_pool *pool, - int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, - smp_processor_id())); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -static int comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return NOTIFY_BAD; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, any_online_cpu(cpu_online_map)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -#endif + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -848,48 +822,49 @@ int ehca_create_comp_pool(void) return -ENOMEM; spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = any_online_cpu(cpu_online_map); + pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } -#ifdef CONFIG_HOTPLUG_CPU - comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority = 0; - register_cpu_notifier(&comp_pool_callback_nb); -#endif + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&comp_pool_callback_nb); -#endif + smpboot_unregister_percpu_thread(&comp_pool_threads); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - destroy_comp_task(pool, i); - } + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea..5370199f08c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; int last_cpu; spinlock_t last_cpu_lock; }; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index dce503bb7d6..22f79afa7fc 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int ehca_dereg_mr(struct ib_mr *mr); -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); @@ -187,8 +187,18 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context); int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + void ehca_poll_eqs(unsigned long data); +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); @@ -197,4 +207,6 @@ void ehca_free_fw_ctrlblock(void *ptr); #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif +void ehca_recover_sqp(struct ib_qp *sqp); + #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 99036b65bb8..cd8d290a09f 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -43,63 +43,79 @@ #ifdef CONFIG_PPC_64K_PAGES #include <linux/slab.h> #endif + +#include <linux/notifier.h> +#include <linux/memory.h> #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" #include "ehca_tools.h" #include "hcp_if.h" +#define HCAD_VERSION "0029" + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0023"); +MODULE_VERSION(HCAD_VERSION); + +static bool ehca_open_aqp1 = 0; +static int ehca_hw_level = 0; +static bool ehca_poll_all_eqs = 1; -int ehca_open_aqp1 = 0; int ehca_debug_level = 0; -int ehca_hw_level = 0; -int ehca_nr_ports = 2; -int ehca_use_hp_mr = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; -int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; -int ehca_scaling_code = 0; -int ehca_mr_largepage = 0; - -module_param_named(open_aqp1, ehca_open_aqp1, int, 0); -module_param_named(debug_level, ehca_debug_level, int, 0); -module_param_named(hw_level, ehca_hw_level, int, 0); -module_param_named(nr_ports, ehca_nr_ports, int, 0); -module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); -module_param_named(port_act_time, ehca_port_act_time, int, 0); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); -module_param_named(static_rate, ehca_static_rate, int, 0); -module_param_named(scaling_code, ehca_scaling_code, int, 0); -module_param_named(mr_largepage, ehca_mr_largepage, int, 0); +bool ehca_scaling_code = 0; +int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); +module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); +module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); +module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); +module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); +module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); +module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); +module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); MODULE_PARM_DESC(open_aqp1, - "AQP1 on startup (0: no (default), 1: yes)"); + "Open AQP1 on startup (default: no)"); MODULE_PARM_DESC(debug_level, - "debug level" - " (0: no debug traces (default), 1: with debug traces)"); + "Amount of debug output (0: none (default), 1: traces, " + "2: some dumps, 3: lots)"); MODULE_PARM_DESC(hw_level, - "hardware level" - " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); + "Hardware level (0: autosensing (default), " + "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); MODULE_PARM_DESC(nr_ports, - "number of connected ports (default: 2)"); + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); MODULE_PARM_DESC(use_hp_mr, - "high performance MRs (0: no (default), 1: yes)"); + "Use high performance MRs (default: no)"); MODULE_PARM_DESC(port_act_time, - "time to wait for port activation (default: 30 sec)"); + "Time to wait for port activation (default: 30 sec)"); MODULE_PARM_DESC(poll_all_eqs, - "polls all event queues periodically" - " (0: no, 1: yes (default))"); + "Poll all event queues periodically (default: yes)"); MODULE_PARM_DESC(static_rate, - "set permanent static rate (default: disabled)"); + "Set permanent static rate (default: no static rate)"); MODULE_PARM_DESC(scaling_code, - "set scaling code (0: disabled/default, 1: enabled)"); -MODULE_PARM_DESC(mr_largepage, - "use large page for MR (0: use PAGE_SIZE (default), " - "1: use large page depending on MR size"); + "Enable scaling code (default: no)"); +MODULE_PARM_DESC(lock_hcalls, + "Serialize all hCalls made by the driver " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -107,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); static LIST_HEAD(shca_list); /* list of all registered ehcas */ -static DEFINE_SPINLOCK(shca_list_lock); +DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; @@ -195,6 +211,7 @@ static int ehca_create_slab_caches(void) if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; goto create_slab_caches6; } #endif @@ -256,14 +273,30 @@ static struct cap_descr { { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, }; -int ehca_sense_attributes(struct ehca_shca *shca) +static int ehca_sense_attributes(struct ehca_shca *shca) { int i, ret = 0; u64 h_ret; struct hipz_query_hca *rblock; struct hipz_query_port *port; + const char *loc_code; + + static const u32 pgsize_map[] = { + HCA_CAP_MR_PGSIZE_4K, 0x1000, + HCA_CAP_MR_PGSIZE_64K, 0x10000, + HCA_CAP_MR_PGSIZE_1M, 0x100000, + HCA_CAP_MR_PGSIZE_16M, 0x1000000, + }; + + ehca_gen_dbg("Probing adapter %s...", + shca->ofdev->dev.of_node->full_name); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); + if (loc_code) + ehca_gen_dbg(" ... location lode=%s", loc_code); rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -273,7 +306,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%lx", + ehca_gen_err("Cannot query device properties. h_ret=%lli", h_ret); ret = -EPERM; goto sense_attributes1; @@ -318,21 +351,50 @@ int ehca_sense_attributes(struct ehca_shca *shca) shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - shca->sport[0].rate = IB_RATE_30_GBPS; - shca->sport[1].rate = IB_RATE_30_GBPS; - shca->hca_cap = rblock->hca_cap_indicators; ehca_gen_dbg(" ... HCA capabilities:"); for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported; + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); + + /* translate supported MR page sizes; always support 4K */ + shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) + if (rblock->memory_page_size_supported & pgsize_map[i]) + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; + } + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_cq, rblock->max_cq); + } + + /* query max MTU from first port -- it's the same for all ports */ port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%lx", + ehca_gen_err("Cannot query port properties. h_ret=%lli", h_ret); ret = -EPERM; goto sense_attributes1; @@ -369,7 +431,7 @@ init_node_guid1: return ret; } -int ehca_init_device(struct ehca_shca *shca) +static int ehca_init_device(struct ehca_shca *shca) { int ret; @@ -380,7 +442,7 @@ int ehca_init_device(struct ehca_shca *shca) strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 7; + shca->ib_device.uverbs_abi_ver = 8; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -402,7 +464,7 @@ int ehca_init_device(struct ehca_shca *shca) shca->ib_device.node_type = RDMA_NODE_IB_CA; shca->ib_device.phys_port_cnt = shca->num_ports; shca->ib_device.num_comp_vectors = 1; - shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; + shca->ib_device.dma_device = &shca->ofdev->dev; shca->ib_device.query_device = ehca_query_device; shca->ib_device.query_port = ehca_query_port; shca->ib_device.query_gid = ehca_query_gid; @@ -445,8 +507,9 @@ int ehca_init_device(struct ehca_shca *shca) shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; shca->ib_device.attach_mcast = ehca_attach_mcast; shca->ib_device.detach_mcast = ehca_detach_mcast; - /* shca->ib_device.process_mad = ehca_process_mad; */ + shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { shca->ib_device.uverbs_cmd_mask |= @@ -485,7 +548,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) } sport->ibcq_aqp1 = ibcq; - if (sport->ibqp_aqp1) { + if (sport->ibqp_sqp[IB_QPT_GSI]) { ehca_err(&shca->ib_device, "AQP1 QP is already created."); ret = -EPERM; goto create_aqp1; @@ -511,7 +574,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) ret = PTR_ERR(ibqp); goto create_aqp1; } - sport->ibqp_aqp1 = ibqp; + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; return 0; @@ -524,23 +587,22 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport) { int ret; - ret = ib_destroy_qp(sport->ibqp_aqp1); + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); if (ret) { - ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret); + ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); return ret; } ret = ib_destroy_cq(sport->ibcq_aqp1); if (ret) - ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret); + ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret); return ret; } static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", - ehca_debug_level); + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); } static ssize_t ehca_store_debug_level(struct device_driver *ddp, @@ -552,8 +614,8 @@ static ssize_t ehca_store_debug_level(struct device_driver *ddp, return 1; } -DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, - ehca_show_debug_level, ehca_store_debug_level); +static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); static struct attribute *ehca_drv_attrs[] = { &driver_attr_debug_level.attr, @@ -564,6 +626,11 @@ static struct attribute_group ehca_drv_attr_grp = { .attrs = ehca_drv_attrs }; +static const struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + #define EHCA_RESOURCE_ATTR(name) \ static ssize_t ehca_show_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -573,16 +640,16 @@ static ssize_t ehca_show_##name(struct device *dev, \ struct hipz_query_hca *rblock; \ int data; \ \ - shca = dev->driver_data; \ + shca = dev_get_drvdata(dev); \ \ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ - dev_err(dev, "Can't allocate rblock memory."); \ + dev_err(dev, "Can't allocate rblock memory.\n"); \ return 0; \ } \ \ if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ - dev_err(dev, "Can't query device properties"); \ + dev_err(dev, "Can't query device properties\n"); \ ehca_free_fw_ctrlblock(rblock); \ return 0; \ } \ @@ -617,21 +684,13 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, struct device_attribute *attr, char *buf) { - struct ehca_shca *shca = dev->driver_data; + struct ehca_shca *shca = dev_get_drvdata(dev); - return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle); + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); -static ssize_t ehca_show_mr_largepage(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", ehca_mr_largepage); -} -static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL); - static struct attribute *ehca_dev_attrs[] = { &dev_attr_adapter_handle.attr, &dev_attr_num_ports.attr, @@ -648,7 +707,6 @@ static struct attribute *ehca_dev_attrs[] = { &dev_attr_cur_mw.attr, &dev_attr_max_pd.attr, &dev_attr_max_ah.attr, - &dev_attr_mr_largepage.attr, NULL }; @@ -656,24 +714,24 @@ static struct attribute_group ehca_dev_attr_grp = { .attrs = ehca_dev_attrs }; -static int __devinit ehca_probe(struct ibmebus_dev *dev, - const struct of_device_id *id) +static int ehca_probe(struct platform_device *dev) { struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret; + int ret, i, eq_size; + unsigned long flags; - handle = of_get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); if (!handle) { ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->ofdev.node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } if (!(*handle)) { ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->ofdev.node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } @@ -682,11 +740,19 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); - shca->ibmebus_dev = dev; + shca->ofdev = dev; shca->ipz_hca_handle.handle = *handle; - dev->ofdev.dev.driver_data = shca; + dev_set_drvdata(&dev->dev, shca); ret = ehca_sense_attributes(shca); if (ret < 0) { @@ -700,8 +766,9 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe1; } + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); goto probe1; @@ -728,15 +795,15 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); if (ret) { - ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x", + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", ret); goto probe5; } - ret = ib_register_device(&shca->ib_device); + ret = ib_register_device(&shca->ib_device, NULL); if (ret) { ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%x", ret); + "ib_register_device() failed ret=%i", ret); goto probe6; } @@ -762,14 +829,14 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, } } - ret = sysfs_create_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp); + ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); if (ret) /* only complain; we can live without attributes */ ehca_err(&shca->ib_device, "Cannot create device attributes ret=%d", ret); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_add(&shca->shca_list, &shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return 0; @@ -777,7 +844,7 @@ probe8: ret = ehca_destroy_aqp1(&shca->sport[0]); if (ret) ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port 1. ret=%x", ret); + "Cannot destroy AQP1 for port 1. ret=%i", ret); probe7: ib_unregister_device(&shca->ib_device); @@ -812,12 +879,13 @@ probe1: return -EINVAL; } -static int __devexit ehca_remove(struct ibmebus_dev *dev) +static int ehca_remove(struct platform_device *dev) { - struct ehca_shca *shca = dev->ofdev.dev.driver_data; + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; int ret; - sysfs_remove_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp); + sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); if (ehca_open_aqp1 == 1) { int i; @@ -826,7 +894,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev) if (ret) ehca_err(&shca->ib_device, "Cannot destroy AQP1 for port %x " - "ret=%x", ret, i); + "ret=%i", ret, i); } } @@ -835,26 +903,26 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev) ret = ehca_dereg_internal_maxmr(shca); if (ret) ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%x", ret); + "Cannot destroy internal MR. ret=%i", ret); ret = ehca_dealloc_pd(&shca->pd->ib_pd); if (ret) ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%x", ret); + "Cannot destroy internal PD. ret=%i", ret); ret = ehca_destroy_eq(shca, &shca->eq); if (ret) - ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret); + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); ret = ehca_destroy_eq(shca, &shca->neq); if (ret) - ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret); + ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret); ib_dealloc_device(&shca->ib_device); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_del(&shca->shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return ret; } @@ -867,12 +935,17 @@ static struct of_device_id ehca_device_table[] = }, {}, }; - -static struct ibmebus_driver ehca_driver = { - .name = "ehca", - .id_table = ehca_device_table, - .probe = ehca_probe, - .remove = ehca_remove, +MODULE_DEVICE_TABLE(of, ehca_device_table); + +static struct platform_driver ehca_driver = { + .probe = ehca_probe, + .remove = ehca_remove, + .driver = { + .name = "ehca", + .owner = THIS_MODULE, + .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, + }, }; void ehca_poll_eqs(unsigned long data) @@ -886,7 +959,7 @@ void ehca_poll_eqs(unsigned long data) struct ehca_eq *eq = &shca->eq; int max = 3; volatile u64 q_ofs, q_ofs2; - u64 flags; + unsigned long flags; spin_lock_irqsave(&eq->spinlock, flags); q_ofs = eq->ipz_queue.current_q_offset; spin_unlock_irqrestore(&eq->spinlock, flags); @@ -900,16 +973,51 @@ void ehca_poll_eqs(unsigned long data) ehca_process_eq(shca, 0); } } - mod_timer(&poll_eqs_timer, jiffies + HZ); + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); spin_unlock(&shca_list_lock); } -int __init ehca_module_init(void) +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + +static int __init ehca_module_init(void) { int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0023)\n"); + "(Version " HCAD_VERSION ")\n"); ret = ehca_create_comp_pool(); if (ret) { @@ -924,16 +1032,24 @@ int __init ehca_module_init(void) goto module_init1; } + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + ret = ibmebus_register_driver(&ehca_driver); if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; - goto module_init2; + goto module_init3; } - ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_gen_err("Cannot create driver attributes ret=%d", ret); + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; + } if (ehca_poll_all_eqs != 1) { ehca_gen_err("WARNING!!!"); @@ -947,6 +1063,12 @@ int __init ehca_module_init(void) return 0; +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + module_init2: ehca_destroy_slab_caches(); @@ -955,14 +1077,17 @@ module_init1: return ret; }; -void __exit ehca_module_exit(void) +static void __exit ehca_module_exit(void) { if (ehca_poll_all_eqs == 1) del_timer_sync(&poll_eqs_timer); - sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); ibmebus_unregister_driver(&ehca_driver); + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + ehca_destroy_slab_caches(); ehca_destroy_comp_pool(); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c index 32a870660bf..120aedf9f98 100644 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (h_ret != H_SUCCESS) ehca_err(ibqp->device, "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " - "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); return ehca2ib_return_code(h_ret); } @@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (h_ret != H_SUCCESS) ehca_err(ibqp->device, "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); return ehca2ib_return_code(h_ret); } diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index d97eda3e1da..3488e8c9fcb 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -40,8 +40,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <asm/current.h> - +#include <linux/slab.h> #include <rdma/ib_umem.h> #include "ehca_iverbs.h" @@ -51,9 +50,42 @@ #define NUM_CHUNKS(length, chunk_size) \ (((length) + (chunk_size - 1)) / (chunk_size)) + /* max number of rpages (per hcall register_rpages) */ #define MAX_RPAGES 512 +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -64,26 +96,23 @@ enum ehca_mr_pgsize { EHCA_MR_PGSIZE16M = 0x1000000L }; +#define EHCA_MR_PGSHIFT4K 12 +#define EHCA_MR_PGSHIFT64K 16 +#define EHCA_MR_PGSHIFT1M 20 +#define EHCA_MR_PGSHIFT16M 24 + +static u64 ehca_map_vaddr(void *caddr); + static u32 ehca_encode_hwpage_size(u32 pgsize) { - u32 idx = 0; - pgsize >>= 12; - /* - * map mr page size into hw code: - * 0, 1, 2, 3 for 4K, 64K, 1M, 64M - */ - while (!(pgsize & 1)) { - idx++; - pgsize >>= 4; - } - return idx; + int log = ilog2(pgsize); + WARN_ON(log < 12 || log > 24 || log & 3); + return (log - 12) / 4; } static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) { - if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) - return EHCA_MR_PGSIZE16M; - return EHCA_MR_PGSIZE4K; + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); } static struct ehca_mr *ehca_mr_new(void) @@ -141,7 +170,8 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); @@ -159,7 +189,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) get_dma_mr_exit0: if (IS_ERR(ib_mr)) - ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ", + ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", PTR_ERR(ib_mr), pd, mr_access_flags); return ib_mr; } /* end ehca_get_dma_mr() */ @@ -210,7 +240,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, } if ((size == 0) || (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%lx iova_start=%p", + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", size, iova_start); ib_mr = ERR_PTR(-EINVAL); goto reg_phys_mr_exit0; @@ -253,11 +283,11 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, pginfo.u.phy.num_phys_buf = num_phys_buf; pginfo.u.phy.phys_buf_array = phys_buf_array; pginfo.next_hwpage = - ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit1; @@ -271,7 +301,7 @@ reg_phys_mr_exit1: ehca_mr_delete(e_mr); reg_phys_mr_exit0: if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p " + ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " "num_phys_buf=%x mr_access_flags=%x iova_start=%p", PTR_ERR(ib_mr), pd, phys_buf_array, num_phys_buf, mr_access_flags, iova_start); @@ -290,7 +320,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_mr_pginfo pginfo; - int ret; + int ret, page_shift; u32 num_kpages; u32 num_hwpages; u64 hwpage_size; @@ -315,8 +345,8 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%lx " - "virt_base=%lx", length, virt); + ehca_err(pd->device, "bad input values: length=%llx " + "virt_base=%llx", length, virt); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } @@ -329,7 +359,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags); + mr_access_flags, 0); if (IS_ERR(e_mr->umem)) { ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; @@ -345,20 +375,20 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, /* determine number of MR pages */ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); /* select proper hw_pgsize */ - if (ehca_mr_largepage && - (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { - if (length <= EHCA_MR_PGSIZE4K - && PAGE_SIZE == EHCA_MR_PGSIZE4K) - hwpage_size = EHCA_MR_PGSIZE4K; - else if (length <= EHCA_MR_PGSIZE64K) - hwpage_size = EHCA_MR_PGSIZE64K; - else if (length <= EHCA_MR_PGSIZE1M) - hwpage_size = EHCA_MR_PGSIZE1M; - else - hwpage_size = EHCA_MR_PGSIZE16M; - } else - hwpage_size = EHCA_MR_PGSIZE4K; - ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); + page_shift = PAGE_SHIFT; + if (e_mr->umem->hugetlb) { + /* determine page_shift, clamp between 4K and 16M */ + page_shift = (fls64(length - 1) + 3) & ~3; + page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), + EHCA_MR_PGSHIFT16M); + } + hwpage_size = 1UL << page_shift; + + /* now that we have the desired page size, shift until it's + * supported, too. 4K is always supported, so this terminates. + */ + while (!(hwpage_size & shca->hca_cap_mr_pgsize)) + hwpage_size >>= 4; reg_user_mr_fallback: num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); @@ -370,16 +400,13 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%lx", hwpage_size); + "with hwpage_size=%llx", hwpage_size); ehca_info(pd->device, "try to register mr with " "kpage_size=%lx", PAGE_SIZE); /* @@ -403,8 +430,7 @@ reg_user_mr_exit1: ehca_mr_delete(e_mr); reg_user_mr_exit0: if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x" - " udata=%p", + ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", PTR_ERR(ib_mr), pd, mr_access_flags, udata); return ib_mr; } /* end ehca_reg_user_mr() */ @@ -424,7 +450,6 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, struct ehca_shca *shca = container_of(mr->device, struct ehca_shca, ib_device); struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u64 new_size; u64 *new_start; u32 new_acl; @@ -434,15 +459,6 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, u32 num_kpages = 0; u32 num_hwpages = 0; struct ehca_mr_pginfo pginfo; - u32 cur_pid = current->tgid; - - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - (my_pd->ownpid != cur_pid)) { - ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { /* TODO not supported, because PHYP rereg hCall needs pages */ @@ -526,7 +542,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit1; if ((new_size == 0) || (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%lx " + ehca_err(mr->device, "bad input values: new_size=%llx " "iova_start=%p", new_size, iova_start); ret = -EINVAL; goto rereg_phys_mr_exit1; @@ -543,7 +559,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, pginfo.u.phy.num_phys_buf = num_phys_buf; pginfo.u.phy.phys_buf_array = phys_buf_array; pginfo.next_hwpage = - ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -565,7 +581,7 @@ rereg_phys_mr_exit1: spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); rereg_phys_mr_exit0: if (ret) - ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p " + ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " "iova_start=%p", ret, mr, mr_rereg_mask, pd, phys_buf_array, @@ -582,19 +598,9 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) struct ehca_shca *shca = container_of(mr->device, struct ehca_shca, ib_device); struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - u32 cur_pid = current->tgid; unsigned long sl_flags; struct ehca_mr_hipzout_parms hipzout; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - (my_pd->ownpid != cur_pid)) { - ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - ret = -EINVAL; - goto query_mr_exit0; - } - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " "e_mr->flags=%x", mr, e_mr, e_mr->flags); @@ -607,8 +613,8 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p " - "hca_hndl=%lx mr_hndl=%lx lkey=%x", + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); ret = ehca2ib_return_code(h_ret); @@ -625,7 +631,7 @@ query_mr_exit1: spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); query_mr_exit0: if (ret) - ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p", + ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", ret, mr, mr_attr); return ret; } /* end ehca_query_mr() */ @@ -639,16 +645,6 @@ int ehca_dereg_mr(struct ib_mr *mr) struct ehca_shca *shca = container_of(mr->device, struct ehca_shca, ib_device); struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - u32 cur_pid = current->tgid; - - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - (my_pd->ownpid != cur_pid)) { - ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - ret = -EINVAL; - goto dereg_mr_exit0; - } if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " @@ -667,8 +663,8 @@ int ehca_dereg_mr(struct ib_mr *mr) /* TODO: BUSY: MR still has bound window(s) */ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p " - "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); ret = ehca2ib_return_code(h_ret); @@ -683,13 +679,13 @@ int ehca_dereg_mr(struct ib_mr *mr) dereg_mr_exit0: if (ret) - ehca_err(mr->device, "ret=%x mr=%p", ret, mr); + ehca_err(mr->device, "ret=%i mr=%p", ret, mr); return ret; } /* end ehca_dereg_mr() */ /*----------------------------------------------------------------------*/ -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *ib_mw; u64 h_ret; @@ -699,6 +695,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) container_of(pd->device, struct ehca_shca, ib_device); struct ehca_mw_hipzout_parms hipzout; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + e_mw = ehca_mw_new(); if (!e_mw) { ib_mw = ERR_PTR(-ENOMEM); @@ -708,8 +707,8 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " - "shca=%p hca_hndl=%lx mw=%p", + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; @@ -723,7 +722,7 @@ alloc_mw_exit1: ehca_mw_delete(e_mw); alloc_mw_exit0: if (IS_ERR(ib_mw)) - ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd); + ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); return ib_mw; } /* end ehca_alloc_mw() */ @@ -750,8 +749,8 @@ int ehca_dealloc_mw(struct ib_mw *mw) h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p " - "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); return ehca2ib_return_code(h_ret); @@ -805,8 +804,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ib_fmr = ERR_PTR(-EINVAL); goto alloc_fmr_exit0; } - hw_pgsize = ehca_get_max_hwpage_size(shca); - if ((1 << fmr_attr->page_shift) != hw_pgsize) { + + hw_pgsize = 1 << fmr_attr->page_shift; + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", fmr_attr->page_shift); ib_fmr = ERR_PTR(-EINVAL); @@ -822,6 +822,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, /* register MR on HCA */ memset(&pginfo, 0, sizeof(pginfo)); + pginfo.hwpage_size = hw_pgsize; /* * pginfo.num_hwpages==0, ie register_rpages() will not be called * but deferred to map_phys_fmr() @@ -829,7 +830,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey); + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); if (ret) { ib_fmr = ERR_PTR(ret); goto alloc_fmr_exit1; @@ -846,10 +847,6 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, alloc_fmr_exit1: ehca_mr_delete(e_fmr); alloc_fmr_exit0: - if (IS_ERR(ib_fmr)) - ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x " - "fmr_attr=%p", PTR_ERR(ib_fmr), pd, - mr_access_flags, fmr_attr); return ib_fmr; } /* end ehca_alloc_fmr() */ @@ -879,7 +876,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, goto map_phys_fmr_exit0; if (iova % e_fmr->fmr_page_size) { /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x", + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", iova, e_fmr->fmr_page_size); ret = -EINVAL; goto map_phys_fmr_exit0; @@ -916,8 +913,8 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, map_phys_fmr_exit0: if (ret) - ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " - "iova=%lx", ret, fmr, page_list, list_len, iova); + ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " + "iova=%llx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -936,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list) /* check all FMR belong to same SHCA, and check internal flag */ list_for_each_entry(ib_fmr, fmr_list, list) { prev_shca = shca; - if (!ib_fmr) { - ehca_gen_err("bad fmr=%p in list", ib_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } shca = container_of(ib_fmr->device, struct ehca_shca, ib_device); e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); @@ -979,7 +971,7 @@ int ehca_unmap_fmr(struct list_head *fmr_list) unmap_fmr_exit0: if (ret) - ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", ret, fmr_list, num_fmr, unmap_fmr_cnt); return ret; } /* end ehca_unmap_fmr() */ @@ -1003,8 +995,8 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " - "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); ret = ehca2ib_return_code(h_ret); @@ -1016,12 +1008,16 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) free_fmr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr); + ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); return ret; } /* end ehca_dealloc_fmr() */ /*----------------------------------------------------------------------*/ +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -1030,7 +1026,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) { int ret; u64 h_ret; @@ -1046,15 +1043,21 @@ int ehca_reg_mr(struct ehca_shca *shca, (u64)iova_start, size, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " - "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " + "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } e_mr->ipz_mr_handle = hipzout.handle; - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + if (ret) goto ehca_reg_mr_exit1; @@ -1072,9 +1075,9 @@ int ehca_reg_mr(struct ehca_shca *shca, ehca_reg_mr_exit1: h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", h_ret, shca, e_mr, iova_start, size, acl, e_pd, hipzout.lkey, pginfo, pginfo->num_kpages, pginfo->num_hwpages, ret); @@ -1083,9 +1086,9 @@ ehca_reg_mr_exit1: } ehca_reg_mr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%lx num_hwpages=%lx", + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; @@ -1127,13 +1130,13 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = ehca_set_pagebuf(pginfo, rnum, kpage); if (ret) { ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%x rnum=%x kpage=%p", + "bad rc, ret=%i rnum=%x kpage=%p", ret, rnum, kpage); goto ehca_reg_mr_rpages_exit1; } if (rnum > 1) { - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", kpage, i); @@ -1155,8 +1158,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, */ if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%lx " - "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx" + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" " lkey=%x", h_ret, e_mr, i, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, @@ -1167,8 +1170,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = 0; } else if (h_ret != H_PAGE_REGISTERED) { ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx " - "mr_hndl=%lx", h_ret, e_mr, i, + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); @@ -1183,8 +1186,8 @@ ehca_reg_mr_rpages_exit1: ehca_free_fw_ctrlblock(kpage); ehca_reg_mr_rpages_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " - "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1223,12 +1226,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " "kpage=%p", e_mr, pginfo, pginfo->type, pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p", kpage); ret = -EFAULT; @@ -1244,13 +1247,13 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * (MW bound or MR is shared) */ ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); + "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " - "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, + "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " + "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey, hipzout.lkey); ret = -EFAULT; @@ -1273,8 +1276,8 @@ ehca_rereg_mr_rereg1_exit1: ehca_free_fw_ctrlblock(kpage); ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) - ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " - "pginfo=%p num_kpages=%lx num_hwpages=%lx", + ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx", ret, *lkey, *rkey, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; @@ -1302,7 +1305,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, (e_mr->num_hwpages > MAX_RPAGES) || (pginfo->num_hwpages > e_mr->num_hwpages)) { ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; @@ -1334,7 +1337,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " "mr->lkey=%x", h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, @@ -1355,7 +1358,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey); + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; memcpy(&e_mr->flags, &(save_mr.flags), @@ -1366,9 +1369,9 @@ int ehca_rereg_mr(struct ehca_shca *shca, ehca_rereg_mr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); @@ -1410,8 +1413,8 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, * FMRs are not shared and no MW bound to FMRs */ ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, e_fmr->ib.ib_fmr.lkey, hipzout.lkey); @@ -1422,7 +1425,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " "lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, @@ -1448,7 +1451,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, ret = ehca_reg_mr(shca, e_fmr, NULL, (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); + &tmp_rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; memcpy(&e_fmr->flags, &(save_mr.flags), @@ -1457,7 +1460,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, ehca_unmap_one_fmr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " + ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " "fmr_max_pages=%x", ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); return ret; @@ -1486,9 +1489,9 @@ int ehca_reg_smr(struct ehca_shca *shca, (u64)iova_start, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, @@ -1510,13 +1513,97 @@ int ehca_reg_smr(struct ehca_shca *shca, ehca_reg_smr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p " + ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " "e_newmr=%p iova_start=%p acl=%x e_pd=%p", ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); return ret; } /* end ehca_reg_smr() */ /*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} /* register internal max-MR to internal SHCA */ int ehca_reg_internal_maxmr( @@ -1534,6 +1621,11 @@ int ehca_reg_internal_maxmr( u32 num_hwpages; u64 hw_pgsize; + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr = ehca_mr_new(); if (!e_mr) { ehca_err(&shca->ib_device, "out of memory"); @@ -1543,8 +1635,8 @@ int ehca_reg_internal_maxmr( e_mr->flags |= EHCA_MR_FLAG_MAXMR; /* register internal max-MR on HCA */ - size_maxmr = (u64)high_memory - PAGE_OFFSET; - iova_start = (u64 *)KERNELBASE; + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, @@ -1563,10 +1655,10 @@ int ehca_reg_internal_maxmr( ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " "num_hwpages=%x", e_mr, iova_start, size_maxmr, num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; @@ -1585,7 +1677,7 @@ ehca_reg_internal_maxmr_exit1: ehca_mr_delete(e_mr); ehca_reg_internal_maxmr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p", + ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", ret, shca, e_pd, e_maxmr); return ret; } /* end ehca_reg_internal_maxmr() */ @@ -1612,8 +1704,8 @@ int ehca_reg_maxmr(struct ehca_shca *shca, (u64)iova_start, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " - "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); @@ -1653,7 +1745,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca) ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); if (ret) { ehca_err(&shca->ib_device, "dereg internal max-MR failed, " - "ret=%x e_maxmr=%p shca=%p lkey=%x", + "ret=%i e_maxmr=%p shca=%p lkey=%x", ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); shca->maxmr = e_maxmr; goto ehca_dereg_internal_maxmr_exit0; @@ -1663,7 +1755,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca) ehca_dereg_internal_maxmr_exit0: if (ret) - ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p", + ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", ret, shca, shca->maxmr); return ret; } /* end ehca_dereg_internal_maxmr() */ @@ -1690,28 +1782,28 @@ int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, /* check first buffer */ if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%lx pbuf->size=%lx", + "pbuf->addr=%llx pbuf->size=%llx", iova_start, pbuf->addr, pbuf->size); return -EINVAL; } if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx " - "pbuf->size=%lx", pbuf->addr, pbuf->size); + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); return -EINVAL; } for (i = 0; i < num_phys_buf; i++) { if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%lx " - "pbuf->size=%lx", + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", i, pbuf->addr, pbuf->size); return -EINVAL; } if (((i > 0) && /* not 1st */ (i < (num_phys_buf - 1)) && /* not last */ (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x pbuf->size=%lx", + ehca_gen_err("bad size, i=%x pbuf->size=%llx", i, pbuf->size); return -EINVAL; } @@ -1744,7 +1836,7 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, page = page_list; for (i = 0; i < list_len; i++) { if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p " + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " "fmr_page_size=%x", i, *page, page, e_fmr, e_fmr->fmr_page_size); return -EINVAL; @@ -1763,62 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(chunk->page_list[i].page) - << PAGE_SHIFT ; - *kpage = phys_to_abs(pgaddr + - (pginfo->next_hwpage * - pginfo->hwpage_size)); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_hwpage=%lx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1826,19 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT; - ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, - *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%lx " - "prev_pgaddr=%lx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1852,116 +1921,85 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(chunk->page_list[i].page) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx i=%x", - pgaddr, i); + while (*sg != NULL) { + + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%lx i=%x " - "mr_pgsize=%lx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = phys_to_abs( - pgaddr & - ~(pginfo->hwpage_size - 1)); - } - ehca_gen_dbg("kpage=%lx chunk_page=%lx " - "value=%016lx", *kpage, pgaddr, - *(u64 *)abs_to_virt( - phys_to_abs(pgaddr))); - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } -int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) { int ret = 0; struct ib_phys_buf *pbuf; @@ -1980,21 +2018,20 @@ int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, if ((pginfo->kpage_cnt >= pginfo->num_kpages) || (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%lx num_kpages=%lx " - "hwpage_cnt=%lx " - "num_hwpages=%lx i=%x", + "kpage_cnt=%llx num_kpages=%llx " + "hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", pginfo->kpage_cnt, pginfo->num_kpages, pginfo->hwpage_cnt, pginfo->num_hwpages, i); return -EFAULT; } - *kpage = phys_to_abs( - (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size)); + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx " - "next_hwpage=%lx", pbuf->addr, + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, pbuf->size, pginfo->next_hwpage); return -EFAULT; } @@ -2019,9 +2056,8 @@ int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, return ret; } -int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) { int ret = 0; u64 *fmrlist; @@ -2030,11 +2066,11 @@ int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, /* loop over desired page_list entries */ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size); + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_hwpage=%lx", + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", *fmrlist, fmrlist, pginfo->u.fmr.next_listelem, pginfo->next_hwpage); @@ -2058,11 +2094,10 @@ int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, u64 prev = *kpage; /* check if adrs are contiguous */ for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = phys_to_abs(fmrlist[j] & - ~(pginfo->hwpage_size - 1)); + u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); if (prev + pginfo->u.fmr.fmr_pgsize != p) { ehca_gen_err("uncontiguous fmr pages " - "found prev=%lx p=%lx " + "found prev=%llx p=%llx " "idx=%x", prev, p, i + j); return -EINVAL; } @@ -2114,8 +2149,8 @@ int ehca_mr_is_maxmr(u64 size, u64 *iova_start) { /* a MR is treated as max-MR only if it fits following: */ - if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void *)KERNELBASE)) { + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2221,3 +2256,338 @@ void ehca_cleanup_mrmw_cache(void) if (mw_cache) kmem_cache_destroy(mw_cache); } + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index bc8f4e31c12..50d8b51306d 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -42,6 +42,11 @@ #ifndef _EHCA_MRMW_H_ #define _EHCA_MRMW_H_ +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -50,7 +55,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, - u32 *rkey); + u32 *rkey, + enum ehca_reg_type reg_type); int ehca_reg_mr_rpages(struct ehca_shca *shca, struct ehca_mr *e_mr, @@ -118,4 +124,9 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, void ehca_mr_deletenew(struct ehca_mr *mr); +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 43bcf085fcf..351577a6670 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -38,7 +38,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <asm/current.h> +#include <linux/slab.h> #include "ehca_tools.h" #include "ehca_iverbs.h" @@ -58,7 +58,6 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, return ERR_PTR(-ENOMEM); } - pd->ownpid = current->tgid; for (i = 0; i < 2; i++) { INIT_LIST_HEAD(&pd->free[i]); INIT_LIST_HEAD(&pd->full[i]); @@ -85,18 +84,10 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, int ehca_dealloc_pd(struct ib_pd *pd) { - u32 cur_pid = current->tgid; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); int i, leftovers = 0; struct ipz_small_queue_page *page, *tmp; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } - for (i = 0; i < 2; i++) { list_splice(&my_pd->full[i], &my_pd->free[i]); list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 818803057eb..90c4efa6758 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -46,7 +46,7 @@ #include "ehca_tools.h" -/* virtual scatter gather entry to specify remote adresses with length */ +/* virtual scatter gather entry to specify remote addresses with length */ struct ehca_vsgentry { u64 vaddr; u32 lkey; @@ -148,7 +148,7 @@ struct ehca_wqe { u32 immediate_data; union { struct { - u64 remote_virtual_adress; + u64 remote_virtual_address; u32 rkey; u32 reserved; u64 atomic_1st_op_dma_len; @@ -213,6 +213,7 @@ struct ehca_wqe { #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 struct ehca_cqe { u64 work_request_id; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 84d435a5ee1..2e89356c46f 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -43,8 +43,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ - -#include <asm/current.h> +#include <linux/slab.h> #include "ehca_classes.h" #include "ehca_tools.h" @@ -58,9 +57,7 @@ static struct kmem_cache *qp_cache; /* * attributes not supported by query qp */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ - IB_QP_MAX_QP_RD_ATOMIC | \ - IB_QP_ACCESS_FLAGS | \ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ IB_QP_EN_SQD_ASYNC_NOTIFY) /* @@ -254,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) return ST_UD; case IB_QPT_RAW_IPV6: return -EINVAL; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: return -EINVAL; default: ehca_gen_err("Invalid ibqptype=%x", ibqptype); @@ -273,6 +270,7 @@ static inline void queue2resp(struct ipzu_queue_resp *resp, resp->queue_length = queue->queue_length; resp->pagesize = queue->pagesize; resp->toggle_state = queue->toggle_state; + resp->offset = queue->offset; } /* @@ -309,7 +307,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, } if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", ipz_rc); return -EBUSY; } @@ -323,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, ret = -EINVAL; goto init_qp_queue1; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, @@ -333,7 +331,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, if (cnt == (nr_q_pages - 1)) { /* last page! */ if (h_ret != expected_hret) { ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret= %lx ", h_ret); + "h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto init_qp_queue1; } @@ -347,7 +345,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret= %lx ", h_ret); + "h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto init_qp_queue1; } @@ -398,6 +396,54 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = qmap->entries - 1; + qmap->left_to_poll = 0; + qmap->next_wqe_idx = 0; + for (i = 0; i < qmap->entries; i++) { + qmap->map[i].reported = 1; + qmap->map[i].cqe_req = 0; + } +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -409,13 +455,13 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -423,6 +469,19 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", shca->max_num_qps); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + memset(&parms, 0, sizeof(parms)); qp_type = init_attr->qp_type; @@ -430,6 +489,7 @@ static struct ehca_qp *internal_create_qp( init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -444,17 +504,22 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } has_srq = 1; parms.ext_type = EQPT_SRQBASE; parms.srq_qpn = my_srq->real_qp_num; - parms.srq_token = my_srq->token; } if (is_llqp && has_srq) { ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -466,6 +531,7 @@ static struct ehca_qp *internal_create_qp( ehca_err(pd->device, "no more than three SGEs " "supported for SRQ pd=%p max_sge=%x", pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -477,6 +543,7 @@ static struct ehca_qp *internal_create_qp( qp_type != IB_QPT_SMI && qp_type != IB_QPT_GSI) { ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -490,6 +557,7 @@ static struct ehca_qp *internal_create_qp( "or max_rq_wr=%x for RC LLQP", init_attr->cap.max_send_wr, init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; @@ -497,6 +565,7 @@ static struct ehca_qp *internal_create_qp( if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { ehca_err(pd->device, "UD LLQP not supported " "by this adapter"); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOSYS); } if (!(init_attr->cap.max_send_sge <= 5 @@ -508,36 +577,57 @@ static struct ehca_qp *internal_create_qp( "or max_recv_sge=%x for UD LLQP", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } else if (init_attr->cap.max_send_wr > 255) { ehca_err(pd->device, "Invalid Number of " - "ax_send_wr=%x for UD QP_TYPE=%x", + "max_send_wr=%x for UD QP_TYPE=%x", init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; default: ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } else { + int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI + || qp_type == IB_QPT_GSI) ? 250 : 252; + + if (init_attr->cap.max_send_sge > max_sge + || init_attr->cap.max_recv_sge > max_sge) { + ehca_err(pd->device, "Invalid number of SGEs requested " + "send_sge=%x recv_sge=%x max_sge=%x", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); - break; } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); my_qp->qp_type = qp_type; my_qp->ext_type = parms.ext_type; + my_qp->state = IB_QPS_RESET; if (init_attr->recv_cq) my_qp->recv_cq = @@ -546,36 +636,38 @@ static struct ehca_qp *internal_create_qp( my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - do { - if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't reserve idr resources."); - goto create_qp_exit0; - } - - write_lock_irqsave(&ehca_qp_idr_lock, flags); - ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); - } while (ret == -EAGAIN); + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; - if (ret) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } goto create_qp_exit0; } + if (has_srq) + parms.srq_token = my_qp->token; + parms.servicetype = ibqptype2servicetype(qp_type); if (parms.servicetype < 0) { ret = -EINVAL; ehca_err(pd->device, "Invalid qp_type=%x", qp_type); - goto create_qp_exit0; + goto create_qp_exit1; } - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - parms.sigtype = HCALL_SIGT_EVERY; - else - parms.sigtype = HCALL_SIGT_BY_WQE; + /* Always signal by WQE so we can hide circ. WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; @@ -598,8 +690,11 @@ static struct ehca_qp *internal_create_qp( parms.squeue.max_sge = max_send_sge; parms.rqueue.max_sge = max_recv_sge; - if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap) - && !(context && udata)) { /* no small QP support in userspace ATM */ + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { if (HAS_SQ(my_qp)) ehca_determine_small_queue( &parms.squeue, max_send_sge, is_llqp); @@ -610,9 +705,9 @@ static struct ehca_qp *internal_create_qp( (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto create_qp_exit1; @@ -631,6 +726,8 @@ static struct ehca_qp *internal_create_qp( parms.squeue.act_nr_sges = 1; parms.rqueue.act_nr_sges = 1; } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; break; case IB_QPT_UD: case IB_QPT_GSI: @@ -666,9 +763,24 @@ static struct ehca_qp *internal_create_qp( &parms.squeue, swqe_size); if (ret) { ehca_err(pd->device, "Couldn't initialize squeue " - "and pages ret=%x", ret); + "and pages ret=%i", ret); goto create_qp_exit2; } + + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } } if (HAS_RQ(my_qp)) { @@ -677,9 +789,29 @@ static struct ehca_qp *internal_create_qp( H_SUCCESS, &parms.rqueue, rwqe_size); if (ret) { ehca_err(pd->device, "Couldn't initialize rqueue " - "and pages ret=%x", ret); - goto create_qp_exit3; + "and pages ret=%i", ret); + goto create_qp_exit4; } + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } + } else if (init_attr->srq && !is_user) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -710,14 +842,36 @@ static struct ehca_qp *internal_create_qp( init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; my_qp->init_attr = *init_attr; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit5; + } + } + } + /* NOTE: define_apq0() not supported yet */ if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", - h_ret); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; ret = ehca2ib_return_code(h_ret); - goto create_qp_exit4; + goto create_qp_exit6; } } @@ -725,8 +879,8 @@ static struct ehca_qp *internal_create_qp( ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { ehca_err(pd->device, - "Couldn't assign qp to send_cq ret=%x", ret); - goto create_qp_exit4; + "Couldn't assign qp to send_cq ret=%i", ret); + goto create_qp_exit7; } } @@ -741,26 +895,41 @@ static struct ehca_qp *internal_create_qp( resp.ext_type = my_qp->ext_type; resp.qkey = my_qp->qkey; resp.real_qp_num = my_qp->real_qp_num; - resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset; - resp.ipz_squeue.offset = my_qp->ipz_squeue.offset; + if (HAS_SQ(my_qp)) queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); if (HAS_RQ(my_qp)) queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + resp.fw_handle_ofs = (u32) + (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit4; + goto create_qp_exit8; } } return my_qp; -create_qp_exit4: +create_qp_exit8: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit7: + kfree(my_qp->mod_qp_parm); + +create_qp_exit6: + if (HAS_RQ(my_qp) && !is_user) + vfree(my_qp->rq_map.map); + +create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); +create_qp_exit4: + if (HAS_SQ(my_qp) && !is_user) + vfree(my_qp->sq_map.map); + create_qp_exit3: if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); @@ -775,6 +944,7 @@ create_qp_exit1: create_qp_exit0: kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return ERR_PTR(ret); } @@ -803,6 +973,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct hcp_modify_qp_control_block *mqpcb; u64 hret, update_mask; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + /* For common attributes, internal_create_qp() takes its info * out of qp_init_attr, so copy all common attrs there. */ @@ -820,7 +993,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + srq_init_attr->attr.max_sge = 3; /* drive SRQ into RTR state */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -840,8 +1013,8 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT" - "ehca_qp=%p qp_num=%x hret=%lx", + ehca_err(pd->device, "Could not modify SRQ to INIT " + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } @@ -854,8 +1027,8 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ" - "ehca_qp=%p qp_num=%x hret=%lx", + ehca_err(pd->device, "Could not enable SRQ " + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } @@ -868,12 +1041,14 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR" - "ehca_qp=%p qp_num=%x hret=%lx", + ehca_err(pd->device, "Could not modify SRQ to RTR " + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } + ehca_free_fw_ctrlblock(mqpcb); + return &my_qp->ib_srq; create_srq2: @@ -907,7 +1082,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, &bad_send_wqe_p, NULL, 2); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%lx", + " ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } @@ -915,8 +1090,8 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ - bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); - if (ehca_debug_level) + bad_send_wqe_v = __va((u64)bad_send_wqe_p); + if (ehca_debug_level >= 2) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { @@ -929,7 +1104,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ @@ -948,6 +1123,111 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + unsigned int tail_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = __va(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + tail_idx = next_index(qmap->tail, qmap->entries); + wqe_idx = q_ofs / ipz_queue->qe_size; + + /* check all processed wqes, whether a cqe is requested or not */ + while (tail_idx != wqe_idx) { + if (qmap->map[tail_idx].cqe_req) + qmap->left_to_poll++; + tail_idx = next_index(tail_idx, qmap->entries); + } + /* save index in queue, where we have to start flushing */ + qmap->next_wqe_idx = wqe_idx; + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -968,11 +1248,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -985,11 +1266,13 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb, my_qp->galpas.kernel); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lx", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, ibqp->qp_num, h_ret); ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1021,7 +1304,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, ibqp, &smiqp_attr, smiqp_attr_mask, 1); if (smirc) { ehca_err(ibqp->device, "SMI RESET -> INIT failed. " - "ehca_modify_qp() rc=%x", smirc); + "ehca_modify_qp() rc=%i", smirc); ret = H_PARAMETER; goto modify_qp_exit1; } @@ -1046,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; if (!smi_reset2init && !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask)) { + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid qp transition new_state=%x cur_state=%x " @@ -1123,7 +1406,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); if (ret) { ehca_err(ibqp->device, "prepare_sqe_rts() failed " - "ehca_qp=%p qp_num=%x ret=%x", + "ehca_qp=%p qp_num=%x ret=%i", my_qp, ibqp->qp_num, ret); goto modify_qp_exit2; } @@ -1149,10 +1432,19 @@ static int internal_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } mqpcb->prim_p_key_idx = attr->pkey_index; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); } if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; if (attr->port_num < 1 || attr->port_num > shca->num_ports) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid port=%x. " @@ -1161,6 +1453,29 @@ static int internal_modify_qp(struct ib_qp *ibqp, shca->num_ports); goto modify_qp_exit2; } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } mqpcb->prim_phys_port = attr->port_num; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); } @@ -1169,10 +1484,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); } if (attr_mask & IB_QP_AV) { - int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> - init_attr.port_num].rate); - mqpcb->dlid = attr->ah_attr.dlid; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); mqpcb->source_path_bits = attr->ah_attr.src_path_bits; @@ -1180,11 +1491,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate = 0; + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* @@ -1220,6 +1532,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; mqpcb->path_mtu = attr->path_mtu; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); } @@ -1253,54 +1567,80 @@ static int internal_modify_qp(struct ib_qp *ibqp, (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); } if (attr_mask & IB_QP_ALT_PATH) { - int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult( - shca->sport[my_qp->init_attr.port_num].rate); + if (attr->alt_port_num < 1 + || attr->alt_port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->alt_port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->alt_phys_port = attr->alt_port_num; + + if (attr->alt_pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->alt_p_key_idx = attr->alt_pkey_index; + mqpcb->timeout_al = attr->alt_timeout; mqpcb->dlid_al = attr->alt_ah_attr.dlid; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1); mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1); mqpcb->service_level_al = attr->alt_ah_attr.sl; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate_al = 0; + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1); + /* OpenIB doesn't support alternate retry counts - copy them */ + mqpcb->retry_count_al = mqpcb->retry_count; + mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) + | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); /* * only if GRH is TRUE we might consider SOURCE_GID_IDX * and DEST_GID otherwise phype will return H_ATTR_PARM!!! */ if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag_al = 1 << 31; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); - mqpcb->source_gid_idx_al = - attr->alt_ah_attr.grh.sgid_index; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1); + mqpcb->send_grh_flag_al = 1; for (cnt = 0; cnt < 16; cnt++) mqpcb->dest_gid_al.byte[cnt] = attr->alt_ah_attr.grh.dgid.raw[cnt]; - - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1); + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1); mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1); mqpcb->traffic_class_al = attr->alt_ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); } } @@ -1322,7 +1662,16 @@ static int internal_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PATH_MIG_STATE) { - mqpcb->path_migration_state = attr->path_mig_state; + if (attr->path_mig_state != IB_MIG_REARM + && attr->path_mig_state != IB_MIG_MIGRATED) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid mig_state=%x", + attr->path_mig_state); + goto modify_qp_exit2; + } + mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); } @@ -1337,7 +1686,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, /* no support for max_send/recv_sge yet */ } - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, @@ -1348,7 +1697,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1381,15 +1730,34 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "ENABLE in context of " "RESET_2_INIT failed! Maybe you didn't get " - "a LID h_ret=%lx ehca_qp=%p qp_num=%x", + "a LID h_ret=%lli ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR && !is_user) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + if (!is_user) + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp) && !is_user) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) @@ -1410,19 +1778,110 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + int ret = 0; + + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); - u32 cur_pid = current->tgid; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + goto out; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); } - return internal_modify_qp(ibqp, attr, attr_mask, 0); + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; +} + +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x " + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; } int ehca_query_qp(struct ib_qp *qp, @@ -1430,23 +1889,13 @@ int ehca_query_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, ib_device); struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; struct hcp_modify_qp_control_block *qpcb; - u32 cur_pid = current->tgid; int cnt, ret = 0; u64 h_ret; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } - if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { ehca_err(qp->device, "Invalid attribute mask " "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", @@ -1469,7 +1918,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(qp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lx", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; } @@ -1490,7 +1939,7 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->qkey = qpcb->qkey; qp_attr->path_mtu = qpcb->path_mtu; - qp_attr->path_mig_state = qpcb->path_migration_state; + qp_attr->path_mig_state = qpcb->path_migration_state - 1; qp_attr->rq_psn = qpcb->receive_psn; qp_attr->sq_psn = qpcb->send_psn; qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; @@ -1512,19 +1961,13 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; qp_attr->dest_qp_num = qpcb->dest_qp_nr; - qp_attr->pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); - - qp_attr->port_num = - EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); - + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; qp_attr->timeout = qpcb->timeout; qp_attr->retry_cnt = qpcb->retry_count; qp_attr->rnr_retry = qpcb->rnr_retry_count; - qp_attr->alt_pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); - + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; @@ -1577,7 +2020,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_init_attr) *qp_init_attr = my_qp->init_attr; - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); query_qp_exit1: @@ -1591,8 +2034,6 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, { struct ehca_qp *my_qp = container_of(ibsrq, struct ehca_qp, ib_srq); - struct ehca_pd *my_pd = - container_of(ibsrq->pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(ibsrq->pd->device, struct ehca_shca, ib_device); struct hcp_modify_qp_control_block *mqpcb; @@ -1600,14 +2041,6 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, u64 h_ret; int ret = 0; - u32 cur_pid = current->tgid; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!mqpcb) { ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " @@ -1621,8 +2054,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = - EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->curr_srq_limit = attr->srq_limit; mqpcb->qp_aff_asyn_ev_log_reg = EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); } @@ -1635,7 +2067,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto modify_srq_exit0; } - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, @@ -1644,7 +2076,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx " + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, my_qp->real_qp_num); } @@ -1658,22 +2090,13 @@ modify_srq_exit0: int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); - struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, ib_device); struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; struct hcp_modify_qp_control_block *qpcb; - u32 cur_pid = current->tgid; int ret = 0; u64 h_ret; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { ehca_err(srq->device, "Out of memory for qpcb " @@ -1687,16 +2110,16 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lx", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, h_ret); goto query_srq_exit1; } srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->srq_limit = EHCA_BMASK_GET( - MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + srq_attr->max_sge = 3; + srq_attr->srq_limit = qpcb->curr_srq_limit; - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); query_srq_exit1: @@ -1711,33 +2134,30 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); - u32 cur_pid = current->tgid; + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; u32 qp_num = my_qp->real_qp_num; int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if (uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " "user space qp_num=%x", qp_num); return -EINVAL; } - if (my_pd->ownpid != cur_pid) { - ehca_err(dev, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; - } } if (my_qp->send_cq) { ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); if (ret) { ehca_err(dev, "Couldn't unassign qp from " - "send_cq ret=%x qp_num=%x cq_num=%x", ret, + "send_cq ret=%i qp_num=%x cq_num=%x", ret, qp_num, my_qp->send_cq->cq_number); return ret; } @@ -1747,9 +2167,22 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx " + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); return ehca2ib_return_code(h_ret); } @@ -1757,11 +2190,19 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, port_num = my_qp->init_attr.port_num; qp_type = my_qp->init_attr.qp_type; + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1769,11 +2210,18 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (HAS_SQ(my_qp)) + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 94eed70fedf..47f94984353 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,6 @@ */ -#include <asm-powerpc/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -50,9 +49,28 @@ #include "hcp_if.h" #include "hipz_fns.h" +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -66,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -78,10 +96,10 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, recv_wr->sg_list[cnt_ds].length; } - if (ehca_debug_level) { + if (ehca_debug_level >= 3) { ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } return 0; @@ -117,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *)abs_to_virt(sge->addr); + u8 *data = __va(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -135,12 +153,15 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr) + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -153,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -176,13 +201,17 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->wr_flag = 0; - if (send_wr->send_flags & IB_SEND_SIGNALED) + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } if (send_wr->opcode == IB_WR_SEND_WITH_IMM || send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data); + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; } @@ -199,10 +228,14 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; wqe_p->local_ee_context_qkey = remote_qkey; - if (!send_wr->wr.ud.ah) { + if (unlikely(!send_wr->wr.ud.ah)) { ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); return -EINVAL; } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); wqe_p->u.ud_av.ud_av = my_av->av; @@ -235,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intentional here */ case IB_QPT_RC: /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_adress = + wqe_p->u.nud.remote_virtual_address = send_wr->wr.rdma.remote_addr; wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; @@ -255,6 +288,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, } /* eof idx */ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + break; default: @@ -262,7 +304,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, return -EINVAL; } - if (ehca_debug_level) { + if (ehca_debug_level >= 3) { ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); } @@ -355,62 +397,107 @@ static inline void map_ib_wc_status(u32 cqe_status, *wc_status = IB_WC_SUCCESS; } +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ib_send_wr *cur_send_wr; - struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; unsigned long flags; + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } + /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_s, flags); + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + /* loop processes list of send reqs */ - for (cur_send_wr = send_wr; cur_send_wr != NULL; - cur_send_wr = cur_send_wr->next) { - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); - } - goto post_send_exit0; - } - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); - /* - * if something failed, - * reset the free entry pointer to the start value - */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); - } goto post_send_exit0; } wqe_cnt++; - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); - } /* eof for cur_send_wr */ + send_wr = send_wr->next; + } post_send_exit0: iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; return ret; } @@ -419,63 +506,77 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", my_qp, my_qp->real_qp_num, my_qp->ext_type); - return -ENODEV; + ret = -ENODEV; + goto out; } /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_r, flags); - /* loop processes list of send reqs */ - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; - cur_recv_wr = cur_recv_wr->next) { + /* loop processes list of recv reqs */ + while (recv_wr) { u64 start_offset = my_qp->ipz_rqueue.current_q_offset; /* get pointer next to free WQE */ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_recv_wr) - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value */ if (unlikely(ret)) { my_qp->ipz_rqueue.current_q_offset = start_offset; - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + wqe_cnt++; - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, my_qp->real_qp_num, wqe_cnt); - } /* eof for cur_recv_wr */ + recv_wr = recv_wr->next; + } /* eof for recv_wr */ post_recv_exit0: iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + return ret; } @@ -483,8 +584,17 @@ int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), - qp->device, recv_wr, bad_recv_wr); + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); } int ehca_post_srq_recv(struct ib_srq *srq, @@ -514,20 +624,23 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; - int cqe_count = 0; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; -poll_cq_one_read_cqe: +repoll: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { ret = -EAGAIN; - ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " - "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret); - goto poll_cq_one_exit0; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; } /* prevents loads being reordered across this point */ @@ -547,7 +660,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ - goto poll_cq_one_read_cqe; + goto repoll; } spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; @@ -557,7 +670,7 @@ poll_cq_one_read_cqe: ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); @@ -566,15 +679,17 @@ poll_cq_one_read_cqe: * that caused sqe and turn off purge flag */ qp->sqerr_purgeflag = 0; - goto poll_cq_one_read_cqe; + goto repoll; } } - /* tracing cqe */ - if (unlikely(ehca_debug_level)) { + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { ehca_dbg(cq->device, - "Received COMPLETION ehca_cq=%p cq_num=%x -----", - my_cq, my_cq->cq_number); + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); ehca_dbg(cq->device, @@ -582,8 +697,62 @@ poll_cq_one_read_cqe: my_cq, my_cq->cq_number); } - /* we got a completion! */ - wc->wr_id = cqe->work_request_id; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -595,38 +764,30 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! */ - goto poll_cq_one_exit0; + goto repoll; } + /* eval ib_wc_status */ - if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + if (unlikely(is_error)) { /* complete with errors */ map_ib_wc_status(cqe->status, &wc->status); wc->vendor_err = wc->status; } else wc->status = IB_WC_SUCCESS; - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - wc->qp = &my_qp->ib_qp; - read_unlock(&ehca_qp_idr_lock); - wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; - wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. + */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (unlikely(wc->status != IB_WC_SUCCESS)) - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " - "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " - "cqe=%p", my_cq, my_cq->cq_number, cqe->optype, - cqe->status, cqe->local_qp_number, - cqe->remote_qp_number, cqe->work_request_id, cqe); - poll_cq_one_exit0: if (cqe_count > 0) hipz_update_feca(my_cq, cqe_count); @@ -634,13 +795,89 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -650,15 +887,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 9f16e9c7939..dba8f9f8b99 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -39,15 +39,18 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <rdma/ib_mad.h> -#include <linux/module.h> -#include <linux/err.h> #include "ehca_classes.h" #include "ehca_tools.h" -#include "ehca_qes.h" #include "ehca_iverbs.h" #include "hcp_if.h" +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) + +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) /** * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue @@ -82,10 +85,13 @@ u64 ehca_define_sqp(struct ehca_shca *shca, if (ret != H_SUCCESS) { ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. rc=%lx", + "Can't define AQP1 for port %x. h_ret=%lli", port, ret); return ret; } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); break; default: ehca_err(&shca->ib_device, "invalid qp_type=%x", @@ -93,6 +99,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca, return H_PARAMETER; } + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + for (counter = 0; shca->sport[port - 1].port_state != IB_PORT_ACTIVE && counter < ehca_port_act_time; @@ -109,3 +118,120 @@ u64 ehca_define_sqp(struct ehca_shca *shca, return H_SUCCESS; } + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct ib_perf *in_perf = (struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + struct vertcfl *vertcfl = + (struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + int ret; + + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 57c77a715f4..d280b12aae6 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -54,13 +54,11 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> -#include <linux/version.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/device.h> -#include <asm/atomic.h> -#include <asm/abs_addr.h> +#include <linux/atomic.h> #include <asm/ibmebus.h> #include <asm/io.h> #include <asm/pgtable.h> @@ -73,40 +71,37 @@ extern int ehca_debug_level; if (unlikely(ehca_debug_level)) \ dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ "PU%04x EHCA_DBG:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, \ + raw_smp_processor_id(), __func__, \ ## arg); \ } while (0) #define ehca_info(ib_dev, format, arg...) \ dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg) + raw_smp_processor_id(), __func__, ## arg) #define ehca_warn(ib_dev, format, arg...) \ dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg) + raw_smp_processor_id(), __func__, ## arg) #define ehca_err(ib_dev, format, arg...) \ dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg) + raw_smp_processor_id(), __func__, ## arg) /* use this one only if no ib_dev available */ #define ehca_gen_dbg(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg); \ + raw_smp_processor_id(), __func__, ## arg); \ } while (0) #define ehca_gen_warn(format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg); \ - } while (0) + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) #define ehca_gen_err(format, arg...) \ printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ - get_paca()->paca_index, __FUNCTION__, ## arg) + raw_smp_processor_id(), __func__, ## arg) /** * ehca_dmp - printk a memory block, whose length is n*8 bytes. @@ -120,8 +115,8 @@ extern int ehca_debug_level; unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016lx %016lx\n", \ - __FUNCTION__, ##args, deb, x, \ + " adr=%p ofs=%04x %016llx %016llx\n", \ + __func__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ deb += 16; \ } \ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 4bc687fdf53..1a1d5d99fcf 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -40,7 +40,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <asm/current.h> +#include <linux/slab.h> #include "ehca_classes.h" #include "ehca_iverbs.h" @@ -97,7 +97,7 @@ static void ehca_mm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end, *count); } -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .open = ehca_mm_open, .close = ehca_mm_close, }; @@ -109,19 +109,19 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, u64 vsize, physical; vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { + if (vsize < EHCA_PAGESIZE) { ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); return -EINVAL; } physical = galpas->user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ - ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, - vsize, vma->vm_page_prot); + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ + ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, + vma->vm_page_prot); if (unlikely(ret)) { - ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + ehca_gen_err("remap_pfn_range() failed ret=%i", ret); return -ENOMEM; } @@ -139,14 +139,14 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, u64 start, ofs; struct page *page; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; start = vma->vm_start; for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); page = virt_to_page(virt_addr); ret = vm_insert_page(vma, start, page); if (unlikely(ret)) { - ehca_gen_err("vm_insert_page() failed rc=%x", ret); + ehca_gen_err("vm_insert_page() failed rc=%i", ret); return ret; } start += PAGE_SIZE; @@ -164,23 +164,23 @@ static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, int ret; switch (rsrc_type) { - case 1: /* galpa fw handle */ + case 0: /* galpa fw handle */ ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "ehca_mmap_fw() failed rc=%x cq_num=%x", + "ehca_mmap_fw() failed rc=%i cq_num=%x", ret, cq->cq_number); return ret; } break; - case 2: /* cq queue_addr */ + case 1: /* cq queue_addr */ ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "ehca_mmap_queue() failed rc=%x cq_num=%x", + "ehca_mmap_queue() failed rc=%i cq_num=%x", ret, cq->cq_number); return ret; } @@ -201,38 +201,36 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, int ret; switch (rsrc_type) { - case 1: /* galpa fw handle */ + case 0: /* galpa fw handle */ ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%x qp_num=%x", + "remap_pfn_range() failed ret=%i qp_num=%x", ret, qp->ib_qp.qp_num); return -ENOMEM; } break; - case 2: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", - qp->ib_qp.qp_num); + case 1: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", + "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", ret, qp->ib_qp.qp_num); return ret; } break; - case 3: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", - qp->ib_qp.qp_num); + case 2: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", + "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", ret, qp->ib_qp.qp_num); return ret; } @@ -249,19 +247,17 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; - u32 idr_handle = fileoffset >> 32; - u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ - u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ - u32 cur_pid = current->tgid; + u64 fileoffset = vma->vm_pgoff; + u32 idr_handle = fileoffset & 0x1FFFFFF; + u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ u32 ret; struct ehca_cq *cq; struct ehca_qp *qp; - struct ehca_pd *pd; struct ib_uobject *uobject; switch (q_type) { - case 1: /* CQ */ + case 0: /* CQ */ read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, idr_handle); read_unlock(&ehca_cq_idr_lock); @@ -270,26 +266,19 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (!cq) return -EINVAL; - if (cq->ownpid != cur_pid) { - ehca_err(cq->ib_cq.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, cq->ownpid); - return -ENOMEM; - } - if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) return -EINVAL; ret = ehca_mmap_cq(vma, cq, rsrc_type); if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "ehca_mmap_cq() failed rc=%x cq_num=%x", + "ehca_mmap_cq() failed rc=%i cq_num=%x", ret, cq->cq_number); return ret; } break; - case 2: /* QP */ + case 1: /* QP */ read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, idr_handle); read_unlock(&ehca_qp_idr_lock); @@ -298,14 +287,6 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (!qp) return -EINVAL; - pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); - if (pd->ownpid != cur_pid) { - ehca_err(qp->ib_qp.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, pd->ownpid); - return -ENOMEM; - } - uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; if (!uobject || uobject->context != context) return -EINVAL; @@ -313,7 +294,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) ret = ehca_mmap_qp(vma, qp, rsrc_type); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "ehca_mmap_qp() failed rc=%x qp_num=%x", + "ehca_mmap_qp() failed rc=%i qp_num=%x", ret, qp->ib_qp.qp_num); return ret; } diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 24f454162f2..89517ffb438 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -84,27 +84,11 @@ #define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) #define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) -static DEFINE_SPINLOCK(hcall_lock); +#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" +#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" +#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx" -static u32 get_longbusy_msecs(int longbusy_rc) -{ - switch (longbusy_rc) { - case H_LONG_BUSY_ORDER_1_MSEC: - return 1; - case H_LONG_BUSY_ORDER_10_MSEC: - return 10; - case H_LONG_BUSY_ORDER_100_MSEC: - return 100; - case H_LONG_BUSY_ORDER_1_SEC: - return 1000; - case H_LONG_BUSY_ORDER_10_SEC: - return 10000; - case H_LONG_BUSY_ORDER_100_SEC: - return 100000; - default: - return 1; - } -} +static DEFINE_SPINLOCK(hcall_lock); static long ehca_plpar_hcall_norets(unsigned long opcode, unsigned long arg1, @@ -117,15 +101,23 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, { long ret; int i, sleep_msecs; + unsigned long flags = 0; - ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " - "arg5=%lx arg6=%lx arg7=%lx", - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + if (H_IS_LONG_BUSY(ret)) { sleep_msecs = get_longbusy_msecs(ret); msleep_interruptible(sleep_msecs); @@ -133,16 +125,14 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, } if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%lx" - " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" - " arg5=%lx arg6=%lx arg7=%lx ", - opcode, ret, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7); - - ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret); - return ret; + ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, + opcode, ret, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + else + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); + return ret; } return H_BUSY; @@ -161,25 +151,24 @@ static long ehca_plpar_hcall9(unsigned long opcode, unsigned long arg9) { long ret; - int i, sleep_msecs, lock_is_set = 0; + int i, sleep_msecs; unsigned long flags = 0; - ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " - "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, - arg8, arg9); + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); for (i = 0; i < 5; i++) { - if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) spin_lock_irqsave(&hcall_lock, flags); - lock_is_set = 1; - } ret = plpar_hcall9(opcode, outs, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); - if (lock_is_set) + if (ehca_lock_hcalls) spin_unlock_irqrestore(&hcall_lock, flags); if (H_IS_LONG_BUSY(ret)) { @@ -188,26 +177,19 @@ static long ehca_plpar_hcall9(unsigned long opcode, continue; } - if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%lx" - " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" - " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" - " arg9=%lx" - " out1=%lx out2=%lx out3=%lx out4=%lx" - " out5=%lx out6=%lx out7=%lx out8=%lx" - " out9=%lx", - opcode, ret, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9, - outs[0], outs[1], outs[2], outs[3], + if (ret < H_SUCCESS) { + ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + } else if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], outs[4], outs[5], outs[6], outs[7], outs[8]); - - ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx " - "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx " - "out9=%lx", - opcode, ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], outs[8]); return ret; } @@ -224,7 +206,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, u32 *eq_ist) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; u64 allocate_controls; /* resource type */ @@ -247,7 +229,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, *eq_ist = (u32)outs[5]; if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%lx ", ret); + ehca_gen_err("Not enough resource - ret=%lli ", ret); return ret; } @@ -267,8 +249,9 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, struct ehca_cq *cq, struct ehca_alloc_cq_parms *param) { + int rc; u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ @@ -281,21 +264,33 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_nr_of_entries = (u32)outs[3]; param->act_pages = (u32)outs[4]; - if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lx", ret); + ehca_gen_err("Not enough resources. ret=%lli", ret); return ret; } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { + int rc; u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; allocate_controls = EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) @@ -356,11 +351,22 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->rqueue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lx", ret); + ehca_gen_err("Not enough resources. ret=%lli", ret); return ret; } @@ -370,7 +376,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, struct hipz_query_port *query_port_response_block) { u64 ret; - u64 r_cb = virt_to_abs(query_port_response_block); + u64 r_cb = __pa(query_port_response_block); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response block not page aligned"); @@ -383,7 +389,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, r_cb, /* r6 */ 0, 0, 0, 0); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(query_port_response_block, 64, "response_block"); return ret; @@ -412,7 +418,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { - u64 r_cb = virt_to_abs(query_hca_rblock); + u64 r_cb = __pa(query_hca_rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response_block=%p not page aligned", @@ -452,7 +458,7 @@ u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, const u64 count) { if (count != 1) { - ehca_gen_err("Ppage counter=%lx", count); + ehca_gen_err("Ppage counter=%llx", count); return H_PARAMETER; } return hipz_h_register_rpage(adapter_handle, @@ -487,7 +493,7 @@ u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, const struct h_galpa gal) { if (count != 1) { - ehca_gen_err("Page counter=%lx", count); + ehca_gen_err("Page counter=%llx", count); return H_PARAMETER; } @@ -506,7 +512,7 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, const struct h_galpa galpa) { if (count > 1) { - ehca_gen_err("Page counter=%lx", count); + ehca_gen_err("Page counter=%llx", count); return H_PARAMETER; } @@ -523,7 +529,7 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, int dis_and_get_function_code) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, adapter_handle.handle, /* r4 */ @@ -546,16 +552,16 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, struct h_galpa gal) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ update_mask, /* r6 */ - virt_to_abs(mqpcb), /* r7 */ + __pa(mqpcb), /* r7 */ 0, 0, 0, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%lx", ret); + ehca_gen_err("Insufficient resources ret=%lli", ret); return ret; } @@ -569,7 +575,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_QUERY_QP, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ - virt_to_abs(qqpcb), /* r6 */ + __pa(qqpcb), /* r6 */ 0, 0, 0, 0); } @@ -577,7 +583,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, struct ehca_qp *qp) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = hcp_galpas_dtor(&qp->galpas); if (ret) { @@ -591,7 +597,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, qp->ipz_qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if (ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%lx", ret); + ehca_gen_err("HCA not operational. ret=%lli", ret); ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle, /* r4 */ @@ -599,7 +605,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. ret=%lx", ret); + ehca_gen_err("Resource still in use. ret=%lli", ret); return ret; } @@ -623,7 +629,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, u32 * bma_qp_nr) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, adapter_handle.handle, /* r4 */ @@ -634,7 +640,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, *bma_qp_nr = (u32)outs[1]; if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%lx", ret); + ehca_gen_err("AQP1 already exists. ret=%lli", ret); return ret; } @@ -656,7 +662,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lx", ret); + ehca_gen_err("Not enough resources. ret=%lli", ret); return ret; } @@ -695,7 +701,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret); + ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); return ret; } @@ -717,7 +723,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. ret=%lx ", ret); + ehca_gen_err("Resource in use. ret=%lli ", ret); return ret; } @@ -731,11 +737,8 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x " - "vaddr=%lx length=%lx", - (u32)PAGE_SIZE, access_ctrl, vaddr, length); ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ 5, /* r5 */ @@ -760,11 +763,11 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, { u64 ret; - if (unlikely(ehca_debug_level >= 2)) { + if (unlikely(ehca_debug_level >= 3)) { if (count > 1) { u64 *kpage; int i; - kpage = (u64 *)abs_to_virt(logical_address_of_page); + kpage = __va(logical_address_of_page); for (i = 0; i < count; i++) ehca_gen_dbg("kpage[%d]=%p", i, (void *)kpage[i]); @@ -775,9 +778,9 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%lx mr=%p mr_handle=%lx " + "adapter_handle=%llx mr=%p mr_handle=%llx " "pagesize=%x queue_type=%x " - "logical_address_of_page=%lx count=%lx", + "logical_address_of_page=%llx count=%llx", adapter_handle.handle, mr, mr->ipz_mr_handle.handle, pagesize, queue_type, logical_address_of_page, count); @@ -795,7 +798,7 @@ u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_QUERY_MR, outs, adapter_handle.handle, /* r4 */ @@ -829,7 +832,7 @@ u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, adapter_handle.handle, /* r4 */ @@ -856,7 +859,7 @@ u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, adapter_handle.handle, /* r4 */ @@ -878,7 +881,7 @@ u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, struct ehca_mw_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ @@ -896,7 +899,7 @@ u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, struct ehca_mw_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_QUERY_MW, outs, adapter_handle.handle, /* r4 */ @@ -921,7 +924,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, void *rblock, unsigned long *byte_count) { - u64 r_cb = virt_to_abs(rblock); + u64 r_cb = __pa(rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("rblock not page aligned."); @@ -934,3 +937,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, r_cb, 0, 0, 0, 0); } + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 60ce02b7066..a46e514c367 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -49,7 +49,7 @@ #include "hipz_hw.h" /* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize * resources, create the empty EQPT (ring). */ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, @@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, const u64 ressource_handle, void *rblock, unsigned long *byte_count); +u64 hipz_h_eoi(int irq); #endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 214821095cb..077376ff3d2 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -42,10 +42,9 @@ #include "ehca_classes.h" #include "hipz_hw.h" -int hcall_map_page(u64 physaddr, u64 *mapaddr) +u64 hcall_map_page(u64 physaddr) { - *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); - return 0; + return (u64)ioremap(physaddr, EHCA_PAGESIZE); } int hcall_unmap_page(u64 mapaddr) @@ -54,12 +53,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; galpas->user.fw_handle = paddr_user; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 5305c2a3ed9..d1b02991024 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,12 +78,12 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); -int hcall_map_page(u64 physaddr, u64 * mapaddr); +u64 hcall_map_page(u64 physaddr); int hcall_unmap_page(u64 mapaddr); diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index d9739e55451..bf996c7acc4 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -378,6 +378,7 @@ struct hipz_query_hca { #define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) #define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) #define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) /* query port response block */ struct hipz_query_port { @@ -402,7 +403,11 @@ struct hipz_query_port { u64 max_msg_sz; u32 max_mtu; u32 vl_cap; - u8 reserved2[1900]; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; u64 guid_entries[255]; } __attribute__ ((packed)); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 29bd476fbd5..8d594517cd2 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -38,6 +38,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ipz_pt_fn.h" #include "ehca_classes.h" @@ -79,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) { int i; for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + u64 page = __pa(queue->queue_pages[i]); if (addr >= page && addr < page + queue->pagesize) { *q_offset = addr - page + i * queue->pagesize; return 0; @@ -158,10 +160,12 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); queue->small_page = page; + queue->offset = bit << (order + 9); return 1; out: ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); return 0; } @@ -218,12 +222,15 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); /* allocate actual queue pages */ if (is_small) { @@ -238,7 +245,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -260,7 +270,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } |
