Diffstat (limited to 'drivers/infiniband/hw/ehca')
24 files changed, 1026 insertions, 639 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 56735ea2fc5..465926319f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -41,6 +41,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" #include "hcp_if.h" diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index c825142a2fb..bd45e0f3923 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -112,7 +112,7 @@ struct ehca_sport { struct ehca_shca { struct ib_device ib_device; - struct of_device *ofdev; + struct platform_device *ofdev; u8 num_ports; int hw_level; struct list_head shca_list; @@ -322,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ @@ -375,11 +375,12 @@ extern rwlock_t ehca_qp_idr_lock; extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; extern int ehca_max_cq; diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 1798e6466bd..689c35786dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) #define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) #define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) #define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) @@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) #define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) #define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) #define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) #define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 
32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) #define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) #define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 97e4b231cdc..8cc83753776 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -43,6 +43,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_iverbs.h" #include "ehca_classes.h" #include "ehca_irq.h" @@ -126,7 +128,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, void *vpage; u32 counter; u64 rpage, cqx_fec, h_ret; - int ipz_rc, ret, i; + int ipz_rc, i; unsigned long flags; if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) @@ -161,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, adapter_handle = shca->ipz_hca_handle; param.eq_handle = shca->eq.ipz_eq_handle; - do { - if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't reserve idr nr. device=%p", - device); - goto create_cq_exit1; - } - - write_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - } while (ret == -EAGAIN); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); - if (ret) { + if (my_cq->token < 0) { cq = ERR_PTR(-ENOMEM); ehca_err(device, "Can't allocate new idr entry. 
device=%p", device); goto create_cq_exit1; } - if (my_cq->token > 0x1FFFFFF) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Invalid number of cq. device=%p", device); - goto create_cq_exit2; - } - /* * CQs maximum depth is 4GB-64, but we need additional 20 as buffer * for receiving errors CQEs. @@ -218,7 +207,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_cq(adapter_handle, my_cq->ipz_cq_handle, @@ -294,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 523e733c630..90da6747d39 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (!vpage) goto create_eq_exit2; - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, eq->ipz_eq_handle, &eq->pf, @@ -122,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca, /* register interrupt handlers and initialize work queues */ if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - IRQF_DISABLED, "ehca_eq", + 0, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - IRQF_DISABLED, "ehca_neq", + 0, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); } eq->is_initialized = 1; @@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) unsigned long flags; u64 h_ret; - spin_lock_irqsave(&eq->spinlock, flags); ibmebus_free_irq(eq->ist, (void *)shca); - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); - spin_unlock_irqrestore(&eq->spinlock, flags); + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't free EQ resources."); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 9209c5332df..9ed4d258830 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -39,6 +39,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/gfp.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" #include "hcp_if.h" @@ -231,7 +233,7 @@ int ehca_query_port(struct ib_device *ibdev, props->phys_state = 5; props->state = rblock->state; props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; + props->active_speed = IB_SPEED_SDR; } query_port1: @@ -319,7 +321,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, ib_device); struct hipz_query_port *rblock; - if (index > 255) { + if (index < 0 || index > 255) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 99bcbd7ffb0..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -41,6 +41,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> +#include <linux/smpboot.h> + #include "ehca_classes.h" #include "ehca_irq.h" #include "ehca_iverbs.h" @@ -479,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -548,11 +551,10 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; u64 eqe_value, ret; - unsigned long flags; int eqe_cnt, i; int eq_empty = 0; - spin_lock_irqsave(&eq->irq_spinlock, flags); + spin_lock(&eq->irq_spinlock); if (is_irq) { const int max_query_cnt = 100; int query_cnt = 0; @@ -572,8 +574,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; @@ -637,14 +638,14 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); } while (1); unlock_irq_spinlock: - spin_unlock_irqrestore(&eq->irq_spinlock, flags); + spin_unlock(&eq->irq_spinlock); } void ehca_tasklet_eq(unsigned long data) @@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct 
task_struct *thread) { unsigned long flags; @@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - 
init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); - - return cct->task; + spin_unlock_irq(&target->task_lock); } -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, - smp_processor_id())); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -static int __cpuinit comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return NOTIFY_BAD; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpumask_any(cpu_online_mask)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -static struct notifier_block comp_pool_callback_nb __cpuinitdata = { - .notifier_call = comp_pool_callback, - .priority = 0, +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, }; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = 
-ENOMEM; if (!ehca_scaling_code) return 0; @@ -905,38 +825,46 @@ int ehca_create_comp_pool(void) pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } - register_hotcpu_notifier(&comp_pool_callback_nb); + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; - unregister_hotcpu_notifier(&comp_pool_callback_nb); - - for_each_online_cpu(i) - destroy_comp_task(pool, i); + smpboot_unregister_percpu_thread(&comp_pool_threads); + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea..5370199f08c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; int last_cpu; spinlock_t last_cpu_lock; }; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 8f7f282ead6..22f79afa7fc 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int ehca_dereg_mr(struct ib_mr *mr); -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 368311ce332..cd8d290a09f 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,23 +52,23 @@ #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0026" +#define HCAD_VERSION "0029" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); MODULE_VERSION(HCAD_VERSION); -static int ehca_open_aqp1 = 0; +static bool ehca_open_aqp1 = 0; static int ehca_hw_level = 0; -static int ehca_poll_all_eqs = 1; +static bool ehca_poll_all_eqs = 1; int ehca_debug_level = 0; -int 
ehca_nr_ports = 2; -int ehca_use_hp_mr = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_static_rate = -1; -int ehca_scaling_code = 0; +bool ehca_scaling_code = 0; int ehca_lock_hcalls = -1; int ehca_max_cq = -1; int ehca_max_qp = -1; @@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); @@ -95,8 +95,8 @@ MODULE_PARM_DESC(hw_level, "Hardware level (0: autosensing (default), " "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect, 1: port one only, " - "2: two ports (default)"); + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); MODULE_PARM_DESC(use_hp_mr, "Use high performance MRs (default: no)"); MODULE_PARM_DESC(port_act_time, @@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); static LIST_HEAD(shca_list); /* list of all registered ehcas */ -static DEFINE_SPINLOCK(shca_list_lock); +DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; @@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void) if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; goto create_slab_caches6; } #endif @@ -291,8 +292,9 @@ static int ehca_sense_attributes(struct ehca_shca *shca) }; ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->node->full_name); - loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL); + shca->ofdev->dev.of_node->full_name); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); if (loc_code) ehca_gen_dbg(" ... 
location lode=%s", loc_code); @@ -359,7 +361,8 @@ static int ehca_sense_attributes(struct ehca_shca *shca) * a firmware property, so it's valid across all adapters */ if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC); + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; @@ -506,6 +509,7 @@ static int ehca_init_device(struct ehca_shca *shca) shca->ib_device.detach_mcast = ehca_detach_mcast; shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { shca->ib_device.uverbs_cmd_mask |= @@ -622,7 +626,7 @@ static struct attribute_group ehca_drv_attr_grp = { .attrs = ehca_drv_attrs }; -static struct attribute_group *ehca_drv_attr_groups[] = { +static const struct attribute_group *ehca_drv_attr_groups[] = { &ehca_drv_attr_grp, NULL, }; @@ -636,7 +640,7 @@ static ssize_t ehca_show_##name(struct device *dev, \ struct hipz_query_hca *rblock; \ int data; \ \ - shca = dev->driver_data; \ + shca = dev_get_drvdata(dev); \ \ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ @@ -680,7 +684,7 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, struct device_attribute *attr, char *buf) { - struct ehca_shca *shca = dev->driver_data; + struct ehca_shca *shca = dev_get_drvdata(dev); return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); @@ -710,8 +714,7 @@ static struct attribute_group ehca_dev_attr_grp = { .attrs = ehca_dev_attrs }; -static int __devinit ehca_probe(struct of_device *dev, - const struct of_device_id *id) +static int ehca_probe(struct platform_device *dev) { struct ehca_shca *shca; const u64 *handle; @@ -719,16 +722,16 @@ static int __devinit ehca_probe(struct of_device *dev, int ret, i, eq_size; unsigned long flags; - handle = of_get_property(dev->node, "ibm,hca-handle", NULL); + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); if (!handle) { ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } if (!(*handle)) { ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } @@ -749,7 +752,7 @@ static int __devinit ehca_probe(struct of_device *dev, shca->ofdev = dev; shca->ipz_hca_handle.handle = *handle; - dev->dev.driver_data = shca; + dev_set_drvdata(&dev->dev, shca); ret = ehca_sense_attributes(shca); if (ret < 0) { @@ -797,7 +800,7 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe5; } - ret = ib_register_device(&shca->ib_device); + ret = ib_register_device(&shca->ib_device, NULL); if (ret) { ehca_err(&shca->ib_device, "ib_register_device() failed ret=%i", ret); @@ -876,9 +879,9 @@ probe1: return -EINVAL; } -static int __devexit ehca_remove(struct of_device *dev) +static int ehca_remove(struct platform_device *dev) { - struct ehca_shca *shca = dev->dev.driver_data; + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); unsigned long flags; int ret; @@ -934,13 +937,14 @@ static struct of_device_id ehca_device_table[] = }; MODULE_DEVICE_TABLE(of, ehca_device_table); -static struct of_platform_driver ehca_driver = { - .name = "ehca", - .match_table = ehca_device_table, +static struct platform_driver ehca_driver = { .probe = ehca_probe, .remove = ehca_remove, - .driver = { + .driver = { + .name = 
"ehca", + .owner = THIS_MODULE, .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, }, }; @@ -1028,17 +1032,23 @@ static int __init ehca_module_init(void) goto module_init1; } + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + ret = ibmebus_register_driver(&ehca_driver); if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; - goto module_init2; + goto module_init3; } ret = register_memory_notifier(&ehca_mem_nb); if (ret) { ehca_gen_err("Failed registering memory add/remove notifier"); - goto module_init3; + goto module_init4; } if (ehca_poll_all_eqs != 1) { @@ -1053,9 +1063,12 @@ static int __init ehca_module_init(void) return 0; -module_init3: +module_init4: ibmebus_unregister_driver(&ehca_driver); +module_init3: + ehca_destroy_busmap(); + module_init2: ehca_destroy_slab_caches(); @@ -1073,6 +1086,8 @@ static void __exit ehca_module_exit(void) unregister_memory_notifier(&ehca_mem_nb); + ehca_destroy_busmap(); + ehca_destroy_slab_caches(); ehca_destroy_comp_pool(); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 72f83f7df61..3488e8c9fcb 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -40,6 +40,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> #include <rdma/ib_umem.h> #include "ehca_iverbs.h" @@ -53,6 +54,38 @@ /* max number of rpages (per hcall register_rpages) */ #define MAX_RPAGES 512 +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -68,6 +101,8 @@ enum ehca_mr_pgsize { #define EHCA_MR_PGSHIFT1M 20 #define EHCA_MR_PGSHIFT16M 24 +static u64 ehca_map_vaddr(void *caddr); + static u32 ehca_encode_hwpage_size(u32 pgsize) { int log = ilog2(pgsize); @@ -77,7 +112,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize) static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) { - return 1UL << ilog2(shca->hca_cap_mr_pgsize); + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); } static struct ehca_mr *ehca_mr_new(void) @@ -135,7 +170,8 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); @@ -251,7 +287,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, ret = ehca_reg_mr(shca, 
e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit1; @@ -364,13 +400,10 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { ehca_warn(pd->device, "failed to register mr " "with hwpage_size=%llx", hwpage_size); @@ -652,7 +685,7 @@ dereg_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *ib_mw; u64 h_ret; @@ -662,6 +695,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) container_of(pd->device, struct ehca_shca, ib_device); struct ehca_mw_hipzout_parms hipzout; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + e_mw = ehca_mw_new(); if (!e_mw) { ib_mw = ERR_PTR(-ENOMEM); @@ -794,7 +830,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey); + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); if (ret) { ib_fmr = ERR_PTR(ret); goto alloc_fmr_exit1; @@ -897,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list) /* check all FMR belong to same SHCA, and check internal flag */ list_for_each_entry(ib_fmr, fmr_list, list) { prev_shca = shca; - if (!ib_fmr) { - ehca_gen_err("bad fmr=%p in list", ib_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } shca = container_of(ib_fmr->device, struct ehca_shca, ib_device); e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); @@ -983,6 +1014,10 @@ free_fmr_exit0: /*----------------------------------------------------------------------*/ +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -991,7 +1026,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) { int ret; u64 h_ret; @@ -1015,7 +1051,13 @@ int ehca_reg_mr(struct ehca_shca *shca, e_mr->ipz_mr_handle = hipzout.handle; - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + if (ret) goto ehca_reg_mr_exit1; @@ -1094,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } if (rnum > 1) { - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", kpage, i); @@ -1189,7 +1231,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p", kpage); ret = -EFAULT; @@ -1316,7 
+1358,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey); + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; memcpy(&e_mr->flags, &(save_mr.flags), @@ -1409,7 +1451,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, ret = ehca_reg_mr(shca, e_fmr, NULL, (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); + &tmp_rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; memcpy(&e_fmr->flags, &(save_mr.flags), @@ -1478,6 +1520,90 @@ ehca_reg_smr_exit0: } /* end ehca_reg_smr() */ /*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} /* register internal max-MR to internal SHCA */ int ehca_reg_internal_maxmr( @@ -1495,6 +1621,11 @@ int ehca_reg_internal_maxmr( u32 num_hwpages; u64 hw_pgsize; + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr = ehca_mr_new(); if (!e_mr) { ehca_err(&shca->ib_device, "out of memory"); @@ -1504,8 +1635,8 @@ int ehca_reg_internal_maxmr( e_mr->flags |= EHCA_MR_FLAG_MAXMR; /* register internal max-MR on HCA */ - size_maxmr = (u64)high_memory - PAGE_OFFSET; 
- iova_start = (u64 *)KERNELBASE; + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, @@ -1524,7 +1655,7 @@ int ehca_reg_internal_maxmr( ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " @@ -1724,62 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ; - *kpage = phys_to_abs(pgaddr + - (pginfo->next_hwpage * - pginfo->hwpage_size)); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx " - "chunk->page_list[i]=%llx " - "i=%x next_hwpage=%llx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1787,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", 
pgaddr, - *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1814,113 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx i=%x", - pgaddr, i); + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. 
- */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx i=%x " - "mr_pgsize=%llx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = phys_to_abs( - pgaddr & - ~(pginfo->hwpage_size - 1)); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)abs_to_virt( - phys_to_abs(pgaddr)); - ehca_gen_dbg("kpage=%llx chunk_page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1953,9 +2027,8 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, pginfo->num_hwpages, i); return -EFAULT; } - *kpage = phys_to_abs( - (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size)); + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); if ( !(*kpage) && pbuf->addr ) { ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " "next_hwpage=%llx", pbuf->addr, @@ -1993,8 +2066,8 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, /* loop over desired page_list entries */ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size); + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%llx fmrlist=%p " "next_listelem=%llx 
next_hwpage=%llx", @@ -2021,8 +2094,7 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, u64 prev = *kpage; /* check if adrs are contiguous */ for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = phys_to_abs(fmrlist[j] & - ~(pginfo->hwpage_size - 1)); + u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); if (prev + pginfo->u.fmr.fmr_pgsize != p) { ehca_gen_err("uncontiguous fmr pages " "found prev=%llx p=%llx " @@ -2077,8 +2149,8 @@ int ehca_mr_is_maxmr(u64 size, u64 *iova_start) { /* a MR is treated as max-MR only if it fits following: */ - if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void *)KERNELBASE)) { + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2184,3 +2256,338 @@ void ehca_cleanup_mrmw_cache(void) if (mw_cache) kmem_cache_destroy(mw_cache); } + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long 
initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + 
+ return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index bc8f4e31c12..50d8b51306d 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -42,6 +42,11 @@ #ifndef _EHCA_MRMW_H_ #define _EHCA_MRMW_H_ +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -50,7 +55,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, - u32 *rkey); + u32 *rkey, + enum ehca_reg_type reg_type); int ehca_reg_mr_rpages(struct ehca_shca *shca, struct ehca_mr *e_mr, @@ -118,4 +124,9 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, void ehca_mr_deletenew(struct ehca_mr *mr); +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 2fe554855fa..351577a6670 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -38,6 +38,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 5d28e3e98a2..90c4efa6758 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -46,7 +46,7 @@ #include "ehca_tools.h" -/* virtual scatter gather entry to specify remote adresses with length */ +/* virtual scatter gather entry to specify remote addresses with length */ struct ehca_vsgentry { u64 vaddr; u32 lkey; @@ -148,7 +148,7 @@ struct ehca_wqe { u32 immediate_data; union { struct { - u64 remote_virtual_adress; + u64 remote_virtual_address; u32 rkey; u32 reserved; u64 atomic_1st_op_dma_len; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 00c10815971..2e89356c46f 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -43,6 +43,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -55,9 +57,7 @@ static struct kmem_cache *qp_cache; /* * attributes not supported by query qp */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ - IB_QP_MAX_QP_RD_ATOMIC | \ - IB_QP_ACCESS_FLAGS | \ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ IB_QP_EN_SQD_ASYNC_NOTIFY) /* @@ -251,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) return ST_UD; case IB_QPT_RAW_IPV6: return -EINVAL; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: return -EINVAL; default: ehca_gen_err("Invalid ibqptype=%x", ibqptype); @@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, ret = -EINVAL; goto init_qp_queue1; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, @@ -461,7 +461,7 @@ static struct ehca_qp *internal_create_qp( ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -609,9 +609,6 @@ static struct ehca_qp *internal_create_qp( } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); @@ -619,6 +616,11 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + atomic_set(&my_qp->nr_events, 0); init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); @@ -634,30 +636,26 @@ static struct ehca_qp *internal_create_qp( my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - do { - if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't reserve idr resources."); - goto create_qp_exit0; - } + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - } while (ret == -EAGAIN); + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; - if (ret) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { 
+ if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } goto create_qp_exit0; } - if (my_qp->token > 0x1FFFFFF) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - goto create_qp_exit1; - } - if (has_srq) parms.srq_token = my_qp->token; @@ -707,7 +705,7 @@ static struct ehca_qp *internal_create_qp( (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", h_ret); @@ -769,18 +767,20 @@ static struct ehca_qp *internal_create_qp( goto create_qp_exit2; } - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -792,20 +792,21 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit4; } - - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } else if (init_attr->srq) { + } else if (init_attr->srq && !is_user) { /* this is a base QP, use the queue map of the SRQ */ my_qp->rq_map = my_srq->rq_map; INIT_LIST_HEAD(&my_qp->rq_err_node); @@ -918,7 +919,7 @@ create_qp_exit7: kfree(my_qp->mod_qp_parm); create_qp_exit6: - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) vfree(my_qp->rq_map.map); create_qp_exit5: @@ -926,7 +927,7 @@ create_qp_exit5: ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) vfree(my_qp->sq_map.map); create_qp_exit3: @@ -972,6 +973,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct hcp_modify_qp_control_block *mqpcb; u64 hret, update_mask; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + /* For common attributes, internal_create_qp() takes 
its info * out of qp_init_attr, so copy all common attrs there. */ @@ -1086,7 +1090,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ - bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + bad_send_wqe_v = __va((u64)bad_send_wqe_p); if (ehca_debug_level >= 2) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; @@ -1130,7 +1134,7 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, /* convert real to abs address */ wqe_p = wqe_p & (~(1UL << 63)); - wqe_v = abs_to_virt(wqe_p); + wqe_v = __va(wqe_p); if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { ehca_gen_err("Invalid offset for calculating left cqes " @@ -1244,6 +1248,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; @@ -1266,6 +1271,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1322,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; if (!smi_reset2init && !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask)) { + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid qp transition new_state=%x cur_state=%x " @@ -1728,7 +1735,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { ret = check_for_left_cqes(my_qp, shca); if (ret) goto modify_qp_exit2; @@ -1738,16 +1746,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); - if (qp_cur_state == IB_QPS_ERR) { + if (qp_cur_state == IB_QPS_ERR && !is_user) { del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); if (HAS_RQ(my_qp)) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); } - reset_queue_map(&my_qp->sq_map); + if (!is_user) + reset_queue_map(&my_qp->sq_map); - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) reset_queue_map(&my_qp->rq_map); } @@ -1952,19 +1961,13 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; qp_attr->dest_qp_num = qpcb->dest_qp_nr; - qp_attr->pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); - - qp_attr->port_num = - EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); - + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; qp_attr->timeout = qpcb->timeout; qp_attr->retry_cnt = qpcb->retry_count; qp_attr->rnr_retry = qpcb->rnr_retry_count; - qp_attr->alt_pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); - + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; @@ -2051,8 +2054,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = - EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->curr_srq_limit = 
attr->srq_limit; mqpcb->qp_aff_asyn_ev_log_reg = EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); } @@ -2115,8 +2117,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; srq_attr->max_sge = 3; - srq_attr->srq_limit = EHCA_BMASK_GET( - MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + srq_attr->srq_limit = qpcb->curr_srq_limit; if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); @@ -2138,10 +2139,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if (uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " @@ -2168,10 +2171,10 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, * SRQs will never get into an error list and do not have a recv_cq, * so we need to skip them here. */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); /* now wait until all pending events have completed */ @@ -2209,13 +2212,13 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - - vfree(my_qp->rq_map.map); + if (!is_user) + vfree(my_qp->rq_map.map); } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - - vfree(my_qp->sq_map.map); + if (!is_user) + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 5a3d96f84c7..47f94984353 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,6 @@ */ -#include <asm/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -136,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *)abs_to_virt(sge->addr); + u8 *data = __va(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -269,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intentional here */ case IB_QPT_RC: /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_adress = + wqe_p->u.nud.remote_virtual_address = send_wr->wr.rdma.remote_addr; wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; @@ -400,7 +399,6 @@ static inline void map_ib_wc_status(u32 cqe_status, static inline int post_one_send(struct ehca_qp *my_qp, struct ib_send_wr *cur_send_wr, - struct ib_send_wr **bad_send_wr, int hidden) { struct ehca_wqe *wqe_p; @@ -412,8 +410,6 @@ static inline int post_one_send(struct ehca_qp *my_qp, wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " "qp_num=%x", my_qp->ib_qp.qp_num); return -ENOMEM; @@ -433,8 +429,6 @@ static inline int post_one_send(struct ehca_qp *my_qp, */ if (unlikely(ret)) { my_qp->ipz_squeue.current_q_offset = start_offset; - if (bad_send_wr) - *bad_send_wr = cur_send_wr; 
ehca_err(my_qp->ib_qp.device, "Could not write WQE " "qp_num=%x", my_qp->ib_qp.qp_num); return -EINVAL; @@ -448,7 +442,6 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ib_send_wr *cur_send_wr; int wqe_cnt = 0; int ret = 0; unsigned long flags; @@ -457,7 +450,8 @@ int ehca_post_send(struct ib_qp *qp, if (unlikely(my_qp->state < IB_QPS_RTS)) { ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", my_qp->state, qp->qp_num); - return -EINVAL; + ret = -EINVAL; + goto out; } /* LOCK the QUEUE */ @@ -476,24 +470,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr circ_wr; memset(&circ_wr, 0, sizeof(circ_wr)); circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */ + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ wqe_cnt++; ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); my_qp->message_count = my_qp->packet_count = 0; } /* loop processes list of send reqs */ - for (cur_send_wr = send_wr; cur_send_wr != NULL; - cur_send_wr = cur_send_wr->next) { - ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0); + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); if (unlikely(ret)) { - /* if one or more WQEs were successful, don't fail */ - if (wqe_cnt) - ret = 0; goto post_send_exit0; } wqe_cnt++; - } /* eof for cur_send_wr */ + send_wr = send_wr->next; + } post_send_exit0: iosync(); /* serialize GAL register access */ @@ -503,6 +494,10 @@ post_send_exit0: my_qp, qp->qp_num, wqe_cnt, ret); my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; return ret; } @@ -511,7 +506,6 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; @@ -522,27 +516,23 @@ static int internal_post_recv(struct ehca_qp *my_qp, if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", my_qp, my_qp->real_qp_num, my_qp->ext_type); - return -ENODEV; + ret = -ENODEV; + goto out; } /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_r, flags); - /* loop processes list of send reqs */ - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; - cur_recv_wr = cur_recv_wr->next) { + /* loop processes list of recv reqs */ + while (recv_wr) { u64 start_offset = my_qp->ipz_rqueue.current_q_offset; /* get pointer next to free WQE */ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_recv_wr) - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } /* @@ -552,7 +542,7 @@ static int internal_post_recv(struct ehca_qp *my_qp, rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, rq_map_idx); /* * if something failed, @@ -560,22 +550,20 @@ static int internal_post_recv(struct ehca_qp *my_qp, */ if (unlikely(ret)) { my_qp->ipz_rqueue.current_q_offset = start_offset; - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(dev, "Could not 
write WQE " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } qmap_entry = &my_qp->rq_map.map[rq_map_idx]; - qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); qmap_entry->reported = 0; qmap_entry->cqe_req = 1; wqe_cnt++; - } /* eof for cur_recv_wr */ + recv_wr = recv_wr->next; + } /* eof for recv_wr */ post_recv_exit0: iosync(); /* serialize GAL register access */ @@ -584,6 +572,11 @@ post_recv_exit0: ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", my_qp, my_qp->real_qp_num, wqe_cnt, ret); spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + return ret; } @@ -597,6 +590,7 @@ int ehca_post_recv(struct ib_qp *qp, if (unlikely(my_qp->state == IB_QPS_RESET)) { ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; return -EINVAL; } @@ -786,7 +780,11 @@ repoll: wc->slid = cqe->rlid; wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; - wc->wc_flags = cqe->w_completion_flags; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. + */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index c568b28f4e2..dba8f9f8b99 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -125,14 +125,30 @@ struct ib_perf { u8 data[192]; } __attribute__ ((packed)); +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_perf *in_perf = (struct ib_perf *)in_mad; struct ib_perf *out_perf = (struct ib_perf *)out_mad; struct ib_class_port_info *poi = (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct ehca_sport *sport = &shca->sport[port_num - 1]; @@ -158,10 +174,29 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, poi->base_version = 1; poi->class_version = 1; poi->resp_time_value = 18; - poi->redirect_lid = sport->saved_attr.lid; - poi->redirect_qp = sport->pma_qp_nr; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; poi->redirect_qkey = IB_QP1_QKEY; - poi->redirect_pkey = IB_DEFAULT_PKEY_FULL; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + struct vertcfl *vertcfl = + (struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, 
port_num, 0, + (union ib_gid *)&poi->redirect_gid); ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", sport->saved_attr.lid, sport->pma_qp_nr); @@ -183,12 +218,11 @@ perf_reply: int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad) + struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; - if (!port_num || port_num > ibdev->phys_port_cnt) + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; /* accept only pma request */ @@ -196,7 +230,8 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index f09914cccf5..d280b12aae6 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -58,8 +58,7 @@ #include <linux/cpu.h> #include <linux/device.h> -#include <asm/atomic.h> -#include <asm/abs_addr.h> +#include <linux/atomic.h> #include <asm/ibmebus.h> #include <asm/io.h> #include <asm/pgtable.h> diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 3cb688d2913..1a1d5d99fcf 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -40,6 +40,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -95,7 +97,7 @@ static void ehca_mm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end, *count); } -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .open = ehca_mm_open, .close = ehca_mm_close, }; @@ -115,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, physical = galpas->user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, vma->vm_page_prot); if (unlikely(ret)) { @@ -137,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, u64 start, ofs; struct page *page; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; start = vma->vm_start; for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index d0ab0c0d5e9..89517ffb438 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -90,26 +90,6 @@ static DEFINE_SPINLOCK(hcall_lock); -static u32 get_longbusy_msecs(int longbusy_rc) -{ - switch (longbusy_rc) { - case H_LONG_BUSY_ORDER_1_MSEC: - return 1; - case H_LONG_BUSY_ORDER_10_MSEC: - return 10; - case H_LONG_BUSY_ORDER_100_MSEC: - return 100; - case H_LONG_BUSY_ORDER_1_SEC: - return 1000; - case H_LONG_BUSY_ORDER_10_SEC: - return 10000; - case H_LONG_BUSY_ORDER_100_SEC: - return 100000; - default: - return 1; - } -} - static long ehca_plpar_hcall_norets(unsigned long 
opcode, unsigned long arg1, unsigned long arg2, @@ -269,6 +249,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, struct ehca_cq *cq, struct ehca_alloc_cq_parms *param) { + int rc; u64 ret; unsigned long outs[PLPAR_HCALL9_BUFSIZE]; @@ -283,8 +264,19 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_nr_of_entries = (u32)outs[3]; param->act_pages = (u32)outs[4]; - if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lli", ret); @@ -293,8 +285,9 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { + int rc; u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; unsigned long outs[PLPAR_HCALL9_BUFSIZE]; @@ -358,8 +351,19 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->rqueue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. 
ret=%lli", ret); @@ -372,7 +376,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, struct hipz_query_port *query_port_response_block) { u64 ret; - u64 r_cb = virt_to_abs(query_port_response_block); + u64 r_cb = __pa(query_port_response_block); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response block not page aligned"); @@ -414,7 +418,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { - u64 r_cb = virt_to_abs(query_hca_rblock); + u64 r_cb = __pa(query_hca_rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response_block=%p not page aligned", @@ -553,7 +557,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ update_mask, /* r6 */ - virt_to_abs(mqpcb), /* r7 */ + __pa(mqpcb), /* r7 */ 0, 0, 0, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) @@ -571,7 +575,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_QUERY_QP, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ - virt_to_abs(qqpcb), /* r6 */ + __pa(qqpcb), /* r6 */ 0, 0, 0, 0); } @@ -763,7 +767,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, if (count > 1) { u64 *kpage; int i; - kpage = (u64 *)abs_to_virt(logical_address_of_page); + kpage = __va(logical_address_of_page); for (i = 0; i < count; i++) ehca_gen_dbg("kpage[%d]=%p", i, (void *)kpage[i]); @@ -920,7 +924,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, void *rblock, unsigned long *byte_count) { - u64 r_cb = virt_to_abs(rblock); + u64 r_cb = __pa(rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("rblock not page aligned."); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2c3c6e0ea5c..a46e514c367 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -49,7 +49,7 @@ #include "hipz_hw.h" /* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize * resources, create the empty EQPT (ring). */ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). 
*/ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 214821095cb..077376ff3d2 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -42,10 +42,9 @@ #include "ehca_classes.h" #include "hipz_hw.h" -int hcall_map_page(u64 physaddr, u64 *mapaddr) +u64 hcall_map_page(u64 physaddr) { - *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); - return 0; + return (u64)ioremap(physaddr, EHCA_PAGESIZE); } int hcall_unmap_page(u64 mapaddr) @@ -54,12 +53,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; galpas->user.fw_handle = paddr_user; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 5305c2a3ed9..d1b02991024 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,12 +78,12 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); -int hcall_map_page(u64 physaddr, u64 * mapaddr); +u64 hcall_map_page(u64 physaddr); int hcall_unmap_page(u64 mapaddr); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index c3a32846543..8d594517cd2 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -38,6 +38,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ipz_pt_fn.h" #include "ehca_classes.h" @@ -79,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) { int i; for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + u64 page = __pa(queue->queue_pages[i]); if (addr >= page && addr < page + queue->pagesize) { *q_offset = addr - page + i * queue->pagesize; return 0; @@ -220,12 +222,15 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); /* allocate actual queue pages */ if (is_small) { @@ -240,7 +245,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -262,7 +270,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } |
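Two recurring patterns in this patch series may be easier to follow in isolation. The sketches below are illustrative only: the names (example_idr, example_alloc_token, example_alloc_page_list, and so on) are hypothetical stand-ins rather than ehca symbols, and the real driver wraps these calls in its own locking (a rwlock_t, ehca_qp_idr_lock) and error reporting.

First, the qp token allocation in ehca_qp.c moves from the removed idr_pre_get()/idr_get_new() retry loop to idr_preload()/idr_alloc(): idr memory is preloaded with GFP_KERNEL outside the lock so that the allocation inside the lock can use GFP_NOWAIT. A minimal sketch of that shape, assuming a driver-private idr and a plain spinlock:

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);            /* hypothetical stand-in for ehca_qp_idr */
static DEFINE_SPINLOCK(example_idr_lock);  /* the driver itself uses a rwlock_t */

/* Allocate an id for obj in [0, 0x2000000), the qp token range used in the patch. */
static int example_alloc_token(void *obj)
{
	unsigned long flags;
	int id;

	idr_preload(GFP_KERNEL);               /* may sleep; preallocates idr memory */
	spin_lock_irqsave(&example_idr_lock, flags);

	id = idr_alloc(&example_idr, obj, 0, 0x2000000, GFP_NOWAIT);

	spin_unlock_irqrestore(&example_idr_lock, flags);
	idr_preload_end();

	return id;                             /* >= 0 on success, -ENOMEM/-ENOSPC on failure */
}

Second, ipz_queue_ctor()/ipz_queue_dtor() switch the queue page-pointer list from an unconditional vmalloc() to kzalloc() with a vzalloc() fallback, and free it with whichever call matches the address. A sketch of the same fallback, again with hypothetical names:

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

static void **example_alloc_page_list(unsigned long nr_of_pages)
{
	/* __GFP_NOWARN: a kzalloc failure here is expected and handled by the fallback */
	void **pages = kzalloc(nr_of_pages * sizeof(void *), GFP_KERNEL | __GFP_NOWARN);

	if (!pages)
		pages = vzalloc(nr_of_pages * sizeof(void *));
	return pages;
}

static void example_free_page_list(void **pages)
{
	if (is_vmalloc_addr(pages))
		vfree(pages);
	else
		kfree(pages);
}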
