diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca/ehca_irq.c')
| -rw-r--r-- | drivers/infiniband/hw/ehca/ehca_irq.c | 598 |
1 files changed, 314 insertions, 284 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index f284be1c916..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -5,6 +5,8 @@ * * Authors: Heiko J Schick <schickhj@de.ibm.com> * Khadija Souissi <souissi@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -39,6 +41,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> +#include <linux/smpboot.h> + #include "ehca_classes.h" #include "ehca_irq.h" #include "ehca_iverbs.h" @@ -47,28 +52,27 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; -#ifdef CONFIG_HOTPLUG_CPU -static struct notifier_block comp_pool_callback_nb; -#endif +static struct ehca_comp_pool *pool; static inline void comp_event_callback(struct ehca_cq *cq) { @@ -82,8 +86,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -91,37 +95,37 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) return; ehca_err(&shca->ib_device, - "QP 0x%x (resource=%lx) has errors.", + "QP 0x%x (resource=%llx) has errors.", qp->ib_qp.qp_num, resource); break; } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%lx) has errors.", + "CQ 0x%x (resource=%llx) has errors.", cq->cq_number, resource); break; } default: ehca_err(&shca->ib_device, - "Unknown errror type: %lx on %s.", + "Unknown error type: %llx on %s.", type, shca->ib_device.name); break; } - ehca_err(&shca->ib_device, "Error data is available: %lx.", resource); + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); ehca_err(&shca->ib_device, "EHCA ----- error data begin " "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%lx", resource); + ehca_dmp(rblock, length, "resource=%llx", resource); ehca_err(&shca->ib_device, "EHCA ----- error data end " "----------------------------------------------------"); @@ -151,7 +155,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, if (ret == H_R_STATE) ehca_err(&shca->ib_device, - "No error data is available: %lx.", resource); + "No error data is available: %llx.", resource); else if (ret == H_SUCCESS) { int length; @@ -163,7 +167,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, print_error_data(shca, data, rblock, length); } else ehca_err(&shca->ib_device, - "Error data could not be fetched: %lx", resource); + "Error data could not be fetched: %llx", resource); ehca_free_fw_ctrlblock(rblock); @@ -172,34 +176,64 @@ error_data1: } -static void qp_event_callback(struct ehca_shca *shca, - u64 eqe, +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, enum ib_event_type event_type) { struct ib_event event; + + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ struct ehca_qp *qp; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); if (!qp) return; - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - if (!qp->ib_qp.event_handler) - return; - - event.device = &shca->ib_device; - event.event = event_type; - event.element.qp = &qp->ib_qp; + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); return; } @@ -207,18 +241,22 @@ static void cq_event_callback(struct ehca_shca *shca, u64 eqe) { struct ehca_cq *cq; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!cq) return; ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + return; } @@ -228,17 +266,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) switch (identifier) { case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); break; case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); break; case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); break; case 0x05: /* QP error */ case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); break; case 0x07: /* CQ error */ case 0x08: /* CQ error */ @@ -265,13 +303,18 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) case 0x11: /* unaffiliated access error */ ehca_err(&shca->ib_device, "Unaffiliated access error."); break; - case 0x12: /* path migrating error */ - ehca_err(&shca->ib_device, "Path migration error."); + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); break; case 0x13: /* interface trace stopped */ ehca_err(&shca->ib_device, "Interface trace stopped."); break; case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; default: ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", identifier, shca->ib_device.name); @@ -281,30 +324,90 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) return; } -static void parse_ec(struct ehca_shca *shca, u64 eqe) +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) { struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. + */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); } else { - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); } break; case 0x31: @@ -312,24 +415,21 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) * disruptive change is caused by * LID, PKEY or SM change */ - ehca_warn(&shca->ib_device, - "disruptive port %x configuration change", port); - - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); break; case 0x32: /* adapter malfunction */ ehca_err(&shca->ib_device, "Adapter malfunction."); @@ -337,6 +437,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) case 0x33: /* trace stopped */ ehca_err(&shca->ib_device, "Traced stopped."); break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; default: ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", ec, shca->ib_device.name); @@ -372,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -404,35 +514,31 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) { u64 eqe_value; u32 token; - unsigned long flags; struct ehca_cq *cq; eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq token=%x", token); return; } reset_eq_pending(cq); - cq->nr_events++; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (ehca_scaling_code) queue_comp_task(cq); else { comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -444,12 +550,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; - unsigned long flags; + u64 eqe_value, ret; int eqe_cnt, i; int eq_empty = 0; - spin_lock_irqsave(&eq->irq_spinlock, flags); + spin_lock(&eq->irq_spinlock); if (is_irq) { const int max_query_cnt = 100; int query_cnt = 0; @@ -469,24 +574,23 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock(&ehca_cq_idr_lock); + read_lock(&ehca_cq_idr_lock); eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!eqe_cache[eqe_cnt].cq) { - spin_unlock(&ehca_cq_idr_lock); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq " "token=%x", token); continue; } - eqe_cache[eqe_cnt].cq->nr_events++; - spin_unlock(&ehca_cq_idr_lock); } else eqe_cache[eqe_cnt].cq = NULL; eqe_cnt++; @@ -496,8 +600,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ @@ -517,12 +626,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) else { struct ehca_cq *cq = eq->eqe_cache[i].cq; comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -533,14 +638,14 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); } while (1); unlock_irq_spinlock: - spin_unlock_irqrestore(&eq->irq_spinlock, flags); + spin_unlock(&eq->irq_spinlock); } void ehca_tasklet_eq(unsigned long data) @@ -548,27 +653,30 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level) - ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) - cpu = first_cpu(cpu_online_map); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -579,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -591,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -598,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -606,31 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); - comp_event_callback(cq); + spin_unlock_irq(&cct->task_lock); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -639,151 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task* cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); - - return cct->task; + spin_unlock_irq(&target->task_lock); } -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static void take_over_work(struct ehca_comp_pool *pool, - int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while(!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, - smp_processor_id())); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -#ifdef CONFIG_HOTPLUG_CPU -static int comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return NOTIFY_BAD; - } - break; - case CPU_UP_CANCELED: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, any_online_cpu(cpu_online_map)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -#endif + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -793,48 +822,49 @@ int ehca_create_comp_pool(void) return -ENOMEM; spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = any_online_cpu(cpu_online_map); + pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } -#ifdef CONFIG_HOTPLUG_CPU - comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; - register_cpu_notifier(&comp_pool_callback_nb); -#endif + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&comp_pool_callback_nb); -#endif + smpboot_unregister_percpu_thread(&comp_pool_threads); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - destroy_comp_task(pool, i); - } + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } |
