diff options
Diffstat (limited to 'drivers/infiniband/core/mad.c')
| -rw-r--r-- | drivers/infiniband/core/mad.c | 843 |
1 files changed, 590 insertions, 253 deletions
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d393b504bf2..ab31f136d04 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1,7 +1,8 @@ /* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,9 +32,11 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ */ #include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <rdma/ib_cache.h> #include "mad_priv.h" #include "mad_rmpp.h" @@ -45,15 +48,21 @@ MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); +static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; +static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; -kmem_cache_t *ib_mad_cache; +module_param_named(send_queue_size, mad_sendq_size, int, 0444); +MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); +module_param_named(recv_queue_size, mad_recvq_size, int, 0444); +MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); + +static struct kmem_cache *ib_mad_cache; static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; /* Port list lock */ -static spinlock_t ib_mad_port_list_lock; - +static DEFINE_SPINLOCK(ib_mad_port_list_lock); /* Forward declarations */ static int method_in_use(struct ib_mad_mgmt_method_table **method, @@ -65,8 +74,8 @@ static struct ib_mad_agent_private *find_mad_agent( static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); -static void timeout_sends(void *data); -static void local_completions(void *data); +static void timeout_sends(struct work_struct *work); +static void local_completions(struct work_struct *work); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); @@ -167,6 +176,15 @@ static int is_vendor_method_in_use( return 0; } +int ib_response_mad(struct ib_mad *mad) +{ + return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || + (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && + (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); +} +EXPORT_SYMBOL(ib_response_mad); + /* * ib_register_mad_agent - Register to send/receive MADs */ @@ -227,6 +245,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (!is_vendor_oui(mad_reg_req->oui)) goto error1; } + /* Make sure class supplied is consistent with RMPP */ + if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { + if (rmpp_version) + goto error1; + } /* Make sure class supplied is consistent with QP type */ if (qp_type == IB_QPT_SMI) { if ((mad_reg_req->mgmt_class != @@ -254,6 +277,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error1; } + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0 */ + if (!port_priv->qp_info[qpn].qp) { + ret = ERR_PTR(-EPROTONOSUPPORT); + goto error1; + } + /* Allocate structures */ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); if (!mad_agent_priv) { @@ -269,13 +299,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } if (mad_reg_req) { - reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); + reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); if (!reg_req) { ret = ERR_PTR(-ENOMEM); goto error3; } - /* Make a copy of the MAD registration request */ - memcpy(reg_req, mad_reg_req, sizeof *reg_req); } /* Now, fill in the various structures */ @@ -288,6 +316,16 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, mad_agent_priv->agent.context = context; mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_agent_priv->agent.port_num = port_num; + spin_lock_init(&mad_agent_priv->lock); + INIT_LIST_HEAD(&mad_agent_priv->send_list); + INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); + INIT_LIST_HEAD(&mad_agent_priv->local_list); + INIT_WORK(&mad_agent_priv->local_work, local_completions); + atomic_set(&mad_agent_priv->refcount, 1); + init_completion(&mad_agent_priv->comp); spin_lock_irqsave(&port_priv->reg_lock, flags); mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; @@ -337,18 +375,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); - spin_lock_init(&mad_agent_priv->lock); - INIT_LIST_HEAD(&mad_agent_priv->send_list); - INIT_LIST_HEAD(&mad_agent_priv->wait_list); - INIT_LIST_HEAD(&mad_agent_priv->done_list); - INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); - INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); - INIT_LIST_HEAD(&mad_agent_priv->local_list); - INIT_WORK(&mad_agent_priv->local_work, local_completions, - mad_agent_priv); - atomic_set(&mad_agent_priv->refcount, 1); - init_waitqueue_head(&mad_agent_priv->wait); - return &mad_agent_priv->agent; error4: @@ -394,19 +420,15 @@ static int register_snoop_agent(struct ib_mad_qp_info *qp_info, if (i == qp_info->snoop_table_size) { /* Grow table. */ - new_snoop_table = kmalloc(sizeof mad_snoop_priv * - qp_info->snoop_table_size + 1, - GFP_ATOMIC); + new_snoop_table = krealloc(qp_info->snoop_table, + sizeof mad_snoop_priv * + (qp_info->snoop_table_size + 1), + GFP_ATOMIC); if (!new_snoop_table) { i = -ENOMEM; goto out; } - if (qp_info->snoop_table) { - memcpy(new_snoop_table, qp_info->snoop_table, - sizeof mad_snoop_priv * - qp_info->snoop_table_size); - kfree(qp_info->snoop_table); - } + qp_info->snoop_table = new_snoop_table; qp_info->snoop_table_size++; } @@ -462,7 +484,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_snoop_priv->agent.port_num = port_num; mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_waitqueue_head(&mad_snoop_priv->wait); + init_completion(&mad_snoop_priv->comp); mad_snoop_priv->snoop_index = register_snoop_agent( &port_priv->qp_info[qpn], mad_snoop_priv); @@ -481,6 +503,18 @@ error1: } EXPORT_SYMBOL(ib_register_mad_snoop); +static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + complete(&mad_agent_priv->comp); +} + +static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) +{ + if (atomic_dec_and_test(&mad_snoop_priv->refcount)) + complete(&mad_snoop_priv->comp); +} + static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; @@ -504,9 +538,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); - atomic_dec(&mad_agent_priv->refcount); - wait_event(mad_agent_priv->wait, - !atomic_read(&mad_agent_priv->refcount)); + deref_mad_agent(mad_agent_priv); + wait_for_completion(&mad_agent_priv->comp); kfree(mad_agent_priv->reg_req); ib_dereg_mr(mad_agent_priv->agent.mr); @@ -524,9 +557,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) atomic_dec(&qp_info->snoop_count); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - atomic_dec(&mad_snoop_priv->refcount); - wait_event(mad_snoop_priv->wait, - !atomic_read(&mad_snoop_priv->refcount)); + deref_snoop_agent(mad_snoop_priv); + wait_for_completion(&mad_snoop_priv->comp); kfree(mad_snoop_priv); } @@ -555,13 +587,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) } EXPORT_SYMBOL(ib_unregister_mad_agent); -static inline int response_mad(struct ib_mad *mad) -{ - /* Trap represses are responses although response bit is reset */ - return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); -} - static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; @@ -595,8 +620,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, send_buf, mad_send_wc); - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - wake_up(&mad_snoop_priv->wait); + deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); @@ -621,14 +645,14 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, mad_recv_wc); - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - wake_up(&mad_snoop_priv->wait); + deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); } -static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, +static void build_smp_wc(struct ib_qp *qp, + u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); @@ -638,7 +662,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, wc->pkey_index = pkey_index; wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); wc->src_qp = IB_QP0; - wc->qp_num = IB_QP0; + wc->qp = qp; wc->slid = slid; wc->sl = 0; wc->dlid_path_bits = 0; @@ -653,7 +677,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr) { - int ret; + int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct ib_mad_local_private *local; @@ -661,18 +685,34 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; + u8 port_num; struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; - if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { + if (device->node_type == RDMA_NODE_IB_SWITCH && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + port_num = send_wr->wr.ud.port_num; + else + port_num = mad_agent_priv->agent.port_num; + + /* + * Directed route handling starts if the initial LID routed part of + * a request or the ending LID routed part of a response is empty. + * If we are at the start of the LID routed part, don't update the + * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. + */ + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, device->node_type, port_num) == + IB_SMI_DISCARD) { ret = -EINVAL; printk(KERN_ERR PFX "Invalid directed route\n"); goto out; } + /* Check to post send on QP or process locally */ - ret = smi_check_local_dr_smp(smp, device, port_num); - if (!ret || !device->process_mad) + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) goto out; local = kmalloc(sizeof *local, GFP_ATOMIC); @@ -691,7 +731,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid), + build_smp_wc(mad_agent_priv->agent.qp, + send_wr->wr_id, be16_to_cpu(smp->dr_slid), send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); @@ -702,7 +743,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (response_mad(&mad_priv->mad.mad) && + if (ib_response_mad(&mad_priv->mad.mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -722,16 +763,17 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - mad_priv->mad.mad.mad_hdr.tid = - ((struct ib_mad *)smp)->mad_hdr.tid; + memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); recv_mad_agent = find_mad_agent(port_priv, &mad_priv->mad.mad); } if (!port_priv || !recv_mad_agent) { + /* + * No receiving agent so drop packet and + * generate send completion. + */ kmem_cache_free(ib_mad_cache, mad_priv); - kfree(local); - ret = 0; - goto out; + break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; @@ -757,18 +799,67 @@ out: return ret; } -static int get_buf_length(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len) { int seg_size, pad; seg_size = sizeof(struct ib_mad) - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; - if (pad == seg_size) - pad = 0; + return pad == seg_size ? 0 : pad; } else - pad = seg_size; - return hdr_len + data_len + pad; + return seg_size; +} + +static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_segment *s, *t; + + list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { + list_del(&s->list); + kfree(s); + } +} + +static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, + gfp_t gfp_mask) +{ + struct ib_mad_send_buf *send_buf = &send_wr->send_buf; + struct ib_rmpp_mad *rmpp_mad = send_buf->mad; + struct ib_rmpp_segment *seg = NULL; + int left, seg_size, pad; + + send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + seg_size = send_buf->seg_size; + pad = send_wr->pad; + + /* Allocate data segments. */ + for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { + seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + if (!seg) { + printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " + "alloc failed for len %zd, gfp %#x\n", + sizeof (*seg) + seg_size, gfp_mask); + free_send_rmpp_list(send_wr); + return -ENOMEM; + } + seg->num = ++send_buf->seg_count; + list_add_tail(&seg->list, &send_wr->rmpp_list); + } + + /* Zero any padding */ + if (pad) + memset(seg->data + seg_size - pad, 0, pad); + + rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> + agent.rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + send_wr->cur_seg = container_of(send_wr->rmpp_list.next, + struct ib_rmpp_segment, list); + send_wr->last_ack_seg = send_wr->cur_seg; + return 0; } struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, @@ -779,32 +870,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; - int buf_size; + int pad, message_size, ret, size; void *buf; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - buf_size = get_buf_length(hdr_len, data_len); + pad = get_pad_size(hdr_len, data_len); + message_size = hdr_len + data_len + pad; if ((!mad_agent->rmpp_version && - (rmpp_active || buf_size > sizeof(struct ib_mad))) || - (!rmpp_active && buf_size > sizeof(struct ib_mad))) + (rmpp_active || message_size > sizeof(struct ib_mad))) || + (!rmpp_active && message_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask); + size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - mad_send_wr = buf + buf_size; + mad_send_wr = buf + size; + INIT_LIST_HEAD(&mad_send_wr->rmpp_list); mad_send_wr->send_buf.mad = buf; + mad_send_wr->send_buf.hdr_len = hdr_len; + mad_send_wr->send_buf.data_len = data_len; + mad_send_wr->pad = pad; mad_send_wr->mad_agent_priv = mad_agent_priv; - mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; + mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.num_sge = 2; mad_send_wr->send_wr.opcode = IB_WR_SEND; mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; @@ -812,13 +911,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - IB_MGMT_RMPP_HDR + data_len); - rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; - rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, - IB_MGMT_RMPP_FLAG_ACTIVE); + ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + if (ret) { + kfree(buf); + return ERR_PTR(ret); + } } mad_send_wr->send_buf.mad_agent = mad_agent; @@ -827,16 +924,80 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_create_send_mad); +int ib_get_mad_data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS)) + return IB_MGMT_DEVICE_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_MAD_HDR; +} +EXPORT_SYMBOL(ib_get_mad_data_offset); + +int ib_is_mad_class_rmpp(u8 mgmt_class) +{ + if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS) || + ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) + return 1; + return 0; +} +EXPORT_SYMBOL(ib_is_mad_class_rmpp); + +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct list_head *list; + + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + list = &mad_send_wr->cur_seg->list; + + if (mad_send_wr->cur_seg->num < seg_num) { + list_for_each_entry(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } else if (mad_send_wr->cur_seg->num > seg_num) { + list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } + return mad_send_wr->cur_seg->data; +} +EXPORT_SYMBOL(ib_get_rmpp_segment); + +static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) +{ + if (mad_send_wr->send_buf.seg_count) + return ib_get_rmpp_segment(&mad_send_wr->send_buf, + mad_send_wr->seg_num); + else + return mad_send_wr->send_buf.mad + + mad_send_wr->send_buf.hdr_len; +} + void ib_free_send_mad(struct ib_mad_send_buf *send_buf) { struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - kfree(send_buf->mad); + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + free_send_rmpp_list(mad_send_wr); + kfree(send_buf->mad); + deref_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_free_send_mad); @@ -857,10 +1018,26 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; - sge->addr = dma_map_single(mad_agent->device->dma_device, - mad_send_wr->send_buf.mad, sge->length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + sge[0].addr = ib_dma_map_single(mad_agent->device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + + mad_send_wr->header_mapping = sge[0].addr; + + sge[1].addr = ib_dma_map_single(mad_agent->device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } + mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { @@ -877,11 +1054,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - sge->length, DMA_TO_DEVICE); - + if (ret) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->payload_mapping, + sge[1].length, DMA_TO_DEVICE); + } return ret; } @@ -913,6 +1093,13 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, goto error; } + if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) { + if (mad_agent_priv->agent.rmpp_version) { + ret = -EINVAL; + goto error; + } + } + /* * Save pointer to next work request to post in case the * current one completes, and the user modifies the work @@ -934,7 +1121,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->retries = send_buf->retries; + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -1020,10 +1209,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, { int i; - for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { printk(KERN_ERR PFX "Method %d already in use\n", i); return -EINVAL; @@ -1087,8 +1273,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, int i; for (i = 0; i < MAX_MGMT_OUI; i++) - /* Is there matching OUI for this vendor class ? */ - if (!memcmp(vendor_class->oui[i], oui, 3)) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; @@ -1157,13 +1343,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, goto error3; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error3: @@ -1256,13 +1438,9 @@ check_in_use: goto error4; /* Finally, add in methods being registered */ - for (i = find_first_bit(mad_reg_req->method_mask, - IB_MGMT_MAX_METHODS); - i < IB_MGMT_MAX_METHODS; - i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS, - 1+i)) { + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; - } + return 0; error4: @@ -1394,7 +1572,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - if (response_mad(mad)) { + if (ib_response_mad(mad)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1428,6 +1606,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, mad->mad_hdr.class_version].class; if (!class) goto out; + if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; if (method) @@ -1509,29 +1690,107 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } +static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, + struct ib_mad_recv_wc *rwc) +{ + return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class == + rwc->recv_buf.mad->mad_hdr.mgmt_class; +} + +static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *wr, + struct ib_mad_recv_wc *rwc ) +{ + struct ib_ah_attr attr; + u8 send_resp, rcv_resp; + union ib_gid sgid; + struct ib_device *device = mad_agent_priv->agent.device; + u8 port_num = mad_agent_priv->agent.port_num; + u8 lmc; + + send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); + rcv_resp = ib_response_mad(rwc->recv_buf.mad); + + if (send_resp == rcv_resp) + /* both requests, or both responses. GIDs different */ + return 0; + + if (ib_query_ah(wr->send_buf.ah, &attr)) + /* Assume not equal, to avoid false positives. */ + return 0; + + if (!!(attr.ah_flags & IB_AH_GRH) != + !!(rwc->wc->wc_flags & IB_WC_GRH)) + /* one has GID, other does not. Assume different */ + return 0; + + if (!send_resp && rcv_resp) { + /* is request/response. */ + if (!(attr.ah_flags & IB_AH_GRH)) { + if (ib_get_cached_lmc(device, port_num, &lmc)) + return 0; + return (!lmc || !((attr.src_path_bits ^ + rwc->wc->dlid_path_bits) & + ((1 << lmc) - 1))); + } else { + if (ib_get_cached_gid(device, port_num, + attr.grh.sgid_index, &sgid)) + return 0; + return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, + 16); + } + } + + if (!(attr.ah_flags & IB_AH_GRH)) + return attr.dlid == rwc->wc->slid; + else + return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, + 16); +} + +static inline int is_direct(u8 class) +{ + return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); +} + struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_recv_wc *wc) { - struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wr_private *wr; + struct ib_mad *mad; - list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, - agent_list) { - if (mad_send_wr->tid == tid) - return mad_send_wr; + mad = (struct ib_mad *)wc->recv_buf.mad; + + list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { + if ((wr->tid == mad->mad_hdr.tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } /* * It's possible to receive the response before we've * been notified that the send has completed */ - list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, - agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && - mad_send_wr->tid == tid && mad_send_wr->timeout) { + list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { + if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad->mad_hdr.tid && + wr->timeout && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ - return (mad_send_wr->status == IB_WC_SUCCESS) ? - mad_send_wr : NULL; - } + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } return NULL; } @@ -1539,11 +1798,9 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; - if (mad_send_wr->refcount == 1) { - list_del(&mad_send_wr->agent_list); - list_add_tail(&mad_send_wr->agent_list, + if (mad_send_wr->refcount == 1) + list_move_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->done_list); - } } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, @@ -1552,7 +1809,6 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; - __be64 tid; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); @@ -1560,22 +1816,19 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; } } /* Complete corresponding request */ - if (response_mad(mad_recv_wc->recv_buf.mad)) { - tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid; + if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = ib_find_send_mad(mad_agent_priv, tid); + mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); ib_free_recv_mad(mad_recv_wc); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; } ib_mark_mad_done(mad_send_wr); @@ -1594,24 +1847,40 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); } } +static bool generate_unmatched_resp(struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || + recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, sizeof *response); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + response->mad.mad.mad_hdr.status = + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + + return true; + } else { + return false; + } +} static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { struct ib_mad_qp_info *qp_info; struct ib_mad_private_header *mad_priv_hdr; - struct ib_mad_private *recv, *response; + struct ib_mad_private *recv, *response = NULL; struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; - - response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); - if (!response) - printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " - "for response buffer\n"); + int port_num; + int ret = IB_MAD_RESULT_SUCCESS; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; @@ -1620,11 +1889,11 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); - dma_unmap_single(port_priv->device->dma_device, - pci_unmap_addr(&recv->header, mapping), - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), - DMA_FROM_DEVICE); + ib_dma_unmap_single(port_priv->device, + recv->header.mapping, + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; @@ -1640,39 +1909,61 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) goto out; + response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + if (!response) { + printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " + "for response buffer\n"); + goto out; + } + + if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + port_num = wc->port_num; + else + port_num = port_priv->port_num; + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (!smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num, - port_priv->device->phys_port_cnt)) + enum smi_forward_action retsmi; + + if (smi_handle_dr_smp_recv(&recv->mad.smp, + port_priv->device->node_type, + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) goto out; - if (!smi_check_forward_dr_smp(&recv->mad.smp)) + + retsmi = smi_check_forward_dr_smp(&recv->mad.smp); + if (retsmi == IB_SMI_LOCAL) goto local; - if (!smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num)) - goto out; - if (!smi_check_local_dr_smp(&recv->mad.smp, + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(&recv->mad.smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + goto out; + + if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) + goto out; + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, sizeof(*response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response(&response->mad.mad, + &response->grh, wc, port_priv->device, - port_priv->port_num)) + smi_get_fwd_port(&recv->mad.smp), + qp_info->qp->qp_num); + goto out; + } } local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { - int ret; - - if (!response) { - printk(KERN_ERR PFX "No memory for response MAD\n"); - /* - * Is it better to assume that - * it wouldn't be processed ? - */ - goto out; - } - ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, @@ -1685,7 +1976,7 @@ local: agent_send_response(&response->mad.mad, &recv->grh, wc, port_priv->device, - port_priv->port_num, + port_num, qp_info->qp->qp_num); goto out; } @@ -1700,6 +1991,10 @@ local: * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; + } else if ((ret & IB_MAD_RESULT_SUCCESS) && + generate_unmatched_resp(recv, response)) { + agent_send_response(&response->mad.mad, &recv->grh, wc, + port_priv->device, port_num, qp_info->qp->qp_num); } out: @@ -1727,13 +2022,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; - cancel_delayed_work(&mad_agent_priv->timed_work); delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(mad_agent_priv->qp_info-> - port_priv->wq, - &mad_agent_priv->timed_work, delay); + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } } } @@ -1766,11 +2059,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ - if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { - cancel_delayed_work(&mad_agent_priv->timed_work); - queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timed_work, delay); - } + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, @@ -1827,8 +2118,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_send_wc); /* Release reference on agent taken when sending */ - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -1853,9 +2143,12 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: - dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, mapping), - mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->header_mapping, + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->payload_mapping, + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -1867,8 +2160,7 @@ retry: queued_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); - list_del(&mad_list->list); - list_add_tail(&mad_list->list, &send_queue->list); + list_move_tail(&mad_list->list, &send_queue->list); } spin_unlock_irqrestore(&send_queue->lock, flags); @@ -1968,12 +2260,12 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, /* * IB MAD completion callback */ -static void ib_mad_completion_handler(void *data) +static void ib_mad_completion_handler(struct work_struct *work) { struct ib_mad_port_private *port_priv; struct ib_wc wc; - port_priv = (struct ib_mad_port_private *)data; + port_priv = container_of(work, struct ib_mad_port_private, work); ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) { @@ -2007,15 +2299,13 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - /* Empty local completion list as well */ - list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ @@ -2094,37 +2384,41 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_cancel_mad); -static void local_completions(void *data) +static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; - int recv = 0; + int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; - mad_agent_priv = (struct ib_mad_agent_private *)data; + mad_agent_priv = + container_of(work, struct ib_mad_agent_private, local_work); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, struct ib_mad_local_private, completion_list); + list_del(&local->completion_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + free_mad = 0; if (local->mad_priv) { recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); + free_mad = 1; goto local_send_completion; } - recv = 1; /* * Defined behavior is to complete response * before request */ - build_smp_wc((unsigned long) local->mad_send_wr, + build_smp_wc(recv_mad_agent->agent.qp, + (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), 0, recv_mad_agent->agent.port_num, &wc); @@ -2162,9 +2456,8 @@ local_send_completion: &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_del(&local->completion_list); atomic_dec(&mad_agent_priv->refcount); - if (!recv) + if (free_mad) kmem_cache_free(ib_mad_cache, local->mad_priv); kfree(local); } @@ -2175,9 +2468,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; - if (!mad_send_wr->retries--) + if (!mad_send_wr->retries_left) return -ETIMEDOUT; + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { @@ -2204,14 +2500,15 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) return ret; } -static void timeout_sends(void *data) +static void timeout_sends(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags, delay; - mad_agent_priv = (struct ib_mad_agent_private *)data; + mad_agent_priv = container_of(work, struct ib_mad_agent_private, + timed_work.work); mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -2254,8 +2551,12 @@ static void timeout_sends(void *data) static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg) { struct ib_mad_port_private *port_priv = cq->cq_context; + unsigned long flags; - queue_work(port_priv->wq, &port_priv->work); + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + if (!list_empty(&port_priv->port_list)) + queue_work(port_priv->wq, &port_priv->work); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); } /* @@ -2293,13 +2594,17 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, break; } } - sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, - &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); - pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); + sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, + &mad_priv->grh, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + break; + } + mad_priv->header.mapping = sg_list.addr; recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; @@ -2314,12 +2619,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); - dma_unmap_single(qp_info->port_priv->device->dma_device, - pci_unmap_addr(&mad_priv->header, - mapping), - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); kmem_cache_free(ib_mad_cache, mad_priv); printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); break; @@ -2338,6 +2642,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) struct ib_mad_private *recv; struct ib_mad_list_head *mad_list; + if (!qp_info->qp) + return; + while (!list_empty(&qp_info->recv_queue.list)) { mad_list = list_entry(qp_info->recv_queue.list.next, @@ -2351,11 +2658,11 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) /* Remove from posted receive MAD list */ list_del(&mad_list->list); - dma_unmap_single(qp_info->port_priv->device->dma_device, - pci_unmap_addr(&recv->header, mapping), - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), - DMA_FROM_DEVICE); + ib_dma_unmap_single(qp_info->port_priv->device, + recv->header.mapping, + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); kmem_cache_free(ib_mad_cache, recv); } @@ -2370,21 +2677,30 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; + u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { + if (!attr) { printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + IB_DEFAULT_PKEY_FULL, &pkey_index); + if (ret) + pkey_index = 0; + for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; + if (!qp) + continue; + /* * PKey index for QP1 is irrelevant but * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; - attr->pkey_index = 0; + attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); @@ -2420,6 +2736,9 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) } for (i = 0; i < IB_MAD_QPS_CORE; i++) { + if (!port_priv->qp_info[i].qp) + continue; + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { printk(KERN_ERR PFX "Couldn't post receive WRs\n"); @@ -2472,8 +2791,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info, qp_init_attr.send_cq = qp_info->port_priv->cq; qp_init_attr.recv_cq = qp_info->port_priv->cq; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE; - qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE; + qp_init_attr.cap.max_send_wr = mad_sendq_size; + qp_init_attr.cap.max_recv_wr = mad_recvq_size; qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; qp_init_attr.qp_type = qp_type; @@ -2488,8 +2807,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info, goto error; } /* Use minimum queue sizes unless the CQ is resized */ - qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE; - qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE; + qp_info->send_queue.max_active = mad_sendq_size; + qp_info->recv_queue.max_active = mad_recvq_size; return 0; error: @@ -2498,6 +2817,9 @@ error: static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { + if (!qp_info->qp) + return; + ib_destroy_qp(qp_info->qp); kfree(qp_info->snoop_table); } @@ -2513,6 +2835,7 @@ static int ib_mad_port_open(struct ib_device *device, struct ib_mad_port_private *port_priv; unsigned long flags; char name[sizeof "ib_mad123"]; + int has_smi; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2528,10 +2851,14 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); - cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2; + cq_size = mad_sendq_size + mad_recvq_size; + has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + if (has_smi) + cq_size *= 2; + port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, - NULL, port_priv, cq_size); + NULL, port_priv, cq_size, 0); if (IS_ERR(port_priv->cq)) { printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); @@ -2552,9 +2879,11 @@ static int ib_mad_port_open(struct ib_device *device, goto error5; } - ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); - if (ret) - goto error6; + if (has_smi) { + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + } ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; @@ -2565,7 +2894,11 @@ static int ib_mad_port_open(struct ib_device *device, ret = -ENOMEM; goto error8; } - INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv); + INIT_WORK(&port_priv->work, ib_mad_completion_handler); + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_mad_port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); ret = ib_mad_port_start(port_priv); if (ret) { @@ -2573,12 +2906,13 @@ static int ib_mad_port_open(struct ib_device *device, goto error9; } - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - list_add_tail(&port_priv->port_list, &ib_mad_port_list); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); return 0; error9: + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + destroy_workqueue(port_priv->wq); error8: destroy_mad_qp(&port_priv->qp_info[1]); @@ -2615,11 +2949,9 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) printk(KERN_ERR PFX "Port %d not found\n", port_num); return -ENODEV; } - list_del(&port_priv->port_list); + list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - /* Stop processing completions. */ - flush_workqueue(port_priv->wq); destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); @@ -2639,7 +2971,10 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (device->node_type == IB_NODE_SWITCH) { + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; } else { @@ -2686,7 +3021,10 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; - if (device->node_type == IB_NODE_SWITCH) { + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; } else { @@ -2714,13 +3052,16 @@ static int __init ib_mad_init_module(void) { int ret; - spin_lock_init(&ib_mad_port_list_lock); + mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); + mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); + + mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); + mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); if (!ib_mad_cache) { printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); @@ -2747,12 +3088,8 @@ error1: static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - - if (kmem_cache_destroy(ib_mad_cache)) { - printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n"); - } + kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module); - |
