author     Paul Mackerras <paulus@samba.org>   2008-01-31 11:25:51 +1100
committer  Paul Mackerras <paulus@samba.org>   2008-01-31 11:25:51 +1100
commit     bd45ac0c5daae35e7c71138172e63df5cf644cf6 (patch)
tree       5eb5a599bf6a9d7a8a34e802db932aa9e9555de4 /drivers/infiniband/core
parent     4eece4ccf997c0e6d8fdad3d842e37b16b8d705f (diff)
parent     5bdeae46be6dfe9efa44a548bd622af325f4bdb4 (diff)
Merge branch 'linux-2.6'
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/addr.c       |  12
-rw-r--r--  drivers/infiniband/core/cm.c         | 306
-rw-r--r--  drivers/infiniband/core/cma.c        |  67
-rw-r--r--  drivers/infiniband/core/fmr_pool.c   |  33
-rw-r--r--  drivers/infiniband/core/mad.c        |  26
-rw-r--r--  drivers/infiniband/core/mad_priv.h   |   3
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c   |   2
-rw-r--r--  drivers/infiniband/core/multicast.c  |  55
-rw-r--r--  drivers/infiniband/core/smi.h        |  18
-rw-r--r--  drivers/infiniband/core/sysfs.c      |  39
-rw-r--r--  drivers/infiniband/core/ucm.c        |  37
-rw-r--r--  drivers/infiniband/core/ucma.c       |  92
-rw-r--r--  drivers/infiniband/core/user_mad.c   | 115
13 files changed, 598 insertions(+), 207 deletions(-)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 5381c80de10..a58ad8a470f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -110,7 +110,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; - dev = ip_dev_find(ip); + dev = ip_dev_find(&init_net, ip); if (!dev) return -EADDRNOTAVAIL; @@ -158,7 +158,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(&rt, &fl)) + if (ip_route_output_key(&init_net, &rt, &fl)) return; neigh_event_send(rt->u.dst.neighbour, NULL); @@ -179,7 +179,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; - ret = ip_route_output_key(&rt, &fl); + ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; @@ -261,15 +261,15 @@ static int addr_resolve_local(struct sockaddr_in *src_in, __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(dst_ip); + dev = ip_dev_find(&init_net, dst_ip); if (!dev) return -EADDRNOTAVAIL; - if (ZERONET(src_ip)) { + if (ipv4_is_zeronet(src_ip)) { src_in->sin_family = dst_in->sin_family; src_in->sin_addr.s_addr = dst_ip; ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (LOOPBACK(src_ip)) { + } else if (ipv4_is_loopback(src_ip)) { ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2e39236d189..c0150147d34 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
@@ -37,12 +37,14 @@ #include <linux/completion.h> #include <linux/dma-mapping.h> +#include <linux/device.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/sysfs.h> #include <linux/workqueue.h> #include <rdma/ib_cache.h> @@ -78,17 +80,94 @@ static struct ib_cm { struct workqueue_struct *wq; } cm; +/* Counter indexes ordered by attribute ID */ +enum { + CM_REQ_COUNTER, + CM_MRA_COUNTER, + CM_REJ_COUNTER, + CM_REP_COUNTER, + CM_RTU_COUNTER, + CM_DREQ_COUNTER, + CM_DREP_COUNTER, + CM_SIDR_REQ_COUNTER, + CM_SIDR_REP_COUNTER, + CM_LAP_COUNTER, + CM_APR_COUNTER, + CM_ATTR_COUNT, + CM_ATTR_ID_OFFSET = 0x0010, +}; + +enum { + CM_XMIT, + CM_XMIT_RETRIES, + CM_RECV, + CM_RECV_DUPLICATES, + CM_COUNTER_GROUPS +}; + +static char const counter_group_names[CM_COUNTER_GROUPS] + [sizeof("cm_rx_duplicates")] = { + "cm_tx_msgs", "cm_tx_retries", + "cm_rx_msgs", "cm_rx_duplicates" +}; + +struct cm_counter_group { + struct kobject obj; + atomic_long_t counter[CM_ATTR_COUNT]; +}; + +struct cm_counter_attribute { + struct attribute attr; + int index; +}; + +#define CM_COUNTER_ATTR(_name, _index) \ +struct cm_counter_attribute cm_##_name##_counter_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ + .index = _index \ +} + +static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); +static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); +static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); +static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); +static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); +static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); +static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); +static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); +static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); +static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); +static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); + +static struct attribute *cm_counter_default_attrs[] = { + &cm_req_counter_attr.attr, + &cm_mra_counter_attr.attr, + &cm_rej_counter_attr.attr, + &cm_rep_counter_attr.attr, + &cm_rtu_counter_attr.attr, + &cm_dreq_counter_attr.attr, + &cm_drep_counter_attr.attr, + &cm_sidr_req_counter_attr.attr, + &cm_sidr_rep_counter_attr.attr, + &cm_lap_counter_attr.attr, + &cm_apr_counter_attr.attr, + NULL +}; + struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; + struct kobject port_obj; u8 port_num; + struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *device; + struct kobject dev_obj; u8 ack_delay; - struct cm_port port[0]; + struct cm_port *port[0]; }; struct cm_av { @@ -278,7 +357,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->device, &path->sgid, &p, NULL)) { - port = &cm_dev->port[p-1]; + port = cm_dev->port[p-1]; break; } } @@ -1270,6 +1349,9 @@ static void cm_dup_req_handler(struct cm_work *work, struct ib_mad_send_buf *msg = NULL; int ret; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_REQ_COUNTER]); + /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; @@ -1616,6 +1698,8 @@ static void cm_dup_rep_handler(struct cm_work *work) if (!cm_id_priv) return; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; @@ -1781,6 +1865,8 @@ static int cm_rtu_handler(struct cm_work *work) if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; @@ -1958,6 +2044,8 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } @@ -1977,6 +2065,8 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -1988,6 +2078,10 @@ static int cm_dreq_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_DREQ_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); + goto unlock; default: goto unlock; } @@ -2339,10 +2433,20 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) { + if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + atomic_long_inc(&work->port-> + counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); goto out; + } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; + case IB_CM_MRA_REQ_RCVD: + case IB_CM_MRA_REP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); + /* fall through */ default: goto out; } @@ -2502,6 +2606,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -2515,6 +2621,10 @@ static int cm_lap_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_LAP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); + goto unlock; default: goto unlock; } @@ -2796,6 +2906,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -2990,6 +3102,27 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_port *port; + u16 attr_index; + + port = mad_agent->context; + attr_index = be16_to_cpu(((struct ib_mad_hdr *) + msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; + + /* + * If the send was in response to a received message (context[0] is not + * set to a cm_id), and is not a REJ, then it is a send that was + * manually retried. 
+ */ + if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + msg->retries = 1; + + atomic_long_add(1 + msg->retries, + &port->counter_group[CM_XMIT].counter[attr_index]); + if (msg->retries) + atomic_long_add(msg->retries, + &port->counter_group[CM_XMIT_RETRIES]. + counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3148,8 +3281,10 @@ EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { + struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; + u16 attr_id; int paths = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { @@ -3194,6 +3329,10 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, return; } + attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); + atomic_long_inc(&port->counter_group[CM_RECV]. + counter[attr_id - CM_ATTR_ID_OFFSET]); + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { @@ -3204,7 +3343,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; - work->port = (struct cm_port *)mad_agent->context; + work->port = port; queue_delayed_work(cm.wq, &work->work, 0); } @@ -3379,6 +3518,108 @@ static void cm_get_ack_delay(struct cm_device *cm_dev) cm_dev->ack_delay = attr.local_ca_ack_delay; } +static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, + char *buf) +{ + struct cm_counter_group *group; + struct cm_counter_attribute *cm_attr; + + group = container_of(obj, struct cm_counter_group, obj); + cm_attr = container_of(attr, struct cm_counter_attribute, attr); + + return sprintf(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); +} + +static struct sysfs_ops cm_counter_ops = { + .show = cm_show_counter +}; + +static struct kobj_type cm_counter_obj_type = { + .sysfs_ops = &cm_counter_ops, + .default_attrs = cm_counter_default_attrs +}; + +static void cm_release_port_obj(struct kobject *obj) +{ + struct cm_port *cm_port; + + printk(KERN_ERR "free cm port\n"); + + cm_port = container_of(obj, struct cm_port, port_obj); + kfree(cm_port); +} + +static struct kobj_type cm_port_obj_type = { + .release = cm_release_port_obj +}; + +static void cm_release_dev_obj(struct kobject *obj) +{ + struct cm_device *cm_dev; + + printk(KERN_ERR "free cm dev\n"); + + cm_dev = container_of(obj, struct cm_device, dev_obj); + kfree(cm_dev); +} + +static struct kobj_type cm_dev_obj_type = { + .release = cm_release_dev_obj +}; + +struct class cm_class = { + .name = "infiniband_cm", +}; +EXPORT_SYMBOL(cm_class); + +static void cm_remove_fs_obj(struct kobject *obj) +{ + kobject_put(obj->parent); + kobject_put(obj); +} + +static int cm_create_port_fs(struct cm_port *port) +{ + int i, ret; + + ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, + kobject_get(&port->cm_dev->dev_obj), + "%d", port->port_num); + if (ret) { + kfree(port); + return ret; + } + + for (i = 0; i < CM_COUNTER_GROUPS; i++) { + ret = kobject_init_and_add(&port->counter_group[i].obj, + &cm_counter_obj_type, + kobject_get(&port->port_obj), + "%s", counter_group_names[i]); + if (ret) + goto error; + } + + return 0; + +error: + while (i--) + cm_remove_fs_obj(&port->counter_group[i].obj); + cm_remove_fs_obj(&port->port_obj); + return ret; + +} + +static void cm_remove_port_fs(struct cm_port *port) +{ + int i; + + for (i = 0; i < CM_COUNTER_GROUPS; i++) + 
cm_remove_fs_obj(&port->counter_group[i].obj); + + cm_remove_fs_obj(&port->port_obj); +} + static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3397,7 +3638,7 @@ static void cm_add_one(struct ib_device *device) if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; - cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * + cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; @@ -3405,11 +3646,27 @@ static void cm_add_one(struct ib_device *device) cm_dev->device = device; cm_get_ack_delay(cm_dev); + ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type, + &cm_class.subsys.kobj, "%s", device->name); + if (ret) { + kfree(cm_dev); + return; + } + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { - port = &cm_dev->port[i-1]; + port = kzalloc(sizeof *port, GFP_KERNEL); + if (!port) + goto error1; + + cm_dev->port[i-1] = port; port->cm_dev = cm_dev; port->port_num = i; + + ret = cm_create_port_fs(port); + if (ret) + goto error1; + port->mad_agent = ib_register_mad_agent(device, i, IB_QPT_GSI, ®_req, @@ -3418,11 +3675,11 @@ static void cm_add_one(struct ib_device *device) cm_recv_handler, port); if (IS_ERR(port->mad_agent)) - goto error1; + goto error2; ret = ib_modify_port(device, i, 0, &port_modify); if (ret) - goto error2; + goto error3; } ib_set_client_data(device, &cm_client, cm_dev); @@ -3431,17 +3688,20 @@ static void cm_add_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); return; -error2: +error3: ib_unregister_mad_agent(port->mad_agent); +error2: + cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { - port = &cm_dev->port[i-1]; + port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3463,11 +3723,12 @@ static void cm_remove_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); for (i = 1; i <= device->phys_port_cnt; i++) { - port = &cm_dev->port[i-1]; + port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static int __init ib_cm_init(void) @@ -3488,17 +3749,25 @@ static int __init ib_cm_init(void) idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); - cm.wq = create_workqueue("ib_cm"); - if (!cm.wq) + ret = class_register(&cm_class); + if (ret) return -ENOMEM; + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) { + ret = -ENOMEM; + goto error1; + } + ret = ib_register_client(&cm_client); if (ret) - goto error; + goto error2; return 0; -error: +error2: destroy_workqueue(cm.wq); +error1: + class_unregister(&cm_class); return ret; } @@ -3519,6 +3788,7 @@ static void __exit ib_cm_cleanup(void) } ib_unregister_client(&cm_client); + class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0751697ef98..1eff1b2c0e0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -488,7 +488,8 @@ void rdma_destroy_qp(struct rdma_cm_id *id) } EXPORT_SYMBOL(rdma_destroy_qp); -static int cma_modify_qp_rtr(struct 
rdma_id_private *id_priv) +static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; @@ -514,13 +515,16 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) if (ret) goto out; + if (conn_param) + qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } -static int cma_modify_qp_rts(struct rdma_id_private *id_priv) +static int cma_modify_qp_rts(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; @@ -536,6 +540,8 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv) if (ret) goto out; + if (conn_param) + qp_attr.max_rd_atomic = conn_param->initiator_depth; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); @@ -624,7 +630,8 @@ static inline int cma_zero_addr(struct sockaddr *addr) struct in6_addr *ip6; if (addr->sa_family == AF_INET) - return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_zeronet( + ((struct sockaddr_in *)addr)->sin_addr.s_addr); else { ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | @@ -634,7 +641,7 @@ static inline int cma_zero_addr(struct sockaddr *addr) static inline int cma_loopback_addr(struct sockaddr *addr) { - return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); } static inline int cma_any_addr(struct sockaddr *addr) @@ -866,11 +873,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; - ret = cma_modify_qp_rts(id_priv); + ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; @@ -1122,8 +1129,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) cm_id->cm_handler = cma_ib_handler; ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) + if (!ret) { + cma_enable_remove(conn_id); goto out; + } /* Destroy the CM ID by returning a non-zero value. 
*/ conn_id->cm_id.ib = NULL; @@ -1262,6 +1271,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct net_device *dev = NULL; struct rdma_cm_event event; int ret; + struct ib_device_attr attr; listen_id = cm_id->context; if (cma_disable_remove(listen_id, CMA_LISTEN)) @@ -1279,7 +1289,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, atomic_inc(&conn_id->dev_remove); conn_id->state = CMA_CONNECT; - dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; cma_enable_remove(conn_id); @@ -1311,10 +1321,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; *sin = iw_event->remote_addr; + ret = ib_query_device(conn_id->id.device, &attr); + if (ret) { + cma_enable_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; + event.param.conn.responder_resources = attr.max_qp_rd_atom; ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ @@ -2272,7 +2291,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; cm_id->remote_addr = *sin; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; @@ -2335,25 +2354,15 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - - if (id_priv->id.qp) { - ret = cma_modify_qp_rtr(id_priv); - if (ret) - goto out; + int ret; - qp_attr.qp_state = IB_QPS_RTS; - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr, - &qp_attr_mask); - if (ret) - goto out; + ret = cma_modify_qp_rtr(id_priv, conn_param); + if (ret) + goto out; - qp_attr.max_rd_atomic = conn_param->initiator_depth; - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); - if (ret) - goto out; - } + ret = cma_modify_qp_rts(id_priv, conn_param); + if (ret) + goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; @@ -2378,7 +2387,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; - ret = cma_modify_qp_rtr(id_priv); + ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; @@ -2598,11 +2607,9 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, /* IPv6 address is an SA assigned MGID. 
*/ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { - ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); + ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ - mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; - mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); *mgid = *(union ib_gid *) (mc_map + 4); } } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index e8d5f6b6499..6c7aa59794d 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; - struct ib_pool_fmr *fmr; + struct ib_pool_fmr *fmr, *next; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); @@ -158,6 +158,20 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #endif } + /* + * The free_list may hold FMRs that have been put there + * because they haven't reached the max_remap count. + * Invalidate their mapping as well. + */ + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count == 0) + continue; + hlist_del_init(&fmr->cache_node); + fmr->remap_count = 0; + list_add_tail(&fmr->fmr->list, &fmr_list); + list_move(&fmr->list, &unmap_list); + } + list_splice(&pool->dirty_list, &unmap_list); INIT_LIST_HEAD(&pool->dirty_list); pool->dirty_len = 0; @@ -182,8 +196,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr) struct ib_fmr_pool *pool = pool_ptr; do { - if (pool->dirty_len >= pool->dirty_watermark || - atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { ib_fmr_batch_release(pool); atomic_inc(&pool->flush_ser); @@ -194,8 +207,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr) } set_current_state(TASK_INTERRUPTIBLE); - if (pool->dirty_len < pool->dirty_watermark && - atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && !kthread_should_stop()) schedule(); __set_current_state(TASK_RUNNING); @@ -369,11 +381,6 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { - if (fmr->remap_count) { - INIT_LIST_HEAD(&fmr_list); - list_add_tail(&fmr->fmr->list, &fmr_list); - ib_unmap_fmr(&fmr_list); - } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); @@ -511,8 +518,10 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) list_add_tail(&fmr->list, &pool->free_list); } else { list_add_tail(&fmr->list, &pool->dirty_list); - ++pool->dirty_len; - wake_up_process(pool->thread); + if (++pool->dirty_len >= pool->dirty_watermark) { + atomic_inc(&pool->req_ser); + wake_up_process(pool->thread); + } } } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 6f4287716ab..fbe16d5250a 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -701,7 +701,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD) + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) goto out; local = kmalloc(sizeof *local, GFP_ATOMIC); @@ -752,8 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private 
*mad_agent_priv, port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - mad_priv->mad.mad.mad_hdr.tid = - ((struct ib_mad *)smp)->mad_hdr.tid; + memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); recv_mad_agent = find_mad_agent(port_priv, &mad_priv->mad.mad); } @@ -1100,7 +1100,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->retries = send_buf->retries; + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -1931,15 +1933,6 @@ local: if (port_priv->device->process_mad) { int ret; - if (!response) { - printk(KERN_ERR PFX "No memory for response MAD\n"); - /* - * Is it better to assume that - * it wouldn't be processed ? - */ - goto out; - } - ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, @@ -2282,8 +2275,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - /* Empty local completion list as well */ - list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ @@ -2445,9 +2436,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; - if (!mad_send_wr->retries--) + if (!mad_send_wr->retries_left) return -ETIMEDOUT; + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9be5cc00a3a..8b75010016e 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -131,7 +131,8 @@ struct ib_mad_send_wr_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; - int retries; + int max_retries; + int retries_left; int retry; int refcount; enum ib_wc_status status; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index d43bc62005b..a5e2a310f31 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -684,7 +684,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { adjust_last_ack(mad_send_wr, seg_num); - mad_send_wr->retries = mad_send_wr->send_buf.retries; + mad_send_wr->retries_left = mad_send_wr->max_retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1bc1fe60528..107f170c57c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -73,11 +73,20 @@ struct mcast_device { }; enum mcast_state { - MCAST_IDLE, MCAST_JOINING, MCAST_MEMBER, + MCAST_ERROR, +}; + +enum mcast_group_state { + MCAST_IDLE, MCAST_BUSY, - MCAST_ERROR + MCAST_GROUP_ERROR, + MCAST_PKEY_EVENT +}; + +enum { + MCAST_INVALID_PKEY_INDEX = 0xFFFF }; struct mcast_member; 
@@ -93,9 +102,10 @@ struct mcast_group { struct mcast_member *last_join; int members[3]; atomic_t refcount; - enum mcast_state state; + enum mcast_group_state state; struct ib_sa_query *query; int query_id; + u16 pkey_index; }; struct mcast_member { @@ -378,9 +388,19 @@ static int fail_join(struct mcast_group *group, struct mcast_member *member, static void process_group_error(struct mcast_group *group) { struct mcast_member *member; - int ret; + int ret = 0; + u16 pkey_index; + + if (group->state == MCAST_PKEY_EVENT) + ret = ib_find_pkey(group->port->dev->device, + group->port->port_num, + be16_to_cpu(group->rec.pkey), &pkey_index); spin_lock_irq(&group->lock); + if (group->state == MCAST_PKEY_EVENT && !ret && + group->pkey_index == pkey_index) + goto out; + while (!list_empty(&group->active_list)) { member = list_entry(group->active_list.next, struct mcast_member, list); @@ -399,6 +419,7 @@ static void process_group_error(struct mcast_group *group) } group->rec.join_state = 0; +out: group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); } @@ -415,9 +436,9 @@ static void mcast_work_handler(struct work_struct *work) retest: spin_lock_irq(&group->lock); while (!list_empty(&group->pending_list) || - (group->state == MCAST_ERROR)) { + (group->state != MCAST_BUSY)) { - if (group->state == MCAST_ERROR) { + if (group->state != MCAST_BUSY) { spin_unlock_irq(&group->lock); process_group_error(group); goto retest; @@ -494,12 +515,19 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; + u16 pkey_index = MCAST_INVALID_PKEY_INDEX; if (status) process_join_error(group, status); else { + ib_find_pkey(group->port->dev->device, group->port->port_num, + be16_to_cpu(rec->pkey), &pkey_index); + spin_lock_irq(&group->port->lock); group->rec = *rec; + if (group->state == MCAST_BUSY && + group->pkey_index == MCAST_INVALID_PKEY_INDEX) + group->pkey_index = pkey_index; if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { rb_erase(&group->node, &group->port->table); mcast_insert(group->port, group, 1); @@ -539,6 +567,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port, group->port = port; group->rec.mgid = *mgid; + group->pkey_index = MCAST_INVALID_PKEY_INDEX; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->active_list); INIT_WORK(&group->work, mcast_work_handler); @@ -707,7 +736,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_from_mcmember); -static void mcast_groups_lost(struct mcast_port *port) +static void mcast_groups_event(struct mcast_port *port, + enum mcast_group_state state) { struct mcast_group *group; struct rb_node *node; @@ -721,7 +751,8 @@ static void mcast_groups_lost(struct mcast_port *port) atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } - group->state = MCAST_ERROR; + if (group->state != MCAST_GROUP_ERROR) + group->state = state; spin_unlock(&group->lock); } spin_unlock_irqrestore(&port->lock, flags); @@ -731,16 +762,20 @@ static void mcast_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct mcast_device *dev; + int index; dev = container_of(handler, struct mcast_device, event_handler); + index = event->element.port_num - dev->start_port; switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: - mcast_groups_lost(&dev->port[event->element.port_num - - dev->start_port]); + mcast_groups_event(&dev->port[index], 
MCAST_GROUP_ERROR); + break; + case IB_EVENT_PKEY_CHANGE: + mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); break; default: break; diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 1cfc2984434..aff96bac49b 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -59,7 +59,8 @@ extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, u8 node_type, int port_num); /* - * Return 1 if the SMP should be handled by the local SMA/SM via process_mad + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad */ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, struct ib_device *device) @@ -71,4 +72,19 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD); } + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp, + struct ib_device *device) +{ + /* C14-13:3 -- We're at the end of the DR segment of path */ + /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ + return ((device->process_mad && + ib_get_smp_direction(smp) && + !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD); +} + #endif /* __SMI_H_ */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 3d405068132..c864ef70fdf 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -508,19 +508,10 @@ static int add_port(struct ib_device *device, int port_num) p->ibdev = device; p->port_num = port_num; - p->kobj.ktype = &port_type; - p->kobj.parent = kobject_get(&device->ports_parent); - if (!p->kobj.parent) { - ret = -EBUSY; - goto err; - } - - ret = kobject_set_name(&p->kobj, "%d", port_num); - if (ret) - goto err_put; - - ret = kobject_register(&p->kobj); + ret = kobject_init_and_add(&p->kobj, &port_type, + kobject_get(device->ports_parent), + "%d", port_num); if (ret) goto err_put; @@ -549,6 +540,7 @@ static int add_port(struct ib_device *device, int port_num) list_add_tail(&p->kobj.entry, &device->port_list); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; err_free_pkey: @@ -570,9 +562,7 @@ err_remove_pma: sysfs_remove_group(&p->kobj, &pma_group); err_put: - kobject_put(&device->ports_parent); - -err: + kobject_put(device->ports_parent); kfree(p); return ret; } @@ -694,16 +684,9 @@ int ib_device_register_sysfs(struct ib_device *device) goto err_unregister; } - device->ports_parent.parent = kobject_get(&class_dev->kobj); - if (!device->ports_parent.parent) { - ret = -EBUSY; - goto err_unregister; - } - ret = kobject_set_name(&device->ports_parent, "ports"); - if (ret) - goto err_put; - ret = kobject_register(&device->ports_parent); - if (ret) + device->ports_parent = kobject_create_and_add("ports", + kobject_get(&class_dev->kobj)); + if (!device->ports_parent) goto err_put; if (device->node_type == RDMA_NODE_IB_SWITCH) { @@ -731,7 +714,7 @@ err_put: sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); - kobject_unregister(p); + kobject_put(p); } } @@ -755,10 +738,10 @@ void ib_device_unregister_sysfs(struct ib_device *device) sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); - kobject_unregister(p); + kobject_put(p); } - kobject_unregister(&device->ports_parent); + kobject_put(device->ports_parent); 
class_device_unregister(&device->class_dev); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 424983f5b1e..4291ab42a5b 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -106,6 +106,9 @@ enum { IB_UCM_MAX_DEVICES = 32 }; +/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ +extern struct class cm_class; + #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); @@ -1199,7 +1202,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static void ib_ucm_release_class_dev(struct class_device *class_dev) +static void ucm_release_class_dev(struct class_device *class_dev) { struct ib_ucm_device *dev; @@ -1217,11 +1220,6 @@ static const struct file_operations ucm_fops = { .poll = ib_ucm_poll, }; -static struct class ucm_class = { - .name = "infiniband_cm", - .release = ib_ucm_release_class_dev -}; - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; @@ -1257,9 +1255,10 @@ static void ib_ucm_add_one(struct ib_device *device) if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; - ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.class = &cm_class; ucm_dev->class_dev.dev = device->dma_device; ucm_dev->class_dev.devt = ucm_dev->dev.dev; + ucm_dev->class_dev.release = ucm_release_class_dev; snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", ucm_dev->devnum); if (class_device_register(&ucm_dev->class_dev)) @@ -1306,40 +1305,34 @@ static int __init ib_ucm_init(void) "infiniband_cm"); if (ret) { printk(KERN_ERR "ucm: couldn't register device number\n"); - goto err; + goto error1; } - ret = class_register(&ucm_class); - if (ret) { - printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); - goto err_chrdev; - } - - ret = class_create_file(&ucm_class, &class_attr_abi_version); + ret = class_create_file(&cm_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); - goto err_class; + goto error2; } ret = ib_register_client(&ucm_client); if (ret) { printk(KERN_ERR "ucm: couldn't register client\n"); - goto err_class; + goto error3; } return 0; -err_class: - class_unregister(&ucm_class); -err_chrdev: +error3: + class_remove_file(&cm_class, &class_attr_abi_version); +error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); -err: +error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); - class_unregister(&ucm_class); + class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 90d675ad9ec..15937eb38aa 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -31,6 +31,7 @@ */ #include <linux/completion.h> +#include <linux/file.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/idr.h> @@ -991,6 +992,96 @@ out: return ret; } +static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + /* Acquire mutex's based on pointer comparison to prevent deadlock. 
*/ + if (file1 < file2) { + mutex_lock(&file1->mut); + mutex_lock(&file2->mut); + } else { + mutex_lock(&file2->mut); + mutex_lock(&file1->mut); + } +} + +static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + if (file1 < file2) { + mutex_unlock(&file2->mut); + mutex_unlock(&file1->mut); + } else { + mutex_unlock(&file1->mut); + mutex_unlock(&file2->mut); + } +} + +static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &file->event_list); +} + +static ssize_t ucma_migrate_id(struct ucma_file *new_file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_migrate_id cmd; + struct rdma_ucm_migrate_resp resp; + struct ucma_context *ctx; + struct file *filp; + struct ucma_file *cur_file; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + /* Get current fd to protect against it being closed */ + filp = fget(cmd.fd); + if (!filp) + return -ENOENT; + + /* Validate current fd and prevent destruction of id. */ + ctx = ucma_get_ctx(filp->private_data, cmd.id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto file_put; + } + + cur_file = ctx->file; + if (cur_file == new_file) { + resp.events_reported = ctx->events_reported; + goto response; + } + + /* + * Migrate events between fd's, maintaining order, and avoiding new + * events being added before existing events. + */ + ucma_lock_files(cur_file, new_file); + mutex_lock(&mut); + + list_move_tail(&ctx->list, &new_file->ctx_list); + ucma_move_events(ctx, new_file); + ctx->file = new_file; + resp.events_reported = ctx->events_reported; + + mutex_unlock(&mut); + ucma_unlock_files(cur_file, new_file); + +response: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); +file_put: + fput(filp); + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b53eac4611d..4e915104ac4 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -2,6 +2,7 @@ * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -42,7 +43,7 @@ #include <linux/cdev.h> #include <linux/dma-mapping.h> #include <linux/poll.h> -#include <linux/rwsem.h> +#include <linux/mutex.h> #include <linux/kref.h> #include <linux/compat.h> @@ -94,7 +95,7 @@ struct ib_umad_port { struct class_device *sm_class_dev; struct semaphore sm_sem; - struct rw_semaphore mutex; + struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; @@ -110,11 +111,11 @@ struct ib_umad_device { }; struct ib_umad_file { + struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; struct list_head send_list; struct list_head port_list; - spinlock_t recv_lock; spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; @@ -156,7 +157,7 @@ static int hdr_size(struct ib_umad_file *file) sizeof (struct ib_user_mad_hdr_old); } -/* caller must hold port->mutex at least for reading */ +/* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; @@ -168,32 +169,30 @@ static int queue_packet(struct ib_umad_file *file, { int ret = 1; - down_read(&file->port->mutex); + mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { - spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); wake_up_interruptible(&file->recv_wait); ret = 0; break; } - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); return ret; } static void dequeue_send(struct ib_umad_file *file, struct ib_umad_packet *packet) - { +{ spin_lock_irq(&file->send_lock); list_del(&packet->list); spin_unlock_irq(&file->send_lock); - } +} static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) @@ -341,10 +340,10 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, if (count < hdr_size(file)) return -EINVAL; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; @@ -353,13 +352,13 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, !list_empty(&file->recv_list))) return -ERESTARTSYS; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); if (packet->recv_wc) ret = copy_recv_mad(file, buf, packet, count); @@ -368,9 +367,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, if (ret < 0) { /* Requeue packet */ - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); } else { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); @@ -481,7 +480,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - down_read(&file->port->mutex); + mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { @@ -577,7 +576,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (ret) goto err_send; - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); return count; err_send: @@ -587,7 +586,7 @@ err_msg: err_ah: 
ib_destroy_ah(ah); err_up: - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); err: kfree(packet); return ret; @@ -613,11 +612,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; - struct ib_mad_agent *agent; + struct ib_mad_agent *agent = NULL; int agent_id; int ret; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); if (!file->port->ib_dev) { ret = -EPIPE; @@ -666,13 +666,13 @@ found: send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); + agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - ib_unregister_mad_agent(agent); goto out; } @@ -690,7 +690,13 @@ found: ret = 0; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + return ret; } @@ -703,7 +709,8 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) if (get_user(id, arg)) return -EFAULT; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; @@ -714,11 +721,13 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) file->agent[id] = NULL; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); + mutex_unlock(&file->port->file_mutex); + return ret; } @@ -726,12 +735,12 @@ static long ib_umad_enable_pkey(struct ib_umad_file *file) { int ret = 0; - down_write(&file->port->mutex); + mutex_lock(&file->mutex); if (file->already_used) ret = -EINVAL; else file->use_pkey_index = 1; - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); return ret; } @@ -783,7 +792,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) if (!port) return -ENXIO; - down_write(&port->mutex); + mutex_lock(&port->file_mutex); if (!port->ib_dev) { ret = -ENXIO; @@ -797,7 +806,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } - spin_lock_init(&file->recv_lock); + mutex_init(&file->mutex); spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); INIT_LIST_HEAD(&file->send_list); @@ -809,7 +818,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); out: - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); return ret; } @@ -821,7 +830,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp) int already_dead; int i; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; @@ -834,14 +844,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp) list_del(&file->port_list); - downgrade_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); - up_read(&file->port->mutex); + mutex_unlock(&file->port->file_mutex); kfree(file); kref_put(&dev->ref, ib_umad_release_dev); @@ -914,10 +924,10 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) }; int ret = 0; - down_write(&port->mutex); + mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, 
&props); - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); up(&port->sm_sem); @@ -981,7 +991,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); - init_rwsem(&port->mutex); + mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); port->dev = cdev_alloc(); @@ -1052,6 +1062,7 @@ err_cdev: static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; + int already_dead; int id; class_set_devdata(port->class_dev, NULL); @@ -1067,42 +1078,22 @@ static void ib_umad_kill_port(struct ib_umad_port *port) umad_port[port->dev_num] = NULL; spin_unlock(&port_lock); - down_write(&port->mutex); + mutex_lock(&port->file_mutex); port->ib_dev = NULL; - /* - * Now go through the list of files attached to this port and - * unregister all of their MAD agents. We need to hold - * port->mutex while doing this to avoid racing with - * ib_umad_close(), but we can't hold the mutex for writing - * while calling ib_unregister_mad_agent(), since that might - * deadlock by calling back into queue_packet(). So we - * downgrade our lock to a read lock, and then drop and - * reacquire the write lock for the next iteration. - * - * We do list_del_init() on the file's list_head so that the - * list_del in ib_umad_close() is still OK, even after the - * file is removed from the list. - */ - while (!list_empty(&port->file_list)) { - file = list_entry(port->file_list.next, struct ib_umad_file, - port_list); - + list_for_each_entry(file, &port->file_list, port_list) { + mutex_lock(&file->mutex); + already_dead = file->agents_dead; file->agents_dead = 1; - list_del_init(&file->port_list); - - downgrade_write(&port->mutex); + mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); - - up_read(&port->mutex); - down_write(&port->mutex); } - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); clear_bit(port->dev_num, dev_map); } |
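Note on the new CM counters: the cm.c hunks above register kobjects under the shared "infiniband_cm" class, one directory per device and port, each containing the groups cm_tx_msgs, cm_tx_retries, cm_rx_msgs and cm_rx_duplicates with one attribute per CM message type (req, mra, rej, rep, rtu, dreq, drep, sidr_req, sidr_rep, lap, apr), printed by cm_show_counter() as "%ld\n". A minimal userspace sketch for reading one of these counters follows; the device name "mthca0" and port "1" are placeholders, and the exact path assumes sysfs is mounted at /sys with the layout created by cm_create_port_fs().

/* Sketch: read an ib_cm counter exposed under /sys/class/infiniband_cm/.
 * "mthca0" and port 1 are assumed values; adjust for the HCA in use.
 */
#include <stdio.h>
#include <stdlib.h>

static long read_cm_counter(const char *dev, int port,
			    const char *group, const char *attr)
{
	char path[256];
	char buf[32];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/class/infiniband_cm/%s/%d/%s/%s",
		 dev, port, group, attr);

	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return strtol(buf, NULL, 10);	/* cm_show_counter() emits "%ld\n" */
}

int main(void)
{
	long sent = read_cm_counter("mthca0", 1, "cm_tx_msgs", "req");
	long dup  = read_cm_counter("mthca0", 1, "cm_rx_duplicates", "req");

	printf("REQs sent: %ld, duplicate REQs received: %ld\n", sent, dup);
	return 0;
}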