author    Paul Mackerras <paulus@samba.org>    2008-01-31 11:25:51 +1100
committer Paul Mackerras <paulus@samba.org>    2008-01-31 11:25:51 +1100
commit    bd45ac0c5daae35e7c71138172e63df5cf644cf6 (patch)
tree      5eb5a599bf6a9d7a8a34e802db932aa9e9555de4 /drivers/infiniband/core
parent    4eece4ccf997c0e6d8fdad3d842e37b16b8d705f (diff)
parent    5bdeae46be6dfe9efa44a548bd622af325f4bdb4 (diff)
Merge branch 'linux-2.6'
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/addr.c         12
-rw-r--r--  drivers/infiniband/core/cm.c          306
-rw-r--r--  drivers/infiniband/core/cma.c          67
-rw-r--r--  drivers/infiniband/core/fmr_pool.c     33
-rw-r--r--  drivers/infiniband/core/mad.c          26
-rw-r--r--  drivers/infiniband/core/mad_priv.h      3
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c      2
-rw-r--r--  drivers/infiniband/core/multicast.c    55
-rw-r--r--  drivers/infiniband/core/smi.h          18
-rw-r--r--  drivers/infiniband/core/sysfs.c        39
-rw-r--r--  drivers/infiniband/core/ucm.c          37
-rw-r--r--  drivers/infiniband/core/ucma.c         92
-rw-r--r--  drivers/infiniband/core/user_mad.c    115
13 files changed, 598 insertions, 207 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 5381c80de10..a58ad8a470f 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -110,7 +110,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
int ret;
- dev = ip_dev_find(ip);
+ dev = ip_dev_find(&init_net, ip);
if (!dev)
return -EADDRNOTAVAIL;
@@ -158,7 +158,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
- if (ip_route_output_key(&rt, &fl))
+ if (ip_route_output_key(&init_net, &rt, &fl))
return;
neigh_event_send(rt->u.dst.neighbour, NULL);
@@ -179,7 +179,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
- ret = ip_route_output_key(&rt, &fl);
+ ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
@@ -261,15 +261,15 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
__be32 dst_ip = dst_in->sin_addr.s_addr;
int ret;
- dev = ip_dev_find(dst_ip);
+ dev = ip_dev_find(&init_net, dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
- if (ZERONET(src_ip)) {
+ if (ipv4_is_zeronet(src_ip)) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = dst_ip;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (LOOPBACK(src_ip)) {
+ } else if (ipv4_is_loopback(src_ip)) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
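
The addr.c hunks above replace the old ZERONET()/LOOPBACK() macros with
ipv4_is_zeronet()/ipv4_is_loopback() and pass the initial network namespace
(&init_net) to ip_dev_find() and ip_route_output_key(). As a hedged
illustration only (not part of the patch), the classification the renamed
helpers perform amounts to testing the top octet of the big-endian IPv4
address; a minimal userspace sketch, assuming the usual definitions from
include/linux/in.h:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Userspace mirrors of the helpers used in addr.c (assumed from
 * include/linux/in.h): zeronet is 0.x.x.x, loopback is 127.x.x.x. */
static int is_zeronet(uint32_t be_addr)  { return (be_addr & htonl(0xff000000)) == 0; }
static int is_loopback(uint32_t be_addr) { return (be_addr & htonl(0xff000000)) == htonl(0x7f000000); }

int main(void)
{
	struct in_addr a;

	inet_pton(AF_INET, "127.0.0.1", &a);
	printf("127.0.0.1: zeronet=%d loopback=%d\n", is_zeronet(a.s_addr), is_loopback(a.s_addr));

	inet_pton(AF_INET, "0.0.0.0", &a);
	printf("0.0.0.0:   zeronet=%d loopback=%d\n", is_zeronet(a.s_addr), is_loopback(a.s_addr));
	return 0;
}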
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2e39236d189..c0150147d34 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2006 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -37,12 +37,14 @@
#include <linux/completion.h>
#include <linux/dma-mapping.h>
+#include <linux/device.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <rdma/ib_cache.h>
@@ -78,17 +80,94 @@ static struct ib_cm {
struct workqueue_struct *wq;
} cm;
+/* Counter indexes ordered by attribute ID */
+enum {
+ CM_REQ_COUNTER,
+ CM_MRA_COUNTER,
+ CM_REJ_COUNTER,
+ CM_REP_COUNTER,
+ CM_RTU_COUNTER,
+ CM_DREQ_COUNTER,
+ CM_DREP_COUNTER,
+ CM_SIDR_REQ_COUNTER,
+ CM_SIDR_REP_COUNTER,
+ CM_LAP_COUNTER,
+ CM_APR_COUNTER,
+ CM_ATTR_COUNT,
+ CM_ATTR_ID_OFFSET = 0x0010,
+};
+
+enum {
+ CM_XMIT,
+ CM_XMIT_RETRIES,
+ CM_RECV,
+ CM_RECV_DUPLICATES,
+ CM_COUNTER_GROUPS
+};
+
+static char const counter_group_names[CM_COUNTER_GROUPS]
+ [sizeof("cm_rx_duplicates")] = {
+ "cm_tx_msgs", "cm_tx_retries",
+ "cm_rx_msgs", "cm_rx_duplicates"
+};
+
+struct cm_counter_group {
+ struct kobject obj;
+ atomic_long_t counter[CM_ATTR_COUNT];
+};
+
+struct cm_counter_attribute {
+ struct attribute attr;
+ int index;
+};
+
+#define CM_COUNTER_ATTR(_name, _index) \
+struct cm_counter_attribute cm_##_name##_counter_attr = { \
+ .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
+ .index = _index \
+}
+
+static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
+static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
+static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
+static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
+static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
+static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
+static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
+static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
+static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
+static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
+static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
+
+static struct attribute *cm_counter_default_attrs[] = {
+ &cm_req_counter_attr.attr,
+ &cm_mra_counter_attr.attr,
+ &cm_rej_counter_attr.attr,
+ &cm_rep_counter_attr.attr,
+ &cm_rtu_counter_attr.attr,
+ &cm_dreq_counter_attr.attr,
+ &cm_drep_counter_attr.attr,
+ &cm_sidr_req_counter_attr.attr,
+ &cm_sidr_rep_counter_attr.attr,
+ &cm_lap_counter_attr.attr,
+ &cm_apr_counter_attr.attr,
+ NULL
+};
+
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
+ struct kobject port_obj;
u8 port_num;
+ struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};
struct cm_device {
struct list_head list;
struct ib_device *device;
+ struct kobject dev_obj;
u8 ack_delay;
- struct cm_port port[0];
+ struct cm_port *port[0];
};
struct cm_av {
@@ -278,7 +357,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->device, &path->sgid,
&p, NULL)) {
- port = &cm_dev->port[p-1];
+ port = cm_dev->port[p-1];
break;
}
}
@@ -1270,6 +1349,9 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_REQ_COUNTER]);
+
/* Quick state check to discard duplicate REQs. */
if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
return;
@@ -1616,6 +1698,8 @@ static void cm_dup_rep_handler(struct cm_work *work)
if (!cm_id_priv)
return;
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_REP_COUNTER]);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
goto deref;
@@ -1781,6 +1865,8 @@ static int cm_rtu_handler(struct cm_work *work)
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_RTU_COUNTER]);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1958,6 +2044,8 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
dreq_msg->local_comm_id);
if (!cm_id_priv) {
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
return -EINVAL;
}
@@ -1977,6 +2065,8 @@ static int cm_dreq_handler(struct cm_work *work)
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
goto unlock;
@@ -1988,6 +2078,10 @@ static int cm_dreq_handler(struct cm_work *work)
if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
+ case IB_CM_DREQ_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_DREQ_COUNTER]);
+ goto unlock;
default:
goto unlock;
}
@@ -2339,10 +2433,20 @@ static int cm_mra_handler(struct cm_work *work)
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout)) {
+ if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ atomic_long_inc(&work->port->
+ counter_group[CM_RECV_DUPLICATES].
+ counter[CM_MRA_COUNTER]);
goto out;
+ }
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_MRA_REP_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_MRA_COUNTER]);
+ /* fall through */
default:
goto out;
}
@@ -2502,6 +2606,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_LAP_COUNTER]);
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
goto unlock;
@@ -2515,6 +2621,10 @@ static int cm_lap_handler(struct cm_work *work)
if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
+ case IB_CM_LAP_RCVD:
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_LAP_COUNTER]);
+ goto unlock;
default:
goto unlock;
}
@@ -2796,6 +2906,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
if (cur_cm_id_priv) {
spin_unlock_irq(&cm.lock);
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
+ counter[CM_SIDR_REQ_COUNTER]);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2990,6 +3102,27 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_port *port;
+ u16 attr_index;
+
+ port = mad_agent->context;
+ attr_index = be16_to_cpu(((struct ib_mad_hdr *)
+ msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
+
+ /*
+ * If the send was in response to a received message (context[0] is not
+ * set to a cm_id), and is not a REJ, then it is a send that was
+ * manually retried.
+ */
+ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ msg->retries = 1;
+
+ atomic_long_add(1 + msg->retries,
+ &port->counter_group[CM_XMIT].counter[attr_index]);
+ if (msg->retries)
+ atomic_long_add(msg->retries,
+ &port->counter_group[CM_XMIT_RETRIES].
+ counter[attr_index]);
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -3148,8 +3281,10 @@ EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
+ struct cm_port *port = mad_agent->context;
struct cm_work *work;
enum ib_cm_event_type event;
+ u16 attr_id;
int paths = 0;
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
@@ -3194,6 +3329,10 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
return;
}
+ attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
+ atomic_long_inc(&port->counter_group[CM_RECV].
+ counter[attr_id - CM_ATTR_ID_OFFSET]);
+
work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
@@ -3204,7 +3343,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
INIT_DELAYED_WORK(&work->work, cm_work_handler);
work->cm_event.event = event;
work->mad_recv_wc = mad_recv_wc;
- work->port = (struct cm_port *)mad_agent->context;
+ work->port = port;
queue_delayed_work(cm.wq, &work->work, 0);
}
@@ -3379,6 +3518,108 @@ static void cm_get_ack_delay(struct cm_device *cm_dev)
cm_dev->ack_delay = attr.local_ca_ack_delay;
}
+static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
+ char *buf)
+{
+ struct cm_counter_group *group;
+ struct cm_counter_attribute *cm_attr;
+
+ group = container_of(obj, struct cm_counter_group, obj);
+ cm_attr = container_of(attr, struct cm_counter_attribute, attr);
+
+ return sprintf(buf, "%ld\n",
+ atomic_long_read(&group->counter[cm_attr->index]));
+}
+
+static struct sysfs_ops cm_counter_ops = {
+ .show = cm_show_counter
+};
+
+static struct kobj_type cm_counter_obj_type = {
+ .sysfs_ops = &cm_counter_ops,
+ .default_attrs = cm_counter_default_attrs
+};
+
+static void cm_release_port_obj(struct kobject *obj)
+{
+ struct cm_port *cm_port;
+
+ printk(KERN_ERR "free cm port\n");
+
+ cm_port = container_of(obj, struct cm_port, port_obj);
+ kfree(cm_port);
+}
+
+static struct kobj_type cm_port_obj_type = {
+ .release = cm_release_port_obj
+};
+
+static void cm_release_dev_obj(struct kobject *obj)
+{
+ struct cm_device *cm_dev;
+
+ printk(KERN_ERR "free cm dev\n");
+
+ cm_dev = container_of(obj, struct cm_device, dev_obj);
+ kfree(cm_dev);
+}
+
+static struct kobj_type cm_dev_obj_type = {
+ .release = cm_release_dev_obj
+};
+
+struct class cm_class = {
+ .name = "infiniband_cm",
+};
+EXPORT_SYMBOL(cm_class);
+
+static void cm_remove_fs_obj(struct kobject *obj)
+{
+ kobject_put(obj->parent);
+ kobject_put(obj);
+}
+
+static int cm_create_port_fs(struct cm_port *port)
+{
+ int i, ret;
+
+ ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
+ kobject_get(&port->cm_dev->dev_obj),
+ "%d", port->port_num);
+ if (ret) {
+ kfree(port);
+ return ret;
+ }
+
+ for (i = 0; i < CM_COUNTER_GROUPS; i++) {
+ ret = kobject_init_and_add(&port->counter_group[i].obj,
+ &cm_counter_obj_type,
+ kobject_get(&port->port_obj),
+ "%s", counter_group_names[i]);
+ if (ret)
+ goto error;
+ }
+
+ return 0;
+
+error:
+ while (i--)
+ cm_remove_fs_obj(&port->counter_group[i].obj);
+ cm_remove_fs_obj(&port->port_obj);
+ return ret;
+
+}
+
+static void cm_remove_port_fs(struct cm_port *port)
+{
+ int i;
+
+ for (i = 0; i < CM_COUNTER_GROUPS; i++)
+ cm_remove_fs_obj(&port->counter_group[i].obj);
+
+ cm_remove_fs_obj(&port->port_obj);
+}
+
static void cm_add_one(struct ib_device *device)
{
struct cm_device *cm_dev;
@@ -3397,7 +3638,7 @@ static void cm_add_one(struct ib_device *device)
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
- cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
+ cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
device->phys_port_cnt, GFP_KERNEL);
if (!cm_dev)
return;
@@ -3405,11 +3646,27 @@ static void cm_add_one(struct ib_device *device)
cm_dev->device = device;
cm_get_ack_delay(cm_dev);
+ ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type,
+ &cm_class.subsys.kobj, "%s", device->name);
+ if (ret) {
+ kfree(cm_dev);
+ return;
+ }
+
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
for (i = 1; i <= device->phys_port_cnt; i++) {
- port = &cm_dev->port[i-1];
+ port = kzalloc(sizeof *port, GFP_KERNEL);
+ if (!port)
+ goto error1;
+
+ cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
port->port_num = i;
+
+ ret = cm_create_port_fs(port);
+ if (ret)
+ goto error1;
+
port->mad_agent = ib_register_mad_agent(device, i,
IB_QPT_GSI,
&reg_req,
@@ -3418,11 +3675,11 @@ static void cm_add_one(struct ib_device *device)
cm_recv_handler,
port);
if (IS_ERR(port->mad_agent))
- goto error1;
+ goto error2;
ret = ib_modify_port(device, i, 0, &port_modify);
if (ret)
- goto error2;
+ goto error3;
}
ib_set_client_data(device, &cm_client, cm_dev);
@@ -3431,17 +3688,20 @@ static void cm_add_one(struct ib_device *device)
write_unlock_irqrestore(&cm.device_lock, flags);
return;
-error2:
+error3:
ib_unregister_mad_agent(port->mad_agent);
+error2:
+ cm_remove_port_fs(port);
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
while (--i) {
- port = &cm_dev->port[i-1];
+ port = cm_dev->port[i-1];
ib_modify_port(device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
+ cm_remove_port_fs(port);
}
- kfree(cm_dev);
+ cm_remove_fs_obj(&cm_dev->dev_obj);
}
static void cm_remove_one(struct ib_device *device)
@@ -3463,11 +3723,12 @@ static void cm_remove_one(struct ib_device *device)
write_unlock_irqrestore(&cm.device_lock, flags);
for (i = 1; i <= device->phys_port_cnt; i++) {
- port = &cm_dev->port[i-1];
+ port = cm_dev->port[i-1];
ib_modify_port(device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
+ cm_remove_port_fs(port);
}
- kfree(cm_dev);
+ cm_remove_fs_obj(&cm_dev->dev_obj);
}
static int __init ib_cm_init(void)
@@ -3488,17 +3749,25 @@ static int __init ib_cm_init(void)
idr_pre_get(&cm.local_id_table, GFP_KERNEL);
INIT_LIST_HEAD(&cm.timewait_list);
- cm.wq = create_workqueue("ib_cm");
- if (!cm.wq)
+ ret = class_register(&cm_class);
+ if (ret)
return -ENOMEM;
+ cm.wq = create_workqueue("ib_cm");
+ if (!cm.wq) {
+ ret = -ENOMEM;
+ goto error1;
+ }
+
ret = ib_register_client(&cm_client);
if (ret)
- goto error;
+ goto error2;
return 0;
-error:
+error2:
destroy_workqueue(cm.wq);
+error1:
+ class_unregister(&cm_class);
return ret;
}
@@ -3519,6 +3788,7 @@ static void __exit ib_cm_cleanup(void)
}
ib_unregister_client(&cm_client);
+ class_unregister(&cm_class);
idr_destroy(&cm.local_id_table);
}
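
The cm.c changes above add per-port counters of CM traffic and publish them
through a new kobject hierarchy: class "infiniband_cm", one directory per
device, one per port, one per counter group (cm_tx_msgs, cm_tx_retries,
cm_rx_msgs, cm_rx_duplicates), and one read-only attribute per CM message
type (req, rep, rtu, and so on), backed by cm_show_counter(). A minimal
userspace sketch of reading one such counter follows; the device name
"mthca0" and the port number are placeholders for illustration, not part of
the patch:

#include <stdio.h>

/* Reads one CM counter from the hierarchy created by cm_add_one() and
 * cm_create_port_fs() above.  Path components: class "infiniband_cm",
 * device name, port number, counter group, attribute. */
int main(void)
{
	const char *path =
		"/sys/class/infiniband_cm/mthca0/1/cm_rx_msgs/req";
	unsigned long count;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%lu", &count) == 1)
		printf("REQs received on port 1: %lu\n", count);
	fclose(f);
	return 0;
}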
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0751697ef98..1eff1b2c0e0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -488,7 +488,8 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
}
EXPORT_SYMBOL(rdma_destroy_qp);
-static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -514,13 +515,16 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
if (ret)
goto out;
+ if (conn_param)
+ qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
mutex_unlock(&id_priv->qp_mutex);
return ret;
}
-static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
+ struct rdma_conn_param *conn_param)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -536,6 +540,8 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
if (ret)
goto out;
+ if (conn_param)
+ qp_attr.max_rd_atomic = conn_param->initiator_depth;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
mutex_unlock(&id_priv->qp_mutex);
@@ -624,7 +630,8 @@ static inline int cma_zero_addr(struct sockaddr *addr)
struct in6_addr *ip6;
if (addr->sa_family == AF_INET)
- return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ return ipv4_is_zeronet(
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr);
else {
ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
@@ -634,7 +641,7 @@ static inline int cma_zero_addr(struct sockaddr *addr)
static inline int cma_loopback_addr(struct sockaddr *addr)
{
- return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
}
static inline int cma_any_addr(struct sockaddr *addr)
@@ -866,11 +873,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
{
int ret;
- ret = cma_modify_qp_rtr(id_priv);
+ ret = cma_modify_qp_rtr(id_priv, NULL);
if (ret)
goto reject;
- ret = cma_modify_qp_rts(id_priv);
+ ret = cma_modify_qp_rts(id_priv, NULL);
if (ret)
goto reject;
@@ -1122,8 +1129,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
cm_id->cm_handler = cma_ib_handler;
ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (!ret)
+ if (!ret) {
+ cma_enable_remove(conn_id);
goto out;
+ }
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
@@ -1262,6 +1271,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
+ struct ib_device_attr attr;
listen_id = cm_id->context;
if (cma_disable_remove(listen_id, CMA_LISTEN))
@@ -1279,7 +1289,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
atomic_inc(&conn_id->dev_remove);
conn_id->state = CMA_CONNECT;
- dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+ dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
if (!dev) {
ret = -EADDRNOTAVAIL;
cma_enable_remove(conn_id);
@@ -1311,10 +1321,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
*sin = iw_event->remote_addr;
+ ret = ib_query_device(conn_id->id.device, &attr);
+ if (ret) {
+ cma_enable_remove(conn_id);
+ rdma_destroy_id(new_cm_id);
+ goto out;
+ }
+
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
+ event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
+ event.param.conn.responder_resources = attr.max_qp_rd_atom;
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
@@ -2272,7 +2291,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
cm_id->remote_addr = *sin;
- ret = cma_modify_qp_rtr(id_priv);
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
goto out;
@@ -2335,25 +2354,15 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_cm_rep_param rep;
- struct ib_qp_attr qp_attr;
- int qp_attr_mask, ret;
-
- if (id_priv->id.qp) {
- ret = cma_modify_qp_rtr(id_priv);
- if (ret)
- goto out;
+ int ret;
- qp_attr.qp_state = IB_QPS_RTS;
- ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr,
- &qp_attr_mask);
- if (ret)
- goto out;
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
+ if (ret)
+ goto out;
- qp_attr.max_rd_atomic = conn_param->initiator_depth;
- ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
- if (ret)
- goto out;
- }
+ ret = cma_modify_qp_rts(id_priv, conn_param);
+ if (ret)
+ goto out;
memset(&rep, 0, sizeof rep);
rep.qp_num = id_priv->qp_num;
@@ -2378,7 +2387,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
- ret = cma_modify_qp_rtr(id_priv);
+ ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -2598,11 +2607,9 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
- mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
- mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
*mgid = *(union ib_gid *) (mc_map + 4);
}
}
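
With the cma.c hunks above, the initiator_depth and responder_resources
values a consumer supplies in struct rdma_conn_param are programmed into the
QP by cma_modify_qp_rtr()/cma_modify_qp_rts() instead of being handled only
on the active-side accept path. A hedged sketch of how a userspace librdmacm
client would fill in those fields before connecting; the surrounding id,
route, and QP setup is omitted and the numeric values are arbitrary:

#include <string.h>
#include <rdma/rdma_cma.h>

/* Sketch only: assumes "id" is an rdma_cm_id that has already resolved its
 * route and created a QP.  The two atomic-limit fields are the ones the
 * patched cma_modify_qp_rtr()/cma_modify_qp_rts() now apply to the QP. */
static int connect_with_atomics(struct rdma_cm_id *id)
{
	struct rdma_conn_param param;

	memset(&param, 0, sizeof param);
	param.responder_resources = 4;	/* max incoming RDMA reads/atomics */
	param.initiator_depth     = 4;	/* max outstanding RDMA reads/atomics */
	param.retry_count         = 7;

	return rdma_connect(id, &param);
}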
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index e8d5f6b6499..6c7aa59794d 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
int ret;
- struct ib_pool_fmr *fmr;
+ struct ib_pool_fmr *fmr, *next;
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
@@ -158,6 +158,20 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
#endif
}
+ /*
+ * The free_list may hold FMRs that have been put there
+ * because they haven't reached the max_remap count.
+ * Invalidate their mapping as well.
+ */
+ list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
+ if (fmr->remap_count == 0)
+ continue;
+ hlist_del_init(&fmr->cache_node);
+ fmr->remap_count = 0;
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+ list_move(&fmr->list, &unmap_list);
+ }
+
list_splice(&pool->dirty_list, &unmap_list);
INIT_LIST_HEAD(&pool->dirty_list);
pool->dirty_len = 0;
@@ -182,8 +196,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr)
struct ib_fmr_pool *pool = pool_ptr;
do {
- if (pool->dirty_len >= pool->dirty_watermark ||
- atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
ib_fmr_batch_release(pool);
atomic_inc(&pool->flush_ser);
@@ -194,8 +207,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr)
}
set_current_state(TASK_INTERRUPTIBLE);
- if (pool->dirty_len < pool->dirty_watermark &&
- atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
@@ -369,11 +381,6 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
i = 0;
list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
ib_dealloc_fmr(fmr->fmr);
list_del(&fmr->list);
kfree(fmr);
@@ -511,8 +518,10 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->free_list);
} else {
list_add_tail(&fmr->list, &pool->dirty_list);
- ++pool->dirty_len;
- wake_up_process(pool->thread);
+ if (++pool->dirty_len >= pool->dirty_watermark) {
+ atomic_inc(&pool->req_ser);
+ wake_up_process(pool->thread);
+ }
}
}
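
The fmr_pool.c hunks move the dirty-watermark decision out of the cleanup
thread: ib_fmr_pool_unmap() now bumps req_ser and wakes the thread once
dirty_len reaches the watermark, and the thread keeps flushing while
flush_ser lags req_ser (also unmapping free-list FMRs that still hold a
mapping). A small, hedged userspace illustration of that request/completion
serial-number pattern, using C11 atomics in place of the kernel's atomic_t;
this shows the idea only, not the pool code itself:

#include <stdio.h>
#include <stdatomic.h>

static atomic_int req_ser, flush_ser;

static void do_flush(void) { /* stand-in for ib_fmr_batch_release() */ }

/* Producer side: called when the dirty list reaches its watermark. */
static void request_flush(void)
{
	atomic_fetch_add(&req_ser, 1);
	/* in the kernel: wake_up_process(pool->thread); */
}

/* Cleanup-thread side: flush until we have caught up with the requests. */
static void flush_worker(void)
{
	while (atomic_load(&flush_ser) - atomic_load(&req_ser) < 0) {
		do_flush();
		atomic_fetch_add(&flush_ser, 1);
	}
}

int main(void)
{
	request_flush();
	request_flush();
	flush_worker();
	printf("flushed %d batches\n", atomic_load(&flush_ser));
	return 0;
}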
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 6f4287716ab..fbe16d5250a 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -701,7 +701,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
/* Check to post send on QP or process locally */
- if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD)
+ if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
+ smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
goto out;
local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -752,8 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
mad_agent_priv->agent.port_num);
if (port_priv) {
- mad_priv->mad.mad.mad_hdr.tid =
- ((struct ib_mad *)smp)->mad_hdr.tid;
+ memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
recv_mad_agent = find_mad_agent(port_priv,
&mad_priv->mad.mad);
}
@@ -1100,7 +1100,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
/* Timeout will be updated after send completes */
mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
- mad_send_wr->retries = send_buf->retries;
+ mad_send_wr->max_retries = send_buf->retries;
+ mad_send_wr->retries_left = send_buf->retries;
+ send_buf->retries = 0;
/* Reference for work request to QP + response */
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
@@ -1931,15 +1933,6 @@ local:
if (port_priv->device->process_mad) {
int ret;
- if (!response) {
- printk(KERN_ERR PFX "No memory for response MAD\n");
- /*
- * Is it better to assume that
- * it wouldn't be processed ?
- */
- goto out;
- }
-
ret = port_priv->device->process_mad(port_priv->device, 0,
port_priv->port_num,
wc, &recv->grh,
@@ -2282,8 +2275,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
/* Empty wait list to prevent receives from finding a request */
list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
- /* Empty local completion list as well */
- list_splice_init(&mad_agent_priv->local_list, &cancel_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Report all cancelled requests */
@@ -2445,9 +2436,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
{
int ret;
- if (!mad_send_wr->retries--)
+ if (!mad_send_wr->retries_left)
return -ETIMEDOUT;
+ mad_send_wr->retries_left--;
+ mad_send_wr->send_buf.retries++;
+
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
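
The mad.c hunks above split the retry bookkeeping into max_retries and
retries_left (the matching field change in mad_priv.h follows below) and
reuse send_buf->retries to report how many retransmissions were actually
performed. A hedged, kernel-style sketch of a consumer send-completion
handler under the new semantics; my_send_handler is a hypothetical callback,
not something this patch adds:

#include <linux/kernel.h>
#include <rdma/ib_mad.h>

/* After completion, send_buf->retries now holds the number of times the MAD
 * layer retransmitted the request (it used to hold the remaining budget). */
static void my_send_handler(struct ib_mad_agent *agent,
			    struct ib_mad_send_wc *send_wc)
{
	struct ib_mad_send_buf *send_buf = send_wc->send_buf;

	if (send_buf->retries)
		printk(KERN_INFO "MAD completed after %d retries, status %d\n",
		       send_buf->retries, send_wc->status);

	ib_free_send_mad(send_buf);
}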
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9be5cc00a3a..8b75010016e 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -131,7 +131,8 @@ struct ib_mad_send_wr_private {
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
- int retries;
+ int max_retries;
+ int retries_left;
int retry;
int refcount;
enum ib_wc_status status;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index d43bc62005b..a5e2a310f31 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -684,7 +684,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (seg_num > mad_send_wr->last_ack) {
adjust_last_ack(mad_send_wr, seg_num);
- mad_send_wr->retries = mad_send_wr->send_buf.retries;
+ mad_send_wr->retries_left = mad_send_wr->max_retries;
}
mad_send_wr->newwin = newwin;
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 1bc1fe60528..107f170c57c 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -73,11 +73,20 @@ struct mcast_device {
};
enum mcast_state {
- MCAST_IDLE,
MCAST_JOINING,
MCAST_MEMBER,
+ MCAST_ERROR,
+};
+
+enum mcast_group_state {
+ MCAST_IDLE,
MCAST_BUSY,
- MCAST_ERROR
+ MCAST_GROUP_ERROR,
+ MCAST_PKEY_EVENT
+};
+
+enum {
+ MCAST_INVALID_PKEY_INDEX = 0xFFFF
};
struct mcast_member;
@@ -93,9 +102,10 @@ struct mcast_group {
struct mcast_member *last_join;
int members[3];
atomic_t refcount;
- enum mcast_state state;
+ enum mcast_group_state state;
struct ib_sa_query *query;
int query_id;
+ u16 pkey_index;
};
struct mcast_member {
@@ -378,9 +388,19 @@ static int fail_join(struct mcast_group *group, struct mcast_member *member,
static void process_group_error(struct mcast_group *group)
{
struct mcast_member *member;
- int ret;
+ int ret = 0;
+ u16 pkey_index;
+
+ if (group->state == MCAST_PKEY_EVENT)
+ ret = ib_find_pkey(group->port->dev->device,
+ group->port->port_num,
+ be16_to_cpu(group->rec.pkey), &pkey_index);
spin_lock_irq(&group->lock);
+ if (group->state == MCAST_PKEY_EVENT && !ret &&
+ group->pkey_index == pkey_index)
+ goto out;
+
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
@@ -399,6 +419,7 @@ static void process_group_error(struct mcast_group *group)
}
group->rec.join_state = 0;
+out:
group->state = MCAST_BUSY;
spin_unlock_irq(&group->lock);
}
@@ -415,9 +436,9 @@ static void mcast_work_handler(struct work_struct *work)
retest:
spin_lock_irq(&group->lock);
while (!list_empty(&group->pending_list) ||
- (group->state == MCAST_ERROR)) {
+ (group->state != MCAST_BUSY)) {
- if (group->state == MCAST_ERROR) {
+ if (group->state != MCAST_BUSY) {
spin_unlock_irq(&group->lock);
process_group_error(group);
goto retest;
@@ -494,12 +515,19 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context)
{
struct mcast_group *group = context;
+ u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
if (status)
process_join_error(group, status);
else {
+ ib_find_pkey(group->port->dev->device, group->port->port_num,
+ be16_to_cpu(rec->pkey), &pkey_index);
+
spin_lock_irq(&group->port->lock);
group->rec = *rec;
+ if (group->state == MCAST_BUSY &&
+ group->pkey_index == MCAST_INVALID_PKEY_INDEX)
+ group->pkey_index = pkey_index;
if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
rb_erase(&group->node, &group->port->table);
mcast_insert(group->port, group, 1);
@@ -539,6 +567,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
group->port = port;
group->rec.mgid = *mgid;
+ group->pkey_index = MCAST_INVALID_PKEY_INDEX;
INIT_LIST_HEAD(&group->pending_list);
INIT_LIST_HEAD(&group->active_list);
INIT_WORK(&group->work, mcast_work_handler);
@@ -707,7 +736,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
-static void mcast_groups_lost(struct mcast_port *port)
+static void mcast_groups_event(struct mcast_port *port,
+ enum mcast_group_state state)
{
struct mcast_group *group;
struct rb_node *node;
@@ -721,7 +751,8 @@ static void mcast_groups_lost(struct mcast_port *port)
atomic_inc(&group->refcount);
queue_work(mcast_wq, &group->work);
}
- group->state = MCAST_ERROR;
+ if (group->state != MCAST_GROUP_ERROR)
+ group->state = state;
spin_unlock(&group->lock);
}
spin_unlock_irqrestore(&port->lock, flags);
@@ -731,16 +762,20 @@ static void mcast_event_handler(struct ib_event_handler *handler,
struct ib_event *event)
{
struct mcast_device *dev;
+ int index;
dev = container_of(handler, struct mcast_device, event_handler);
+ index = event->element.port_num - dev->start_port;
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
- mcast_groups_lost(&dev->port[event->element.port_num -
- dev->start_port]);
+ mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
+ break;
+ case IB_EVENT_PKEY_CHANGE:
+ mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
break;
default:
break;
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 1cfc2984434..aff96bac49b 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -59,7 +59,8 @@ extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type, int port_num);
/*
- * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
+ * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
+ * via process_mad
*/
static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
struct ib_device *device)
@@ -71,4 +72,19 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
+
+/*
+ * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
+ * via process_mad
+ */
+static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
+ struct ib_device *device)
+{
+ /* C14-13:3 -- We're at the end of the DR segment of path */
+ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */
+ return ((device->process_mad &&
+ ib_get_smp_direction(smp) &&
+ !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
+}
+
#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 3d405068132..c864ef70fdf 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -508,19 +508,10 @@ static int add_port(struct ib_device *device, int port_num)
p->ibdev = device;
p->port_num = port_num;
- p->kobj.ktype = &port_type;
- p->kobj.parent = kobject_get(&device->ports_parent);
- if (!p->kobj.parent) {
- ret = -EBUSY;
- goto err;
- }
-
- ret = kobject_set_name(&p->kobj, "%d", port_num);
- if (ret)
- goto err_put;
-
- ret = kobject_register(&p->kobj);
+ ret = kobject_init_and_add(&p->kobj, &port_type,
+ kobject_get(device->ports_parent),
+ "%d", port_num);
if (ret)
goto err_put;
@@ -549,6 +540,7 @@ static int add_port(struct ib_device *device, int port_num)
list_add_tail(&p->kobj.entry, &device->port_list);
+ kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
err_free_pkey:
@@ -570,9 +562,7 @@ err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
- kobject_put(&device->ports_parent);
-
-err:
+ kobject_put(device->ports_parent);
kfree(p);
return ret;
}
@@ -694,16 +684,9 @@ int ib_device_register_sysfs(struct ib_device *device)
goto err_unregister;
}
- device->ports_parent.parent = kobject_get(&class_dev->kobj);
- if (!device->ports_parent.parent) {
- ret = -EBUSY;
- goto err_unregister;
- }
- ret = kobject_set_name(&device->ports_parent, "ports");
- if (ret)
- goto err_put;
- ret = kobject_register(&device->ports_parent);
- if (ret)
+ device->ports_parent = kobject_create_and_add("ports",
+ kobject_get(&class_dev->kobj));
+ if (!device->ports_parent)
goto err_put;
if (device->node_type == RDMA_NODE_IB_SWITCH) {
@@ -731,7 +714,7 @@ err_put:
sysfs_remove_group(p, &pma_group);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
- kobject_unregister(p);
+ kobject_put(p);
}
}
@@ -755,10 +738,10 @@ void ib_device_unregister_sysfs(struct ib_device *device)
sysfs_remove_group(p, &pma_group);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
- kobject_unregister(p);
+ kobject_put(p);
}
- kobject_unregister(&device->ports_parent);
+ kobject_put(device->ports_parent);
class_device_unregister(&device->class_dev);
}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 424983f5b1e..4291ab42a5b 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -106,6 +106,9 @@ enum {
IB_UCM_MAX_DEVICES = 32
};
+/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
+extern struct class cm_class;
+
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
static void ib_ucm_add_one(struct ib_device *device);
@@ -1199,7 +1202,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static void ib_ucm_release_class_dev(struct class_device *class_dev)
+static void ucm_release_class_dev(struct class_device *class_dev)
{
struct ib_ucm_device *dev;
@@ -1217,11 +1220,6 @@ static const struct file_operations ucm_fops = {
.poll = ib_ucm_poll,
};
-static struct class ucm_class = {
- .name = "infiniband_cm",
- .release = ib_ucm_release_class_dev
-};
-
static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
struct ib_ucm_device *dev;
@@ -1257,9 +1255,10 @@ static void ib_ucm_add_one(struct ib_device *device)
if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
goto err;
- ucm_dev->class_dev.class = &ucm_class;
+ ucm_dev->class_dev.class = &cm_class;
ucm_dev->class_dev.dev = device->dma_device;
ucm_dev->class_dev.devt = ucm_dev->dev.dev;
+ ucm_dev->class_dev.release = ucm_release_class_dev;
snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
ucm_dev->devnum);
if (class_device_register(&ucm_dev->class_dev))
@@ -1306,40 +1305,34 @@ static int __init ib_ucm_init(void)
"infiniband_cm");
if (ret) {
printk(KERN_ERR "ucm: couldn't register device number\n");
- goto err;
+ goto error1;
}
- ret = class_register(&ucm_class);
- if (ret) {
- printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
- goto err_chrdev;
- }
-
- ret = class_create_file(&ucm_class, &class_attr_abi_version);
+ ret = class_create_file(&cm_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
- goto err_class;
+ goto error2;
}
ret = ib_register_client(&ucm_client);
if (ret) {
printk(KERN_ERR "ucm: couldn't register client\n");
- goto err_class;
+ goto error3;
}
return 0;
-err_class:
- class_unregister(&ucm_class);
-err_chrdev:
+error3:
+ class_remove_file(&cm_class, &class_attr_abi_version);
+error2:
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
-err:
+error1:
return ret;
}
static void __exit ib_ucm_cleanup(void)
{
ib_unregister_client(&ucm_client);
- class_unregister(&ucm_class);
+ class_remove_file(&cm_class, &class_attr_abi_version);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 90d675ad9ec..15937eb38aa 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -31,6 +31,7 @@
*/
#include <linux/completion.h>
+#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/idr.h>
@@ -991,6 +992,96 @@ out:
return ret;
}
+static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
+{
+ /* Acquire mutex's based on pointer comparison to prevent deadlock. */
+ if (file1 < file2) {
+ mutex_lock(&file1->mut);
+ mutex_lock(&file2->mut);
+ } else {
+ mutex_lock(&file2->mut);
+ mutex_lock(&file1->mut);
+ }
+}
+
+static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
+{
+ if (file1 < file2) {
+ mutex_unlock(&file2->mut);
+ mutex_unlock(&file1->mut);
+ } else {
+ mutex_unlock(&file1->mut);
+ mutex_unlock(&file2->mut);
+ }
+}
+
+static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &file->event_list);
+}
+
+static ssize_t ucma_migrate_id(struct ucma_file *new_file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_migrate_id cmd;
+ struct rdma_ucm_migrate_resp resp;
+ struct ucma_context *ctx;
+ struct file *filp;
+ struct ucma_file *cur_file;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ /* Get current fd to protect against it being closed */
+ filp = fget(cmd.fd);
+ if (!filp)
+ return -ENOENT;
+
+ /* Validate current fd and prevent destruction of id. */
+ ctx = ucma_get_ctx(filp->private_data, cmd.id);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto file_put;
+ }
+
+ cur_file = ctx->file;
+ if (cur_file == new_file) {
+ resp.events_reported = ctx->events_reported;
+ goto response;
+ }
+
+ /*
+ * Migrate events between fd's, maintaining order, and avoiding new
+ * events being added before existing events.
+ */
+ ucma_lock_files(cur_file, new_file);
+ mutex_lock(&mut);
+
+ list_move_tail(&ctx->list, &new_file->ctx_list);
+ ucma_move_events(ctx, new_file);
+ ctx->file = new_file;
+ resp.events_reported = ctx->events_reported;
+
+ mutex_unlock(&mut);
+ ucma_unlock_files(cur_file, new_file);
+
+response:
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ ucma_put_ctx(ctx);
+file_put:
+ fput(filp);
+ return ret;
+}
+
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
@@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
[RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
[RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
+ [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
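
ucma_migrate_id() above must hold the mutexes of two ucma_file instances at
once, and ucma_lock_files() avoids an ABBA deadlock by always taking the
mutex at the lower address first and releasing in the opposite order. A
hedged, generic userspace sketch of the same address-ordering idiom using
pthreads; struct resource and the function names are made up for
illustration:

#include <pthread.h>

struct resource {
	pthread_mutex_t mut;
	/* ... */
};

/* Lock two resources in a globally consistent order (lowest address first)
 * so that two threads migrating in opposite directions cannot deadlock. */
static void lock_pair(struct resource *a, struct resource *b)
{
	if (a < b) {
		pthread_mutex_lock(&a->mut);
		pthread_mutex_lock(&b->mut);
	} else {
		pthread_mutex_lock(&b->mut);
		pthread_mutex_lock(&a->mut);
	}
}

static void unlock_pair(struct resource *a, struct resource *b)
{
	/* Release in the reverse of the acquisition order. */
	if (a < b) {
		pthread_mutex_unlock(&b->mut);
		pthread_mutex_unlock(&a->mut);
	} else {
		pthread_mutex_unlock(&a->mut);
		pthread_mutex_unlock(&b->mut);
	}
}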
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index b53eac4611d..4e915104ac4 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -2,6 +2,7 @@
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2008 Cisco. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -42,7 +43,7 @@
#include <linux/cdev.h>
#include <linux/dma-mapping.h>
#include <linux/poll.h>
-#include <linux/rwsem.h>
+#include <linux/mutex.h>
#include <linux/kref.h>
#include <linux/compat.h>
@@ -94,7 +95,7 @@ struct ib_umad_port {
struct class_device *sm_class_dev;
struct semaphore sm_sem;
- struct rw_semaphore mutex;
+ struct mutex file_mutex;
struct list_head file_list;
struct ib_device *ib_dev;
@@ -110,11 +111,11 @@ struct ib_umad_device {
};
struct ib_umad_file {
+ struct mutex mutex;
struct ib_umad_port *port;
struct list_head recv_list;
struct list_head send_list;
struct list_head port_list;
- spinlock_t recv_lock;
spinlock_t send_lock;
wait_queue_head_t recv_wait;
struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
@@ -156,7 +157,7 @@ static int hdr_size(struct ib_umad_file *file)
sizeof (struct ib_user_mad_hdr_old);
}
-/* caller must hold port->mutex at least for reading */
+/* caller must hold file->mutex */
static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
{
return file->agents_dead ? NULL : file->agent[id];
@@ -168,32 +169,30 @@ static int queue_packet(struct ib_umad_file *file,
{
int ret = 1;
- down_read(&file->port->mutex);
+ mutex_lock(&file->mutex);
for (packet->mad.hdr.id = 0;
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
packet->mad.hdr.id++)
if (agent == __get_agent(file, packet->mad.hdr.id)) {
- spin_lock_irq(&file->recv_lock);
list_add_tail(&packet->list, &file->recv_list);
- spin_unlock_irq(&file->recv_lock);
wake_up_interruptible(&file->recv_wait);
ret = 0;
break;
}
- up_read(&file->port->mutex);
+ mutex_unlock(&file->mutex);
return ret;
}
static void dequeue_send(struct ib_umad_file *file,
struct ib_umad_packet *packet)
- {
+{
spin_lock_irq(&file->send_lock);
list_del(&packet->list);
spin_unlock_irq(&file->send_lock);
- }
+}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *send_wc)
@@ -341,10 +340,10 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
if (count < hdr_size(file))
return -EINVAL;
- spin_lock_irq(&file->recv_lock);
+ mutex_lock(&file->mutex);
while (list_empty(&file->recv_list)) {
- spin_unlock_irq(&file->recv_lock);
+ mutex_unlock(&file->mutex);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
@@ -353,13 +352,13 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
!list_empty(&file->recv_list)))
return -ERESTARTSYS;
- spin_lock_irq(&file->recv_lock);
+ mutex_lock(&file->mutex);
}
packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
list_del(&packet->list);
- spin_unlock_irq(&file->recv_lock);
+ mutex_unlock(&file->mutex);
if (packet->recv_wc)
ret = copy_recv_mad(file, buf, packet, count);
@@ -368,9 +367,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
if (ret < 0) {
/* Requeue packet */
- spin_lock_irq(&file->recv_lock);
+ mutex_lock(&file->mutex);
list_add(&packet->list, &file->recv_list);
- spin_unlock_irq(&file->recv_lock);
+ mutex_unlock(&file->mutex);
} else {
if (packet->recv_wc)
ib_free_recv_mad(packet->recv_wc);
@@ -481,7 +480,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
- down_read(&file->port->mutex);
+ mutex_lock(&file->mutex);
agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
@@ -577,7 +576,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (ret)
goto err_send;
- up_read(&file->port->mutex);
+ mutex_unlock(&file->mutex);
return count;
err_send:
@@ -587,7 +586,7 @@ err_msg:
err_ah:
ib_destroy_ah(ah);
err_up:
- up_read(&file->port->mutex);
+ mutex_unlock(&file->mutex);
err:
kfree(packet);
return ret;
@@ -613,11 +612,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
{
struct ib_user_mad_reg_req ureq;
struct ib_mad_reg_req req;
- struct ib_mad_agent *agent;
+ struct ib_mad_agent *agent = NULL;
int agent_id;
int ret;
- down_write(&file->port->mutex);
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
ret = -EPIPE;
@@ -666,13 +666,13 @@ found:
send_handler, recv_handler, file);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
+ agent = NULL;
goto out;
}
if (put_user(agent_id,
(u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
ret = -EFAULT;
- ib_unregister_mad_agent(agent);
goto out;
}
@@ -690,7 +690,13 @@ found:
ret = 0;
out:
- up_write(&file->port->mutex);
+ mutex_unlock(&file->mutex);
+
+ if (ret && agent)
+ ib_unregister_mad_agent(agent);
+
+ mutex_unlock(&file->port->file_mutex);
+
return ret;
}
@@ -703,7 +709,8 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg))
return -EFAULT;
- down_write(&file->port->mutex);
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
@@ -714,11 +721,13 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
file->agent[id] = NULL;
out:
- up_write(&file->port->mutex);
+ mutex_unlock(&file->mutex);
if (agent)
ib_unregister_mad_agent(agent);
+ mutex_unlock(&file->port->file_mutex);
+
return ret;
}
@@ -726,12 +735,12 @@ static long ib_umad_enable_pkey(struct ib_umad_file *file)
{
int ret = 0;
- down_write(&file->port->mutex);
+ mutex_lock(&file->mutex);
if (file->already_used)
ret = -EINVAL;
else
file->use_pkey_index = 1;
- up_write(&file->port->mutex);
+ mutex_unlock(&file->mutex);
return ret;
}
@@ -783,7 +792,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
if (!port)
return -ENXIO;
- down_write(&port->mutex);
+ mutex_lock(&port->file_mutex);
if (!port->ib_dev) {
ret = -ENXIO;
@@ -797,7 +806,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
goto out;
}
- spin_lock_init(&file->recv_lock);
+ mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
INIT_LIST_HEAD(&file->recv_list);
INIT_LIST_HEAD(&file->send_list);
@@ -809,7 +818,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
out:
- up_write(&port->mutex);
+ mutex_unlock(&port->file_mutex);
return ret;
}
@@ -821,7 +830,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
int already_dead;
int i;
- down_write(&file->port->mutex);
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
already_dead = file->agents_dead;
file->agents_dead = 1;
@@ -834,14 +844,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
list_del(&file->port_list);
- downgrade_write(&file->port->mutex);
+ mutex_unlock(&file->mutex);
if (!already_dead)
for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
if (file->agent[i])
ib_unregister_mad_agent(file->agent[i]);
- up_read(&file->port->mutex);
+ mutex_unlock(&file->port->file_mutex);
kfree(file);
kref_put(&dev->ref, ib_umad_release_dev);
@@ -914,10 +924,10 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
};
int ret = 0;
- down_write(&port->mutex);
+ mutex_lock(&port->file_mutex);
if (port->ib_dev)
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- up_write(&port->mutex);
+ mutex_unlock(&port->file_mutex);
up(&port->sm_sem);
@@ -981,7 +991,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
port->ib_dev = device;
port->port_num = port_num;
init_MUTEX(&port->sm_sem);
- init_rwsem(&port->mutex);
+ mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
port->dev = cdev_alloc();
@@ -1052,6 +1062,7 @@ err_cdev:
static void ib_umad_kill_port(struct ib_umad_port *port)
{
struct ib_umad_file *file;
+ int already_dead;
int id;
class_set_devdata(port->class_dev, NULL);
@@ -1067,42 +1078,22 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
umad_port[port->dev_num] = NULL;
spin_unlock(&port_lock);
- down_write(&port->mutex);
+ mutex_lock(&port->file_mutex);
port->ib_dev = NULL;
- /*
- * Now go through the list of files attached to this port and
- * unregister all of their MAD agents. We need to hold
- * port->mutex while doing this to avoid racing with
- * ib_umad_close(), but we can't hold the mutex for writing
- * while calling ib_unregister_mad_agent(), since that might
- * deadlock by calling back into queue_packet(). So we
- * downgrade our lock to a read lock, and then drop and
- * reacquire the write lock for the next iteration.
- *
- * We do list_del_init() on the file's list_head so that the
- * list_del in ib_umad_close() is still OK, even after the
- * file is removed from the list.
- */
- while (!list_empty(&port->file_list)) {
- file = list_entry(port->file_list.next, struct ib_umad_file,
- port_list);
-
+ list_for_each_entry(file, &port->file_list, port_list) {
+ mutex_lock(&file->mutex);
+ already_dead = file->agents_dead;
file->agents_dead = 1;
- list_del_init(&file->port_list);
-
- downgrade_write(&port->mutex);
+ mutex_unlock(&file->mutex);
for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
if (file->agent[id])
ib_unregister_mad_agent(file->agent[id]);
-
- up_read(&port->mutex);
- down_write(&port->mutex);
}
- up_write(&port->mutex);
+ mutex_unlock(&port->file_mutex);
clear_bit(port->dev_num, dev_map);
}