diff options
Diffstat (limited to 'drivers/infiniband/core/user_mad.c')
| -rw-r--r-- | drivers/infiniband/core/user_mad.c | 887 |
1 files changed, 563 insertions, 324 deletions
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c908de8db5a..1acb9910055 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,7 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ */ #include <linux/module.h> @@ -40,14 +39,16 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/cdev.h> -#include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/poll.h> -#include <linux/rwsem.h> +#include <linux/mutex.h> #include <linux/kref.h> +#include <linux/compat.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <rdma/ib_mad.h> #include <rdma/ib_user_mad.h> @@ -65,12 +66,9 @@ enum { }; /* - * Our lifetime rules for these structs are the following: each time a - * device special file is opened, we look up the corresponding struct - * ib_umad_port by minor in the umad_port[] table while holding the - * port_lock. If this lookup succeeds, we take a reference on the - * ib_umad_port's struct ib_umad_device while still holding the - * port_lock; if the lookup fails, we fail the open(). We drop these + * Our lifetime rules for these structs are the following: + * device special file is opened, we take a reference on the + * ib_umad_port's struct ib_umad_device. We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there @@ -78,23 +76,18 @@ enum { * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * - * When destroying an ib_umad_device, we clear all of its - * ib_umad_ports from umad_port[] while holding port_lock before - * dropping the module's reference to the ib_umad_device. This is - * always safe because any open() calls will either succeed and obtain - * a reference before we clear the umad_port[] entries, or fail after - * we clear the umad_port[] entries. + * When destroying an ib_umad_device, we drop the module's reference. */ struct ib_umad_port { - struct cdev *dev; - struct class_device *class_dev; + struct cdev cdev; + struct device *dev; - struct cdev *sm_dev; - struct class_device *sm_class_dev; + struct cdev sm_cdev; + struct device *sm_dev; struct semaphore sm_sem; - struct rw_semaphore mutex; + struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; @@ -105,22 +98,27 @@ struct ib_umad_port { struct ib_umad_device { int start_port, end_port; - struct kref ref; + struct kobject kobj; struct ib_umad_port port[0]; }; struct ib_umad_file { + struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; + struct list_head send_list; struct list_head port_list; - spinlock_t recv_lock; + spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; + u8 use_pkey_index; + u8 already_used; }; struct ib_umad_packet { struct ib_mad_send_buf *msg; + struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; @@ -131,21 +129,30 @@ static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); -static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; -static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); +static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); + container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } -/* caller must hold port->mutex at least for reading */ +static struct kobj_type ib_umad_dev_ktype = { + .release = ib_umad_release_dev, +}; + +static int hdr_size(struct ib_umad_file *file) +{ + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : + sizeof (struct ib_user_mad_hdr_old); +} + +/* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; @@ -157,50 +164,47 @@ static int queue_packet(struct ib_umad_file *file, { int ret = 1; - down_read(&file->port->mutex); + mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { - spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); wake_up_interruptible(&file->recv_wait); ret = 0; break; } - up_read(&file->port->mutex); + mutex_unlock(&file->mutex); return ret; } +static void dequeue_send(struct ib_umad_file *file, + struct ib_umad_packet *packet) +{ + spin_lock_irq(&file->send_lock); + list_del(&packet->list); + spin_unlock_irq(&file->send_lock); +} + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; + dequeue_send(file, packet); ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); - if (!timeout) - goto out; - - timeout->length = IB_MGMT_MAD_HDR; - timeout->mad.hdr.id = packet->mad.hdr.id; - timeout->mad.hdr.status = ETIMEDOUT; - memcpy(timeout->mad.data, packet->mad.data, - sizeof (struct ib_mad_hdr)); - - if (queue_packet(file, agent, timeout)) - kfree(timeout); + packet->length = IB_MGMT_MAD_HDR; + packet->mad.hdr.status = ETIMEDOUT; + if (!queue_packet(file, agent, packet)) + return; } -out: kfree(packet); } @@ -209,43 +213,118 @@ static void recv_handler(struct ib_mad_agent *agent, { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; - int length; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) - goto out; + goto err1; - length = mad_recv_wc->mad_len; - packet = kzalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) - goto out; - - packet->length = length; - - ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); - - packet->mad.hdr.status = 0; - packet->mad.hdr.length = length + sizeof (struct ib_user_mad); - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.hdr.sl = mad_recv_wc->wc->sl; - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + goto err1; + + packet->length = mad_recv_wc->mad_len; + packet->recv_wc = mad_recv_wc; + + packet->mad.hdr.status = 0; + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { - /* XXX parse GRH */ - packet->mad.hdr.gid_index = 0; - packet->mad.hdr.hop_limit = 0; - packet->mad.hdr.traffic_class = 0; - memset(packet->mad.hdr.gid, 0, 16); - packet->mad.hdr.flow_label = 0; + struct ib_ah_attr ah_attr; + + ib_init_ah_from_wc(agent->device, agent->port_num, + mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, + &ah_attr); + + packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; + packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; + packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; + memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); + packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); } if (queue_packet(file, agent, packet)) - kfree(packet); + goto err2; + return; -out: +err2: + kfree(packet); +err1: ib_free_recv_mad(mad_recv_wc); } +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) +{ + struct ib_mad_recv_buf *recv_buf; + int left, seg_payload, offset, max_seg_payload; + + /* We need enough room to copy the first (or only) MAD segment. */ + recv_buf = &packet->recv_wc->recv_buf; + if ((packet->length <= sizeof (*recv_buf->mad) && + count < hdr_size(file) + packet->length) || + (packet->length > sizeof (*recv_buf->mad) && + count < hdr_size(file) + sizeof (*recv_buf->mad))) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, hdr_size(file))) + return -EFAULT; + + buf += hdr_size(file); + seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + if (copy_to_user(buf, recv_buf->mad, seg_payload)) + return -EFAULT; + + if (seg_payload < packet->length) { + /* + * Multipacket RMPP MAD message. Copy remainder of message. + * Note that last segment may have a shorter payload. + */ + if (count < hdr_size(file) + packet->length) { + /* + * The buffer is too small, return the first RMPP segment, + * which includes the RMPP message length. + */ + return -ENOSPC; + } + offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); + max_seg_payload = sizeof (struct ib_mad) - offset; + + for (left = packet->length - seg_payload, buf += seg_payload; + left; left -= seg_payload, buf += seg_payload) { + recv_buf = container_of(recv_buf->list.next, + struct ib_mad_recv_buf, list); + seg_payload = min(left, max_seg_payload); + if (copy_to_user(buf, ((void *) recv_buf->mad) + offset, + seg_payload)) + return -EFAULT; + } + } + return hdr_size(file) + packet->length; +} + +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) +{ + ssize_t size = hdr_size(file) + packet->length; + + if (count < size) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, hdr_size(file))) + return -EFAULT; + + buf += hdr_size(file); + + if (copy_to_user(buf, packet->mad.data, packet->length)) + return -EFAULT; + + return size; +} + static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -253,13 +332,13 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) + if (count < hdr_size(file)) return -EINVAL; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; @@ -268,36 +347,97 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, !list_empty(&file->recv_list))) return -ERESTARTSYS; - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); - spin_unlock_irq(&file->recv_lock); + mutex_unlock(&file->mutex); - if (count < packet->length + sizeof (struct ib_user_mad)) { - /* Return length needed (and first RMPP segment) if too small */ - if (copy_to_user(buf, &packet->mad, - sizeof (struct ib_user_mad) + sizeof (struct ib_mad))) - ret = -EFAULT; - else - ret = -ENOSPC; - } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) - ret = -EFAULT; + if (packet->recv_wc) + ret = copy_recv_mad(file, buf, packet, count); else - ret = packet->length + sizeof (struct ib_user_mad); + ret = copy_send_mad(file, buf, packet, count); + if (ret < 0) { /* Requeue packet */ - spin_lock_irq(&file->recv_lock); + mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); - spin_unlock_irq(&file->recv_lock); - } else + mutex_unlock(&file->mutex); + } else { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } return ret; } +static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) +{ + int left, seg; + + /* Copy class specific header */ + if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && + copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, + msg->hdr_len - IB_MGMT_RMPP_HDR)) + return -EFAULT; + + /* All headers are in place. Copy data segments. */ + for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; + seg++, left -= msg->seg_size, buf += msg->seg_size) { + if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, + min(left, msg->seg_size))) + return -EFAULT; + } + return 0; +} + +static int same_destination(struct ib_user_mad_hdr *hdr1, + struct ib_user_mad_hdr *hdr2) +{ + if (!hdr1->grh_present && !hdr2->grh_present) + return (hdr1->lid == hdr2->lid); + + if (hdr1->grh_present && hdr2->grh_present) + return !memcmp(hdr1->gid, hdr2->gid, 16); + + return 0; +} + +static int is_duplicate(struct ib_umad_file *file, + struct ib_umad_packet *packet) +{ + struct ib_umad_packet *sent_packet; + struct ib_mad_hdr *sent_hdr, *hdr; + + hdr = (struct ib_mad_hdr *) packet->mad.data; + list_for_each_entry(sent_packet, &file->send_list, list) { + sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; + + if ((hdr->tid != sent_hdr->tid) || + (hdr->mgmt_class != sent_hdr->mgmt_class)) + continue; + + /* + * No need to be overly clever here. If two new operations have + * the same TID, reject the second as a duplicate. This is more + * restrictive than required by the spec. + */ + if (!ib_response_mad((struct ib_mad *) hdr)) { + if (!ib_response_mad((struct ib_mad *) sent_hdr)) + return 1; + continue; + } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) + continue; + + if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) + return 1; + } + + return 0; +} + static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -307,32 +447,34 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; - u8 method; __be64 *tid; - int ret, length, hdr_len, copy_offset; - int rmpp_active, has_rmpp_header; + int ret, data_len, hdr_len, copy_offset, rmpp_active; - if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; - length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { + if (copy_from_user(&packet->mad, buf, hdr_size(file))) { ret = -EFAULT; goto err; } - if (packet->mad.hdr.id < 0 || - packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } - down_read(&file->port->mutex); + buf += hdr_size(file); + + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { + ret = -EFAULT; + goto err; + } + + mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { @@ -348,8 +490,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); - ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); - ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } @@ -360,91 +503,84 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = IB_MGMT_SA_HDR; - copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; - } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && - rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { - hdr_len = IB_MGMT_VENDOR_HDR; - copy_offset = IB_MGMT_RMPP_HDR; - has_rmpp_header = 1; - } else { - hdr_len = IB_MGMT_MAD_HDR; + hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { copy_offset = IB_MGMT_MAD_HDR; - has_rmpp_header = 0; - } - - if (has_rmpp_header) + rmpp_active = 0; + } else { + copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; - else - rmpp_active = 0; - - /* Validate that the management class can support RMPP */ - if (rmpp_active && !agent->rmpp_version) { - ret = -EINVAL; - goto err_ah; } + data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, rmpp_active, - hdr_len, length - hdr_len, - GFP_KERNEL); + packet->mad.hdr.pkey_index, rmpp_active, + hdr_len, data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } - packet->msg->ah = ah; + packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->retries = packet->mad.hdr.retries; + packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; - /* Copy MAD headers (RMPP header in place) */ + /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(packet->msg->mad + copy_offset, - buf + sizeof (struct ib_user_mad) + copy_offset, - length - copy_offset)) { - ret = -EFAULT; - goto err_msg; + + if (!rmpp_active) { + if (copy_from_user(packet->msg->mad + copy_offset, + buf + copy_offset, + hdr_len + data_len - copy_offset)) { + ret = -EFAULT; + goto err_msg; + } + } else { + ret = copy_rmpp_mad(packet->msg, buf); + if (ret) + goto err_msg; } /* - * If userspace is generating a request that will generate a - * response, we need to make sure the high-order part of the - * transaction ID matches the agent being used to send the - * MAD. + * Set the high-order part of the transaction ID to make MADs from + * different agents unique, and allow routing responses back to the + * original requestor. */ - method = ((struct ib_mad_hdr *) packet->msg->mad)->method; - - if (!(method & IB_MGMT_METHOD_RESP) && - method != IB_MGMT_METHOD_TRAP_REPRESS && - method != IB_MGMT_METHOD_SEND) { + if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); + rmpp_mad->mad_hdr.tid = *tid; } - ret = ib_post_send_mad(packet->msg, NULL); - if (ret) + spin_lock_irq(&file->send_lock); + ret = is_duplicate(file, packet); + if (!ret) + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + if (ret) { + ret = -EINVAL; goto err_msg; + } - up_read(&file->port->mutex); + ret = ib_post_send_mad(packet->msg, NULL); + if (ret) + goto err_send; + mutex_unlock(&file->mutex); return count; +err_send: + dequeue_send(file, packet); err_msg: ib_free_send_mad(packet->msg); - err_ah: ib_destroy_ah(ah); - err_up: - up_read(&file->port->mutex); - + mutex_unlock(&file->mutex); err: kfree(packet); return ret; @@ -465,22 +601,24 @@ static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wa return mask; } -static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) +static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, + int compat_method_mask) { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; - struct ib_mad_agent *agent; + struct ib_mad_agent *agent = NULL; int agent_id; int ret; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); if (!file->port->ib_dev) { ret = -EPIPE; goto out; } - if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { + if (copy_from_user(&ureq, arg, sizeof ureq)) { ret = -EFAULT; goto out; } @@ -501,8 +639,18 @@ found: if (ureq.mgmt_class) { req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; - memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); - memcpy(req.oui, ureq.oui, sizeof req.oui); + memcpy(req.oui, ureq.oui, sizeof req.oui); + + if (compat_method_mask) { + u32 *umm = (u32 *) ureq.method_mask; + int i; + + for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) + req.method_mask[i] = + umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); + } else + memcpy(req.method_mask, ureq.method_mask, + sizeof req.method_mask); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, @@ -512,36 +660,53 @@ found: send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); + agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - ib_unregister_mad_agent(agent); goto out; } + if (!file->already_used) { + file->already_used = 1; + if (!file->use_pkey_index) { + printk(KERN_WARNING "user_mad: process %s did not enable " + "P_Key index support.\n", current->comm); + printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " + "has info on the new ABI.\n"); + } + } + file->agent[agent_id] = agent; ret = 0; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + return ret; } -static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) +static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; - if (get_user(id, (u32 __user *) arg)) + if (get_user(id, arg)) return -EFAULT; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } @@ -550,11 +715,27 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) file->agent[id] = NULL; out: - up_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); + mutex_unlock(&file->port->file_mutex); + + return ret; +} + +static long ib_umad_enable_pkey(struct ib_umad_file *file) +{ + int ret = 0; + + mutex_lock(&file->mutex); + if (file->already_used) + ret = -EINVAL; + else + file->use_pkey_index = 1; + mutex_unlock(&file->mutex); + return ret; } @@ -563,45 +744,64 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: - return ib_umad_reg_agent(filp->private_data, arg); + return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); case IB_USER_MAD_UNREGISTER_AGENT: - return ib_umad_unreg_agent(filp->private_data, arg); + return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); default: return -ENOIOCTLCMD; } } +#ifdef CONFIG_COMPAT +static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case IB_USER_MAD_REGISTER_AGENT: + return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); + case IB_USER_MAD_UNREGISTER_AGENT: + return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); + default: + return -ENOIOCTLCMD; + } +} +#endif + +/* + * ib_umad_open() does not need the BKL: + * + * - the ib_umad_port structures are properly reference counted, and + * everything else is purely local to the file being created, so + * races against other open calls are not a problem; + * - the ioctl method does not affect any global state outside of the + * file structure being operated on; + */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret = 0; - - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; - if (port) - kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); + int ret = -ENXIO; - if (!port) - return -ENXIO; + port = container_of(inode->i_cdev, struct ib_umad_port, cdev); - down_write(&port->mutex); + mutex_lock(&port->file_mutex); - if (!port->ib_dev) { - ret = -ENXIO; + if (!port->ib_dev) goto out; - } + ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - ret = -ENOMEM; + if (!file) goto out; - } - spin_lock_init(&file->recv_lock); + mutex_init(&file->mutex); + spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); + INIT_LIST_HEAD(&file->send_list); init_waitqueue_head(&file->recv_wait); file->port = port; @@ -609,8 +809,17 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); + ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kobject_get(&port->umad_dev->kobj); + out: - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); return ret; } @@ -622,40 +831,47 @@ static int ib_umad_close(struct inode *inode, struct file *filp) int already_dead; int i; - down_write(&file->port->mutex); + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; - list_for_each_entry_safe(packet, tmp, &file->recv_list, list) + list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } list_del(&file->port_list); - downgrade_write(&file->port->mutex); + mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); - up_read(&file->port->mutex); + mutex_unlock(&file->port->file_mutex); kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); + kobject_put(&dev->kobj); return 0; } -static struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, +static const struct file_operations umad_fops = { + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, - .compat_ioctl = ib_umad_ioctl, - .open = ib_umad_open, - .release = ib_umad_close +#ifdef CONFIG_COMPAT + .compat_ioctl = ib_umad_compat_ioctl, +#endif + .open = ib_umad_open, + .release = ib_umad_close, + .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) @@ -666,14 +882,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) }; int ret; - spin_lock(&port_lock); - port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; - if (port) - kref_get(&port->umad_dev->ref); - spin_unlock(&port_lock); - - if (!port) - return -ENXIO; + port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { @@ -688,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) { - up(&port->sm_sem); - goto fail; - } + if (ret) + goto err_up_sem; filp->private_data = port; + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kobject_get(&port->umad_dev->kobj); + return 0; +err_clr_sm_cap: + swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); + fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } @@ -710,22 +929,23 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) }; int ret = 0; - down_write(&port->mutex); + mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + kobject_put(&port->umad_dev->kobj); return ret; } -static struct file_operations umad_sm_fops = { - .owner = THIS_MODULE, - .open = ib_umad_sm_open, - .release = ib_umad_sm_close +static const struct file_operations umad_sm_fops = { + .owner = THIS_MODULE, + .open = ib_umad_sm_open, + .release = ib_umad_sm_close, + .llseek = no_llseek, }; static struct ib_client umad_client = { @@ -734,113 +954,141 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, + char *buf) { - struct ib_umad_port *port = class_get_devdata(class_dev); + struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%s\n", port->ib_dev->name); } -static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static ssize_t show_port(struct class_device *class_dev, char *buf) +static ssize_t show_port(struct device *dev, struct device_attribute *attr, + char *buf) { - struct ib_umad_port *port = class_get_devdata(class_dev); + struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%d\n", port->port_num); } -static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_MAD_ABI_VERSION)); + +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); +static int find_overflow_devnum(void) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, + "infiniband_mad"); + if (ret) { + printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); + if (ret >= IB_UMAD_MAX_PORTS) + return -1; + + return ret; } -static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, + struct ib_umad_device *umad_dev, struct ib_umad_port *port) { + int devnum; + dev_t base; + spin_lock(&port_lock); - port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->dev_num >= IB_UMAD_MAX_PORTS) { + devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); - return -1; + devnum = find_overflow_devnum(); + if (devnum < 0) + return -1; + + spin_lock(&port_lock); + port->dev_num = devnum + IB_UMAD_MAX_PORTS; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + port->dev_num = devnum; + base = devnum + base_dev; + set_bit(devnum, dev_map); } - set_bit(port->dev_num, dev_map); spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); - init_rwsem(&port->mutex); + sema_init(&port->sm_sem, 1); + mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); - port->dev = cdev_alloc(); - if (!port->dev) - return -1; - port->dev->owner = THIS_MODULE; - port->dev->ops = &umad_fops; - kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); - if (cdev_add(port->dev, base_dev + port->dev_num, 1)) + cdev_init(&port->cdev, &umad_fops); + port->cdev.owner = THIS_MODULE; + port->cdev.kobj.parent = &umad_dev->kobj; + kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); + if (cdev_add(&port->cdev, base, 1)) goto err_cdev; - port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, - device->dma_device, - "umad%d", port->dev_num); - if (IS_ERR(port->class_dev)) + port->dev = device_create(umad_class, device->dma_device, + port->cdev.dev, port, + "umad%d", port->dev_num); + if (IS_ERR(port->dev)) goto err_cdev; - if (class_device_create_file(port->class_dev, &class_device_attr_ibdev)) - goto err_class; - if (class_device_create_file(port->class_dev, &class_device_attr_port)) - goto err_class; - - port->sm_dev = cdev_alloc(); - if (!port->sm_dev) - goto err_class; - port->sm_dev->owner = THIS_MODULE; - port->sm_dev->ops = &umad_sm_fops; - kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num); - if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + if (device_create_file(port->dev, &dev_attr_ibdev)) + goto err_dev; + if (device_create_file(port->dev, &dev_attr_port)) + goto err_dev; + + base += IB_UMAD_MAX_PORTS; + cdev_init(&port->sm_cdev, &umad_sm_fops); + port->sm_cdev.owner = THIS_MODULE; + port->sm_cdev.kobj.parent = &umad_dev->kobj; + kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); + if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; - port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, - device->dma_device, - "issm%d", port->dev_num); - if (IS_ERR(port->sm_class_dev)) + port->sm_dev = device_create(umad_class, device->dma_device, + port->sm_cdev.dev, port, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_dev)) goto err_sm_cdev; - class_set_devdata(port->class_dev, port); - class_set_devdata(port->sm_class_dev, port); - - if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) - goto err_sm_class; - if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) - goto err_sm_class; - - spin_lock(&port_lock); - umad_port[port->dev_num] = port; - spin_unlock(&port_lock); + if (device_create_file(port->sm_dev, &dev_attr_ibdev)) + goto err_sm_dev; + if (device_create_file(port->sm_dev, &dev_attr_port)) + goto err_sm_dev; return 0; -err_sm_class: - class_device_destroy(umad_class, port->sm_dev->dev); +err_sm_dev: + device_destroy(umad_class, port->sm_cdev.dev); err_sm_cdev: - cdev_del(port->sm_dev); + cdev_del(&port->sm_cdev); -err_class: - class_device_destroy(umad_class, port->dev->dev); +err_dev: + device_destroy(umad_class, port->cdev.dev); err_cdev: - cdev_del(port->dev); - clear_bit(port->dev_num, dev_map); + cdev_del(&port->cdev); + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); return -1; } @@ -850,57 +1098,35 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; - class_set_devdata(port->class_dev, NULL); - class_set_devdata(port->sm_class_dev, NULL); + dev_set_drvdata(port->dev, NULL); + dev_set_drvdata(port->sm_dev, NULL); - class_device_destroy(umad_class, port->dev->dev); - class_device_destroy(umad_class, port->sm_dev->dev); + device_destroy(umad_class, port->cdev.dev); + device_destroy(umad_class, port->sm_cdev.dev); - cdev_del(port->dev); - cdev_del(port->sm_dev); - - spin_lock(&port_lock); - umad_port[port->dev_num] = NULL; - spin_unlock(&port_lock); + cdev_del(&port->cdev); + cdev_del(&port->sm_cdev); - down_write(&port->mutex); + mutex_lock(&port->file_mutex); port->ib_dev = NULL; - /* - * Now go through the list of files attached to this port and - * unregister all of their MAD agents. We need to hold - * port->mutex while doing this to avoid racing with - * ib_umad_close(), but we can't hold the mutex for writing - * while calling ib_unregister_mad_agent(), since that might - * deadlock by calling back into queue_packet(). So we - * downgrade our lock to a read lock, and then drop and - * reacquire the write lock for the next iteration. - * - * We do list_del_init() on the file's list_head so that the - * list_del in ib_umad_close() is still OK, even after the - * file is removed from the list. - */ - while (!list_empty(&port->file_list)) { - file = list_entry(port->file_list.next, struct ib_umad_file, - port_list); - + list_for_each_entry(file, &port->file_list, port_list) { + mutex_lock(&file->mutex); file->agents_dead = 1; - list_del_init(&file->port_list); - - downgrade_write(&port->mutex); + mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); - - up_read(&port->mutex); - down_write(&port->mutex); } - up_write(&port->mutex); + mutex_unlock(&port->file_mutex); - clear_bit(port->dev_num, dev_map); + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(port->dev_num, dev_map); + else + clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); } static void ib_umad_add_one(struct ib_device *device) @@ -908,7 +1134,10 @@ static void ib_umad_add_one(struct ib_device *device) struct ib_umad_device *umad_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; @@ -921,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device) if (!umad_dev) return; - kref_init(&umad_dev->ref); + kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); umad_dev->start_port = s; umad_dev->end_port = e; @@ -929,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device) for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) + if (ib_umad_init_port(device, i, umad_dev, + &umad_dev->port[i - s])) goto err; } @@ -941,7 +1171,7 @@ err: while (--i >= s) ib_umad_kill_port(&umad_dev->port[i - s]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device) @@ -955,7 +1185,12 @@ static void ib_umad_remove_one(struct ib_device *device) for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) ib_umad_kill_port(&umad_dev->port[i]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); +} + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_umad_init(void) @@ -976,7 +1211,9 @@ static int __init ib_umad_init(void) goto out_chrdev; } - ret = class_create_file(umad_class, &class_attr_abi_version); + umad_class->devnode = umad_devnode; + + ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -1005,6 +1242,8 @@ static void __exit ib_umad_cleanup(void) ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init(ib_umad_init); |
