diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-07-21 00:55:14 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-07-21 00:55:14 -0400 |
commit | 908cf4b925e419bc74f3297b2f0e51d6f8a81da2 (patch) | |
tree | 6c2da79366d4695a9c2560ab18259eca8a2a25b4 /drivers/infiniband | |
parent | 92c49890922d54cba4b1eadeb0b185773c2c9570 (diff) | |
parent | 14b395e35d1afdd8019d11b92e28041fad591b71 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next
Diffstat (limited to 'drivers/infiniband')
107 files changed, 1829 insertions, 1207 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 781ea595037..09a2bec7fd3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -4,28 +4,33 @@ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include <linux/mutex.h> @@ -100,6 +105,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); + dev_addr->src_dev = dev; return 0; } EXPORT_SYMBOL(rdma_copy_addr); diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index fb9ed1489f9..6669287009c 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $ */ #ifndef __AGENT_H_ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e85f7013de5..68883565b72 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index a47fe64e5c3..55738eead3b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include <linux/completion.h> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 671f1373805..ae11d5cc74d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4,29 +4,33 @@ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include <linux/completion.h> @@ -126,8 +130,7 @@ struct rdma_id_private { struct completion comp; atomic_t refcount; - wait_queue_head_t wait_remove; - atomic_t dev_remove; + struct mutex handler_mutex; int backlog; int timeout_ms; @@ -351,26 +354,15 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static int cma_disable_remove(struct rdma_id_private *id_priv, +static int cma_disable_callback(struct rdma_id_private *id_priv, enum cma_state state) { - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == state) { - atomic_inc(&id_priv->dev_remove); - ret = 0; - } else - ret = -EINVAL; - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - -static void cma_enable_remove(struct rdma_id_private *id_priv) -{ - if (atomic_dec_and_test(&id_priv->dev_remove)) - wake_up(&id_priv->wait_remove); + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != state) { + mutex_unlock(&id_priv->handler_mutex); + return -EINVAL; + } + return 0; } static int cma_has_cm_dev(struct rdma_id_private *id_priv) @@ -395,8 +387,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); - init_waitqueue_head(&id_priv->wait_remove); - atomic_set(&id_priv->dev_remove, 0); + mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -923,7 +914,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -970,7 +961,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -980,12 +971,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -998,6 +989,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; + int ret; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) @@ -1022,10 +1014,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; + ret = rdma_translate_ip(&id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto destroy_id; id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1095,7 +1088,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; - if (cma_disable_remove(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, CMA_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); @@ -1116,7 +1109,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto out; } - atomic_inc(&conn_id->dev_remove); + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); mutex_lock(&lock); ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); @@ -1138,7 +1131,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) !cma_is_ud_ps(conn_id->id.ps)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); mutex_unlock(&lock); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); goto out; } @@ -1147,11 +1140,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) release_conn_id: cma_exch(conn_id, CMA_DESTROYING); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(&conn_id->id); out: - cma_enable_remove(listen_id); + mutex_unlock(&listen_id->handler_mutex); return ret; } @@ -1217,7 +1210,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr_in *sin; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -1261,12 +1254,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -1282,7 +1275,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct ib_device_attr attr; listen_id = cm_id->context; - if (cma_disable_remove(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, CMA_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ @@ -1294,19 +1287,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); - atomic_inc(&conn_id->dev_remove); + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = CMA_CONNECT; dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1315,7 +1308,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1331,7 +1324,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = ib_query_device(conn_id->id.device, &attr); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1347,14 +1340,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, CMA_DESTROYING); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(&conn_id->id); + goto out; } + mutex_unlock(&conn_id->handler_mutex); + out: if (dev) dev_put(dev); - cma_enable_remove(listen_id); + mutex_unlock(&listen_id->handler_mutex); return ret; } @@ -1446,7 +1442,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(id, id_priv->backlog); if (ret) printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " - "listening on device %s", ret, cma_dev->device->name); + "listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) @@ -1586,7 +1582,7 @@ static void cma_work_handler(struct work_struct *_work) struct rdma_id_private *id_priv = work->id; int destroy = 0; - atomic_inc(&id_priv->dev_remove); + mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; @@ -1595,7 +1591,7 @@ static void cma_work_handler(struct work_struct *_work) destroy = 1; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); @@ -1758,7 +1754,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_cm_event event; memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); + mutex_lock(&id_priv->handler_mutex); /* * Grab mutex to block rdma_destroy_id() from removing the device while @@ -1787,13 +1783,13 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } @@ -2120,7 +2116,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -2151,7 +2147,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = 0; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -2161,12 +2157,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -2564,8 +2560,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) int ret; id_priv = mc->id_priv; - if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && - cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) + if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) && + cma_disable_callback(id_priv, CMA_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); @@ -2590,12 +2586,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return 0; } @@ -2754,6 +2750,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum cma_state state; + int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); @@ -2761,15 +2758,18 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) return 0; cma_cancel_operation(id_priv, state); - wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove)); + mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) - return 0; + goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - return id_priv->id.event_handler(&id_priv->id, &event); + ret = id_priv->id.event_handler(&id_priv->id, &event); +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; } static void cma_process_remove(struct cma_device *cma_dev) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 7ad47a4b166..05ac36e6acd 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef _CORE_PRIV_H diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5ac5ffee05c..7913b804311 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: device.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 1286dc1b98b..4507043d24c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index fbe16d5250a..1adf2efd3cb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -747,7 +747,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: kmem_cache_free(ib_mad_cache, mad_priv); - break; + kfree(local); + ret = 1; + goto out; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 8b75010016e..05ce331733b 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index a5e2a310f31..d0ef7d61c03 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ */ #include "mad_priv.h" diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h index f0616fd2249..3d336bff114 100644 --- a/drivers/infiniband/core/mad_rmpp.h +++ b/drivers/infiniband/core/mad_rmpp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $ */ #ifndef __MAD_RMPP_H__ diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index c972d723576..019bd4b0863 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index cf474ec2707..1341de793e5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $ */ #include <linux/module.h> @@ -361,7 +359,7 @@ static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); - struct ib_sa_sm_ah *new_ah, *old_ah; + struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct ib_ah_attr ah_attr; @@ -397,12 +395,9 @@ static void update_sm_ah(struct work_struct *work) } spin_lock_irq(&port->ah_lock); - old_ah = port->sm_ah; port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); - if (old_ah) - kref_put(&old_ah->ref, free_sm_ah); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) @@ -413,8 +408,17 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { - struct ib_sa_device *sa_dev; - sa_dev = container_of(handler, typeof(*sa_dev), event_handler); + unsigned long flags; + struct ib_sa_device *sa_dev = + container_of(handler, typeof(*sa_dev), event_handler); + struct ib_sa_port *port = + &sa_dev->port[event->element.port_num - sa_dev->start_port]; + + spin_lock_irqsave(&port->ah_lock, flags); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = NULL; + spin_unlock_irqrestore(&port->ah_lock, flags); schedule_work(&sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); @@ -519,6 +523,10 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) unsigned long flags; spin_lock_irqsave(&query->port->ah_lock, flags); + if (!query->port->sm_ah) { + spin_unlock_irqrestore(&query->port->ah_lock, flags); + return -EAGAIN; + } kref_get(&query->port->sm_ah->ref); query->sm_ah = query->port->sm_ah; spin_unlock_irqrestore(&query->port->ah_lock, flags); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 95756551cf7..4d104211559 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "core_priv.h" @@ -665,6 +663,120 @@ static struct class ib_class = { .dev_uevent = ib_device_uevent, }; +/* Show a given an attribute in the statistics group */ +static ssize_t show_protocol_stat(const struct device *device, + struct device_attribute *attr, char *buf, + unsigned offset) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + union rdma_protocol_stats stats; + ssize_t ret; + + ret = dev->get_protocol_stats(dev, &stats); + if (ret) + return ret; + + return sprintf(buf, "%llu\n", + (unsigned long long) ((u64 *) &stats)[offset]); +} + +/* generate a read-only iwarp statistics attribute */ +#define IW_STATS_ENTRY(name) \ +static ssize_t show_##name(struct device *device, \ + struct device_attribute *attr, char *buf) \ +{ \ + return show_protocol_stat(device, attr, buf, \ + offsetof(struct iw_protocol_stats, name) / \ + sizeof (u64)); \ +} \ +static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +IW_STATS_ENTRY(ipInReceives); +IW_STATS_ENTRY(ipInHdrErrors); +IW_STATS_ENTRY(ipInTooBigErrors); +IW_STATS_ENTRY(ipInNoRoutes); +IW_STATS_ENTRY(ipInAddrErrors); +IW_STATS_ENTRY(ipInUnknownProtos); +IW_STATS_ENTRY(ipInTruncatedPkts); +IW_STATS_ENTRY(ipInDiscards); +IW_STATS_ENTRY(ipInDelivers); +IW_STATS_ENTRY(ipOutForwDatagrams); +IW_STATS_ENTRY(ipOutRequests); +IW_STATS_ENTRY(ipOutDiscards); +IW_STATS_ENTRY(ipOutNoRoutes); +IW_STATS_ENTRY(ipReasmTimeout); +IW_STATS_ENTRY(ipReasmReqds); +IW_STATS_ENTRY(ipReasmOKs); +IW_STATS_ENTRY(ipReasmFails); +IW_STATS_ENTRY(ipFragOKs); +IW_STATS_ENTRY(ipFragFails); +IW_STATS_ENTRY(ipFragCreates); +IW_STATS_ENTRY(ipInMcastPkts); +IW_STATS_ENTRY(ipOutMcastPkts); +IW_STATS_ENTRY(ipInBcastPkts); +IW_STATS_ENTRY(ipOutBcastPkts); +IW_STATS_ENTRY(tcpRtoAlgorithm); +IW_STATS_ENTRY(tcpRtoMin); +IW_STATS_ENTRY(tcpRtoMax); +IW_STATS_ENTRY(tcpMaxConn); +IW_STATS_ENTRY(tcpActiveOpens); +IW_STATS_ENTRY(tcpPassiveOpens); +IW_STATS_ENTRY(tcpAttemptFails); +IW_STATS_ENTRY(tcpEstabResets); +IW_STATS_ENTRY(tcpCurrEstab); +IW_STATS_ENTRY(tcpInSegs); +IW_STATS_ENTRY(tcpOutSegs); +IW_STATS_ENTRY(tcpRetransSegs); +IW_STATS_ENTRY(tcpInErrs); +IW_STATS_ENTRY(tcpOutRsts); + +static struct attribute *iw_proto_stats_attrs[] = { + &dev_attr_ipInReceives.attr, + &dev_attr_ipInHdrErrors.attr, + &dev_attr_ipInTooBigErrors.attr, + &dev_attr_ipInNoRoutes.attr, + &dev_attr_ipInAddrErrors.attr, + &dev_attr_ipInUnknownProtos.attr, + &dev_attr_ipInTruncatedPkts.attr, + &dev_attr_ipInDiscards.attr, + &dev_attr_ipInDelivers.attr, + &dev_attr_ipOutForwDatagrams.attr, + &dev_attr_ipOutRequests.attr, + &dev_attr_ipOutDiscards.attr, + &dev_attr_ipOutNoRoutes.attr, + &dev_attr_ipReasmTimeout.attr, + &dev_attr_ipReasmReqds.attr, + &dev_attr_ipReasmOKs.attr, + &dev_attr_ipReasmFails.attr, + &dev_attr_ipFragOKs.attr, + &dev_attr_ipFragFails.attr, + &dev_attr_ipFragCreates.attr, + &dev_attr_ipInMcastPkts.attr, + &dev_attr_ipOutMcastPkts.attr, + &dev_attr_ipInBcastPkts.attr, + &dev_attr_ipOutBcastPkts.attr, + &dev_attr_tcpRtoAlgorithm.attr, + &dev_attr_tcpRtoMin.attr, + &dev_attr_tcpRtoMax.attr, + &dev_attr_tcpMaxConn.attr, + &dev_attr_tcpActiveOpens.attr, + &dev_attr_tcpPassiveOpens.attr, + &dev_attr_tcpAttemptFails.attr, + &dev_attr_tcpEstabResets.attr, + &dev_attr_tcpCurrEstab.attr, + &dev_attr_tcpInSegs.attr, + &dev_attr_tcpOutSegs.attr, + &dev_attr_tcpRetransSegs.attr, + &dev_attr_tcpInErrs.attr, + &dev_attr_tcpOutRsts.attr, + NULL +}; + +static struct attribute_group iw_stats_group = { + .name = "proto_stats", + .attrs = iw_proto_stats_attrs, +}; + int ib_device_register_sysfs(struct ib_device *device) { struct device *class_dev = &device->dev; @@ -707,6 +819,12 @@ int ib_device_register_sysfs(struct ib_device *device) } } + if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { + ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); + if (ret) + goto err_put; + } + return 0; err_put: diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index d7a6881b571..9494005d1c9 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include <linux/completion.h> @@ -45,6 +43,7 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/mutex.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -1159,6 +1158,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; + cycle_kernel_lock(); file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ca4cf3a511a..195f97302fe 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -38,6 +38,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> +#include <linux/smp_lock.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -1156,6 +1157,7 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; + lock_kernel(); INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1163,6 +1165,7 @@ static int ucma_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; + unlock_kernel(); return 0; } diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 997c07db6d8..8ec7876bedc 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe78f7d2509..6f7c096abf1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $ */ #include <linux/mm.h> @@ -150,7 +148,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = 0; while (npages) { ret = get_user_pages(current, current->mm, cur_base, - min_t(int, npages, + min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), 1, !umem->writable, page_list, vma_list); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 3aa2db54eae..268a2d23b7c 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include <linux/module.h> @@ -777,6 +775,19 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, } #endif +/* + * ib_umad_open() does not need the BKL: + * + * - umad_port[] accesses are protected by port_lock, the + * ib_umad_port structures are properly reference counted, and + * everything else is purely local to the file being created, so + * races against other open calls are not a problem; + * - the ioctl method does not affect any global state outside of the + * file structure being operated on; + * - the port is added to umad_port[] as the last part of module + * initialization so the open method will either immediately run + * -ENXIO, or all required initialization will be done. + */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; @@ -1005,8 +1016,9 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) goto err_cdev; - port->dev = device_create(umad_class, device->dma_device, - port->cdev->dev, "umad%d", port->dev_num); + port->dev = device_create_drvdata(umad_class, device->dma_device, + port->cdev->dev, port, + "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; @@ -1024,15 +1036,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; - port->sm_dev = device_create(umad_class, device->dma_device, - port->sm_cdev->dev, - "issm%d", port->dev_num); + port->sm_dev = device_create_drvdata(umad_class, device->dma_device, + port->sm_cdev->dev, port, + "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; - dev_set_drvdata(port->dev, port); - dev_set_drvdata(port->sm_dev, port); - if (device_create_file(port->sm_dev, &dev_attr_ibdev)) goto err_sm_dev; if (device_create_file(port->sm_dev, &dev_attr_port)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 376a57ce1b4..b3ea9587dc8 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $ */ #ifndef UVERBS_H diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c3bff5fe86..56feab6c251 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ #include <linux/file.h> @@ -919,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, resp->wc[i].opcode = wc[i].opcode; resp->wc[i].vendor_err = wc[i].vendor_err; resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; + resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data; resp->wc[i].qp_num = wc[i].qp->qp_num; resp->wc[i].src_qp = wc[i].src_qp; resp->wc[i].wc_flags = wc[i].wc_flags; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index cc1afa28c18..aeee856c406 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $ */ #include <linux/module.h> @@ -423,7 +421,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); - if (!file->async_file->is_closed) { + if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } @@ -610,6 +608,18 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) return file->device->ib_dev->mmap(file->ucontext, vma); } +/* + * ib_uverbs_open() does not need the BKL: + * + * - dev_table[] accesses are protected by map_lock, the + * ib_uverbs_device structures are properly reference counted, and + * everything else is purely local to the file being created, so + * races against other open calls are not a problem; + * - there is no ioctl method to race against; + * - the device is added to dev_table[] as the last part of module + * initialization, the open method will either immediately run + * -ENXIO, or all required initialization will be done. + */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; @@ -651,7 +661,6 @@ err_module: err: kref_put(&dev->ref, ib_uverbs_release_dev); - return ret; } @@ -755,14 +764,15 @@ static void ib_uverbs_add_one(struct ib_device *device) if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; - uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev->dev, - "uverbs%d", uverbs_dev->devnum); + uverbs_dev->dev = device_create_drvdata(uverbs_class, + device->dma_device, + uverbs_dev->cdev->dev, + uverbs_dev, + "uverbs%d", + uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; - dev_set_drvdata(uverbs_dev->dev, uverbs_dev); - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 05042089de6..a7da9be43e6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -34,8 +34,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> @@ -317,7 +315,6 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, - [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { @@ -755,6 +752,52 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +{ + struct ib_mr *mr; + + if (!pd->device->alloc_fast_reg_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_alloc_fast_reg_mr); + +struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device, + int max_page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + if (!device->alloc_fast_reg_page_list) + return ERR_PTR(-ENOSYS); + + page_list = device->alloc_fast_reg_page_list(device, max_page_list_len); + + if (!IS_ERR(page_list)) { + page_list->device = device; + page_list->max_page_list_len = max_page_list_len; + } + + return page_list; +} +EXPORT_SYMBOL(ib_alloc_fast_reg_page_list); + +void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + page_list->device->free_fast_reg_page_list(page_list); +} +EXPORT_SYMBOL(ib_free_fast_reg_page_list); + /* Memory windows */ struct ib_mw *ib_alloc_mw(struct ib_pd *pd) diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 9a054c6941a..dd05c483564 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -454,9 +454,8 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev) (IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_ZERO_STAG | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_SEND_W_INV); + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 3f441fc57c1..f6d5747153a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, + T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, + T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -276,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->qpid) return -ENOMEM; - wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); + wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); if (!wq->rq) goto err1; @@ -300,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!kernel_domain) wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + (wq->qpid << rdev_p->qpshift); + wq->rdev = rdev_p; PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, wq->qpid, wq->doorbell, (unsigned long long) wq->udb); return 0; @@ -558,7 +561,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7); + T3_CTL_QP_TID, 7, T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -674,7 +677,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, Q_GENBIT(rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len); + wr_len, T3_SOPEOP); if (flag == T3_COMPLETION_FLAG) ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); len -= 96; @@ -816,6 +819,13 @@ int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) 0, 0); } +int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, + 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); +} + int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) { struct t3_rdma_init_wr *wqe; @@ -1257,13 +1267,16 @@ proc_cqe: wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); PDBG("%s completing sq idx %ld\n", __func__, Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = (wq->sq + - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; + *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; wq->sq_rptr++; } else { PDBG("%s completing rq idx %ld\n", __func__, Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; + if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) + cxio_hal_pblpool_free(wq->rdev, + wq->rq[Q_PTR2IDX(wq->rq_rptr, + wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); wq->rq_rptr++; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 6e128f6bab0..656fe47bc84 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -45,15 +45,17 @@ #define T3_CTRL_QP_SIZE_LOG2 8 #define T3_CTRL_CQ_ID 0 -/* TBD */ #define T3_MAX_NUM_RI (1<<15) #define T3_MAX_NUM_QP (1<<15) #define T3_MAX_NUM_CQ (1<<15) #define T3_MAX_NUM_PD (1<<15) #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) +#define T3_MAX_CQ_DEPTH 8192 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL +#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ #define T3_STAG_UNSET 0xffffffff @@ -165,6 +167,7 @@ int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, u32 pbl_addr); int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index f1a25a821a4..04618f7bfbb 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -39,6 +39,9 @@ #define T3_MAX_SGE 4 #define T3_MAX_INLINE 64 +#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) +#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) +#define T3_STAG0_PAGE_SHIFT 15 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ @@ -72,7 +75,8 @@ enum t3_wr_opcode { T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP + T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, + T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR } __attribute__ ((packed)); enum t3_rdma_opcode { @@ -89,7 +93,8 @@ enum t3_rdma_opcode { T3_FAST_REGISTER, T3_LOCAL_INV, T3_QP_MOD, - T3_BYPASS + T3_BYPASS, + T3_RDMA_READ_REQ_WITH_INV, } __attribute__ ((packed)); static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) @@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) case T3_WR_BIND: return T3_BIND_MW; case T3_WR_INIT: return T3_RDMA_INIT; case T3_WR_QP_MOD: return T3_QP_MOD; + case T3_WR_FASTREG: return T3_FAST_REGISTER; default: break; } return -1; @@ -170,11 +176,54 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; +#define T3_MAX_FASTREG_DEPTH 24 +#define T3_MAX_FASTREG_FRAG 10 + +struct t3_fastreg_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 len; + __be32 va_base_hi; /* 3 */ + __be32 va_base_lo_fbo; + __be32 page_type_perms; /* 4 */ + __be32 reserved1; + __be64 pbl_addrs[0]; /* 5+ */ +}; + +/* + * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. + */ +struct t3_pbl_frag { + struct fw_riwrh wrh; /* 0 */ + __be64 pbl_addrs[14]; /* 1..14 */ +}; + +#define S_FR_PAGE_COUNT 24 +#define M_FR_PAGE_COUNT 0xff +#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) +#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) + +#define S_FR_PAGE_SIZE 16 +#define M_FR_PAGE_SIZE 0x1f +#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) +#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) + +#define S_FR_TYPE 8 +#define M_FR_TYPE 0x1 +#define V_FR_TYPE(x) ((x) << S_FR_TYPE) +#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) + +#define S_FR_PERMS 0 +#define M_FR_PERMS 0xff +#define V_FR_PERMS(x) ((x) << S_FR_PERMS) +#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) + struct t3_local_inv_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ __be32 stag; /* 2 */ - __be32 reserved3; + __be32 reserved; }; struct t3_rdma_write_wr { @@ -193,7 +242,8 @@ struct t3_rdma_read_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ u8 rdmaop; /* 2 */ - u8 reserved[3]; + u8 local_inv; + u8 reserved[2]; __be32 rem_stag; __be64 rem_to; /* 3 */ __be32 local_stag; /* 4 */ @@ -201,18 +251,6 @@ struct t3_rdma_read_wr { __be64 local_to; /* 5 */ }; -enum t3_addr_type { - T3_VA_BASED_TO = 0x0, - T3_ZERO_BASED_TO = 0x1 -} __attribute__ ((packed)); - -enum t3_mem_perms { - T3_MEM_ACCESS_LOCAL_READ = 0x1, - T3_MEM_ACCESS_LOCAL_WRITE = 0x2, - T3_MEM_ACCESS_REM_READ = 0x4, - T3_MEM_ACCESS_REM_WRITE = 0x8 -} __attribute__ ((packed)); - struct t3_bind_mw_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ @@ -336,6 +374,11 @@ struct t3_genbit { __be64 genbit; }; +struct t3_wq_in_err { + u64 flit[13]; + u64 err; +}; + enum rdma_init_wr_flags { MPA_INITIATOR = (1<<0), PRIV_QP = (1<<1), @@ -346,13 +389,16 @@ union t3_wr { struct t3_rdma_write_wr write; struct t3_rdma_read_wr read; struct t3_receive_wr recv; + struct t3_fastreg_wr fastreg; + struct t3_pbl_frag pbl_frag; struct t3_local_inv_wr local_inv; struct t3_bind_mw_wr bind; struct t3_bypass_wr bypass; struct t3_rdma_init_wr init; struct t3_modify_qp_wr qp_mod; struct t3_genbit genbit; - u64 flit[16]; + struct t3_wq_in_err wq_in_err; + __be64 flit[16]; }; #define T3_SQ_CQE_FLIT 13 @@ -366,12 +412,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); } +enum t3_wr_hdr_bits { + T3_EOP = 1, + T3_SOP = 2, + T3_SOPEOP = T3_EOP|T3_SOP, +}; + static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len) + u8 len, u8 sopeop) { wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | + V_FW_RIWR_SOPEOP(sopeop) | V_FW_RIWR_FLAGS(flags)); wmb(); wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | @@ -404,6 +456,7 @@ enum tpt_addr_type { }; enum tpt_mem_perm { + TPT_MW_BIND = 0x10, TPT_LOCAL_READ = 0x8, TPT_LOCAL_WRITE = 0x4, TPT_REMOTE_READ = 0x2, @@ -615,6 +668,11 @@ struct t3_swsq { int signaled; }; +struct t3_swrq { + __u64 wr_id; + __u32 pbl_addr; +}; + /* * A T3 WQ implements both the SQ and RQ. */ @@ -631,14 +689,15 @@ struct t3_wq { u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ u32 sq_rptr; /* pending wrs */ u32 sq_size_log2; /* sq size */ - u64 *rq; /* SW RQ (holds consumer wr_ids */ + struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ u32 rq_rptr; /* pending wrs */ - u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ + struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ u32 rq_size_log2; /* rq size */ u32 rq_addr; /* rq adapter address */ void __iomem *doorbell; /* kernel db */ u64 udb; /* user db if any */ + struct cxio_rdev *rdev; }; struct t3_cq { @@ -659,7 +718,7 @@ struct t3_cq { static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->flit[13] = 1; + wq->queue->wq_in_err.err = 1; } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 71554eacb13..4489c89d671 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -71,18 +71,16 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); - rnicp->attr.vendor_id = 0x168; - rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = (1UL << 24) - 1; + rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; + rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index d2409a505e8..3773453b2cf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -48,8 +48,6 @@ struct iwch_qp; struct iwch_mr; struct iwch_rnic_attributes { - u32 vendor_id; - u32 vendor_part_id; u32 max_qps; u32 max_wrs; /* Max for any SQ/RQ */ u32 max_sge_per_wr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 4ee8ccd0a9e..cf5474ae68f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->wr_id = cookie; wc->qp = &qhp->ibqp; wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%llx\n", __func__, @@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, else wc->byte_len = 0; wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || + CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } } else { switch (CQE_OPCODE(cqe)) { case T3_RDMA_WRITE: @@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, break; case T3_SEND: case T3_SEND_WITH_SE: + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: wc->opcode = IB_WC_SEND; break; case T3_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; - /* these aren't supported yet */ - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: case T3_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; case T3_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; default: printk(KERN_ERR MOD "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 8934178a23e..b89640aa6e1 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -56,6 +56,7 @@ #include "iwch_provider.h" #include "iwch_cm.h" #include "iwch_user.h" +#include "common.h" static int iwch_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, @@ -747,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) mhp->attr.type = TPT_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; insert_handle(rhp, &rhp->mmidr, mhp, mmid); PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); @@ -768,6 +770,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw) return 0; } +static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, pbl_depth); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pbl_size = pbl_depth; + ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) { + iwch_free_pbl(mhp); + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_NON_SHARED_MR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +} + +static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), + GFP_KERNEL); + if (!page_list) + return ERR_PTR(-ENOMEM); + + page_list->page_list = (u64 *)(page_list + 1); + page_list->max_page_list_len = page_list_len; + + return page_list; +} + +static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; @@ -843,6 +907,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, */ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); wqsize = roundup_pow_of_two(rqsize + sqsize); + + /* + * Kernel users need more wq space for fastreg WRs which can take + * 2 WR fragments. + */ + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (!ucontext && wqsize < (rqsize + (2 * sqsize))) + wqsize = roundup_pow_of_two(rqsize + + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, wqsize, sqsize, rqsize); qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); @@ -851,7 +924,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->wq.size_log2 = ilog2(wqsize); qhp->wq.rq_size_log2 = ilog2(rqsize); qhp->wq.sq_size_log2 = ilog2(sqsize); - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { kfree(qhp); @@ -935,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->ibqp.qp_num = qhp->wq.qpid; init_timer(&(qhp->timer)); PDBG("%s sq_num_entries %d, rq_num_entries %d " - "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, - 1 << qhp->wq.size_log2); + 1 << qhp->wq.size_log2, qhp->wq.rq_addr); return &qhp->ibqp; } @@ -1023,6 +1095,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port, return 0; } +static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min, fw_mic; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version + 1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_mic); + + return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | + (fw_mic & 0xffff); +} + static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -1033,7 +1128,10 @@ static int iwch_query_device(struct ib_device *ibdev, dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + props->hw_ver = dev->rdev.t3cdev_p->type; + props->fw_ver = fw_vers_string_to_u64(dev); props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = dev->attr.mem_pgsizes_bitmask; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; props->max_mr_size = dev->attr.max_mr_size; @@ -1048,6 +1146,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_mr = dev->attr.max_mem_regs; props->max_pd = dev->attr.max_pds; props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; return 0; } @@ -1088,6 +1187,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } +static int fw_supports_fastreg(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version+1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + + PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min); + + return fw_maj > 6 || (fw_maj == 6 && fw_min > 0); +} + static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, @@ -1096,7 +1217,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1109,7 +1232,9 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } @@ -1123,6 +1248,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, iwch_dev->rdev.rnic_info.pdev->device); } +static int iwch_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) +{ + struct iwch_dev *dev; + struct tp_mib_stats m; + int ret; + + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); + if (ret) + return -ENOSYS; + + memset(stats, 0, sizeof *stats); + stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + + m.ipInReceive_lo; + stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + + m.ipInHdrErrors_lo; + stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + + m.ipInAddrErrors_lo; + stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + + m.ipInUnknownProtos_lo; + stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + + m.ipInDiscards_lo; + stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + + m.ipInDelivers_lo; + stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + + m.ipOutRequests_lo; + stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + + m.ipOutDiscards_lo; + stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + + m.ipOutNoRoutes_lo; + stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; + stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; + stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; + stats->iw.ipReasmFails = (u64) m.ipReasmFails; + stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; + stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; + stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; + stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; + stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; + stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; + stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + + m.tcpInSegs_lo; + stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + + m.tcpOutSegs_lo; + stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + + m.tcpRetransSeg_lo; + stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + + m.tcpInErrs_lo; + stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; + stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; + return 0; +} + static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); @@ -1132,7 +1312,7 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, - &dev_attr_board_id + &dev_attr_board_id, }; int iwch_register_device(struct iwch_dev *dev) @@ -1145,8 +1325,12 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = - (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW); + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + + /* cxgb3 supports STag 0. */ + dev->ibdev.local_dma_lkey = 0; + if (fw_supports_fastreg(dev)) + dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1198,15 +1382,16 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; - + dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; - dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; - + dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 836163fc542..f5ceca05c43 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc) TPT_LOCAL_READ; } -static inline u32 iwch_ib_to_mwbind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | - T3_MEM_ACCESS_LOCAL_READ; -} - enum iwch_mmid_state { IWCH_STAG_STATE_VALID, IWCH_STAG_STATE_INVALID diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 79dbe5beae5..9a3be3a9d5d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -33,10 +33,11 @@ #include "iwch.h" #include "iwch_cm.h" #include "cxio_hal.h" +#include "cxio_resource.h" #define NO_SUPPORT -1 -static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, u8 * flit_cnt) { int i; @@ -44,59 +45,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE; else wqe->send.rdmaop = T3_SEND; wqe->send.rem_stag = 0; break; -#if 0 /* Not currently supported */ - case TYPE_SEND_INVALIDATE: - case TYPE_SEND_INVALIDATE_IMMEDIATE: - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - break; - case TYPE_SEND_SE_INVALIDATE: - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + else + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); break; -#endif default: - break; + return -EINVAL; } if (wr->num_sge > T3_MAX_SGE) return -EINVAL; wqe->send.reserved[0] = 0; wqe->send.reserved[1] = 0; wqe->send.reserved[2] = 0; - if (wr->opcode == IB_WR_SEND_WITH_IMM) { - plen = 4; - wqe->send.sgl[0].stag = wr->ex.imm_data; - wqe->send.sgl[0].len = __constant_cpu_to_be32(0); - wqe->send.num_sgle = __constant_cpu_to_be32(0); - *flit_cnt = 5; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) + return -EMSGSIZE; + + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } + wqe->send.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); wqe->send.plen = cpu_to_be32(plen); return 0; } -static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { int i; @@ -137,15 +123,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; wqe->read.rdmaop = T3_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + wqe->read.local_inv = 1; + else + wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.reserved[2] = 0; wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); @@ -155,6 +144,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + int i; + __be64 *p; + + if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | + V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + } + *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + +static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->local_inv.reserved = 0; + *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; + return 0; +} + /* * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. */ @@ -205,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return 0; } -static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, +static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; + int i, err = 0; + u32 pbl_addr[T3_MAX_SGE]; + u8 page_size[T3_MAX_SGE]; + + err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, + page_size); + if (err) + return err; + wqe->recv.pagesz[0] = page_size[0]; + wqe->recv.pagesz[1] = page_size[1]; + wqe->recv.pagesz[2] = page_size[2]; + wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + + /* to in the WQE == the offset into the page */ + wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % + (1UL << (12 + page_size[i]))); + + /* pbl_addr is the adapters address in the PBL */ + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = 0; + return 0; +} + +static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + u32 pbl_addr; + u32 pbl_offset; + + + /* + * The T3 HW requires the PBL in the HW recv descriptor to reference + * a PBL entry. So we allocate the max needed PBL memory here and pass + * it to the uP in the recv WR. The uP will build the PBL and setup + * the HW recv descriptor. + */ + pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); + if (!pbl_addr) + return -ENOMEM; + + /* + * Compute the 8B aligned offset. + */ + pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; + + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + + /* + * Use a 128MB page size. This and an imposed 128MB + * sge length limit allows us to require only a 2-entry HW + * PBL for each SGE. This restriction is acceptable since + * since it is not possible to allocate 128MB of contiguous + * DMA coherent memory! + */ + if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) + return -EINVAL; + wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; + + /* + * T3 restricts a recv to all zero-stag or all non-zero-stag. + */ + if (wr->sg_list[i].lkey != 0) + return -EINVAL; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); + pbl_offset += 2; } for (; i < T3_MAX_SGE; i++) { + wqe->recv.pagesz[i] = 0; wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; return 0; } @@ -229,7 +352,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; - u8 t3_wr_flit_cnt; + u8 uninitialized_var(t3_wr_flit_cnt); enum t3_wr_opcode t3_wr_opcode = 0; enum t3_wr_flags t3_wr_flags; struct iwch_qp *qhp; @@ -238,6 +361,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 num_wrs; unsigned long flag; struct t3_swsq *sqp; + int wr_cnt = 1; qhp = to_iwch_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); @@ -262,33 +386,45 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t3_wr_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) t3_wr_flags |= T3_COMPLETION_FLAG; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; t3_wr_opcode = T3_WR_SEND; - err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: t3_wr_opcode = T3_WR_WRITE; - err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ - err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); if (err) break; sqp->read_len = wqe->read.local_len; if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; + case IB_WR_FAST_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_LOCAL_FENCE_FLAG; + t3_wr_opcode = T3_WR_INV_STAG; + err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + break; default: PDBG("%s post of type=%d TBD!\n", __func__, wr->opcode); @@ -307,14 +443,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt); + 0, t3_wr_flit_cnt, + (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", __func__, (unsigned long long) wr->wr_id, idx, Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), sqp->opcode); wr = wr->next; num_wrs--; - ++(qhp->wq.wptr); + qhp->wq.wptr += wr_cnt; ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); @@ -345,21 +482,27 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } while (wr) { + if (wr->num_sge > T3_MAX_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); if (num_wrs) - err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); + if (wr->sg_list[0].lkey) + err = build_rdma_recv(qhp, wqe, wr); + else + err = build_zero_stag_recv(qhp, wqe, wr); else err = -ENOMEM; if (err) { *bad_wr = wr; break; } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = - wr->wr_id; build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3); + 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " "wqe %p \n", __func__, (unsigned long long) wr->wr_id, idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); @@ -419,10 +562,10 @@ int iwch_bind_mw(struct ib_qp *qp, sgl.lkey = mw_bind->mr->lkey; sgl.length = mw_bind->length; wqe->bind.reserved = 0; - wqe->bind.type = T3_VA_BASED_TO; + wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); + wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags); wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); wqe->bind.mw_len = cpu_to_be32(mw_bind->length); @@ -430,7 +573,7 @@ int iwch_bind_mw(struct ib_qp *qp, err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); - return err; + return err; } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); @@ -441,10 +584,9 @@ int iwch_bind_mw(struct ib_qp *qp, sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); wqe->bind.mr_pagesz = page_size; - wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, - sizeof(struct t3_bind_mw_wr) >> 3); + sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); @@ -758,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = iwch_rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; + if (!qhp->ibqp.uobject) + init_attr.flags |= PRIV_QP; if (peer2peer) { init_attr.rtr_type = RTR_READ; if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ce1ab0571be..0792d930c48 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -531,7 +531,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; + u64 eqe_value, ret; unsigned long flags; int eqe_cnt, i; int eq_empty = 0; @@ -583,8 +583,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %ld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 482103eb6ea..598844d2edc 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] = }, {}, }; +MODULE_DEVICE_TABLE(of, ehca_device_table); static struct of_platform_driver ehca_driver = { .name = "ehca", diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index bbe0436f4f7..dd9bc68f1c7 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -421,8 +421,10 @@ int ehca_post_send(struct ib_qp *qp, int ret = 0; unsigned long flags; - if (unlikely(my_qp->state != IB_QPS_RTS)) { - ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num); + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); return -EINVAL; } @@ -542,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), - qp->device, recv_wr, bad_recv_wr); + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); } int ehca_post_srq_recv(struct ib_srq *srq, @@ -679,7 +689,7 @@ poll_cq_one_read_cqe: wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; poll_cq_one_exit0: diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5245e13c3a3..415d3a465de 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, r_cb, 0, 0, 0, 0); } + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 60ce02b7066..2c3c6e0ea5c 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, const u64 ressource_handle, void *rblock, unsigned long *byte_count); +u64 hipz_h_eoi(int irq); #endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index a03bd28d9b4..d385e4168c9 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->uqueue[head].opcode = entry->opcode; wc->uqueue[head].vendor_err = entry->vendor_err; wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data; + wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; wc->uqueue[head].qp_num = entry->qp->qp_num; wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index b472b15637f..35f301c88b5 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,6 +39,7 @@ #include <linux/highmem.h> #include <linux/io.h> #include <linux/jiffies.h> +#include <linux/smp_lock.h> #include <asm/pgtable.h> #include "ipath_kernel.h" @@ -1815,6 +1816,7 @@ done: static int ipath_open(struct inode *in, struct file *fp) { /* The real work is performed later in ipath_assign_port() */ + cycle_kernel_lock(); fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); return fp->private_data ? 0 : -ENOMEM; } diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index 8eee7830f04..fb70712ac85 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -2228,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which) 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40000001, 0x1388, 0x15e, /* rest 0's */ }; - dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]); - hcnt = sizeof(hdr)/sizeof(hdr[0]); + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); if (!swapped) { /* for maintainability, do it at runtime */ for (i = 0; i < hcnt; i++) { diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 59a8b254b97..0bd8bcb184a 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -232,6 +232,11 @@ struct ipath_sdma_desc { #define IPATH_SDMA_TXREQ_S_ABORTED 2 #define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 +#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) +#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) +#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) +#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) + /* max dwords in small buffer packet */ #define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 1ff46ae7dd9..be4fc9ada8e 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; vendor = dd->ipath_vendorid; - nip->vendor_id[0] = 0; - nip->vendor_id[1] = vendor >> 8; - nip->vendor_id[2] = vendor; + nip->vendor_id[0] = IPATH_SRC_OUI_1; + nip->vendor_id[1] = IPATH_SRC_OUI_2; + nip->vendor_id[2] = IPATH_SRC_OUI_3; return reply(smp); } @@ -1492,6 +1492,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, goto bail; } + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_TRAP_REPRESS: case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 108df667d2e..97710522624 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index a4b5521567f..af051f75766 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -331,7 +331,7 @@ again: switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: if (!ipath_get_rwqe(qp, 0)) @@ -342,7 +342,7 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 3697449c1ba..eaba03273e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -263,14 +263,10 @@ static void sdma_abort_task(unsigned long opaque) hwstatus = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - if (/* ScoreBoardDrainInProg */ - test_bit(63, &hwstatus) || - /* AbortInProg */ - test_bit(62, &hwstatus) || - /* InternalSDmaEnable */ - test_bit(61, &hwstatus) || - /* ScbEmpty */ - !test_bit(30, &hwstatus)) { + if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | + IPATH_SDMA_STATUS_ABORT_IN_PROG | + IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || + !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { if (dd->ipath_sdma_reset_wait > 0) { /* not done shutting down sdma */ --dd->ipath_sdma_reset_wait; @@ -345,7 +341,7 @@ resched: * state change */ if (jiffies > dd->ipath_sdma_abort_jiffies) { - ipath_dbg("looping with status 0x%016llx\n", + ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; } @@ -615,7 +611,7 @@ void ipath_restart_sdma(struct ipath_devdata *dd) } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", dd->ipath_sdma_status); goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 7fd18e83390..82cc588b8bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -407,12 +407,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - /* XXX Need to free SGEs */ + wc.opcode = IB_WC_RECV; last_imm: ipath_copy_sge(&qp->r_sge, data, tlen); wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; @@ -484,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): rdma_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -514,6 +513,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto done; } wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; goto last_imm; case OP(RDMA_WRITE_LAST): diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 77ca8ca74e7..36aa242c487 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = swqe->wr.ex.imm_data; + wc.ex.imm_data = swqe->wr.ex.imm_data; } /* @@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else - wc.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; hdrsize += sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; + wc.ex.imm_data = 0; wc.wc_flags = 0; } else { dev->n_pkt_drops++; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e0ec540042b..55c71882882 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <linux/io.h> #include <linux/utsname.h> +#include <linux/rculist.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -1494,9 +1495,11 @@ static int ipath_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID; + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; props->page_size_cap = PAGE_SIZE; - props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_id = + IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; props->vendor_part_id = dev->dd->ipath_deviceid; props->hw_ver = dev->dd->ipath_pcirev; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 9e5abf9c309..d73e3223287 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -31,8 +31,7 @@ * SOFTWARE. */ -#include <linux/list.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 4521319b140..299f20832ab 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -663,18 +663,18 @@ repoll: switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: - wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; break; case MLX4_RECV_OPCODE_SEND_IMM: - wc->opcode = IB_WC_RECV; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; break; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 4c1e72fc8f5..cdca3a511e1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4d61e32866c..bcf50648fa1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -90,7 +90,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN; + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -437,7 +438,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + &to_mqp(ibqp)->mqp, gid->raw, + !!(to_mqp(ibqp)->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 5cf994794d2..c4cf5b69eef 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -101,7 +101,8 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = 1 << 0 + MLX4_IB_QP_LSO = 1 << 0, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; struct mlx4_ib_qp { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8e02ecfec18..89eb6cbe592 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) int ind; void *buf; __be32 stamp; + struct mlx4_wqe_ctrl_seg *ctrl; - s = roundup(size, 1U << qp->sq.wqe_shift); if (qp->sq_max_wqes_per_wr > 1) { + s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : @@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) *wqe = stamp; } } else { - buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -333,6 +335,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + send_wqe_overhead(type, qp->flags); + if (s > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + /* * Hermon supports shrinking WQEs, such that a single work * request can include multiple units of 1 << wqe_shift. This @@ -372,9 +377,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); for (;;) { - if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz) - return -EINVAL; - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); /* @@ -395,7 +397,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, ++qp->sq.wqe_shift; } - qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - + qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, + (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); @@ -411,7 +414,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; - cap->max_send_sge = qp->sq.max_gs; + cap->max_send_sge = min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); /* We don't support inline sends for kernel QPs (yet) */ cap->max_inline_data = 0; @@ -449,19 +454,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->rq.lock); qp->state = IB_QPS_RESET; - qp->atomic_rd_en = 0; - qp->resp_depth = 0; - - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - else - qp->sq_signal_bits = 0; err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); if (err) @@ -506,6 +500,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -679,10 +676,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp; int err; - /* We only support LSO, and only for kernel UD QPs. */ - if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO) + /* + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. + */ + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return ERR_PTR(-EINVAL); - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO && + + if (init_attr->create_flags && (pd->uobject || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); @@ -691,7 +693,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -712,7 +714,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (pd->uobject) return ERR_PTR(-EINVAL); - sqp = kmalloc(sizeof *sqp, GFP_KERNEL); + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); if (!sqp) return ERR_PTR(-ENOMEM); @@ -903,7 +905,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, attr->path_mtu); goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | 31; + context->mtu_msgmax = (attr->path_mtu << 5) | + ilog2(dev->dev->caps.max_msg_sz); } if (qp->rq.wqe_cnt) @@ -1060,6 +1063,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); + if (qp->sq_max_wqes_per_wr == 1) + ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } @@ -1124,23 +1129,6 @@ out: return err; } -static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 }; -static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -1183,15 +1171,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, - mlx4_ib_qp_attr_mask_table[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1457,7 +1436,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); - unsigned seglen; + unsigned uninitialized_var(seglen); int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1862,6 +1841,13 @@ done: qp_init_attr->cap = qp_attr->cap; + qp_init_attr->create_flags = 0; + if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + if (qp->flags & MLX4_IB_QP_LSO) + qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index a7630670961..c5ccc2daab6 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 4b111a852ff..32f6c631545 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index e948158a28d..cc440f90000 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <linux/jiffies.h> @@ -128,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev) static void poll_catas(unsigned long dev_ptr) { struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; - unsigned long flags; int i; for (i = 0; i < dev->catas_err.size; ++i) @@ -137,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr) return; } - spin_lock_irqsave(&catas_lock, flags); - if (!dev->catas_err.stop) - mod_timer(&dev->catas_err.timer, - jiffies + MTHCA_CATAS_POLL_INTERVAL); - spin_unlock_irqrestore(&catas_lock, flags); - - return; + mod_timer(&dev->catas_err.timer, + round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); } void mthca_start_catas_poll(struct mthca_dev *dev) @@ -151,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) unsigned long addr; init_timer(&dev->catas_err.timer); - dev->catas_err.stop = 0; dev->catas_err.map = NULL; addr = pci_resource_start(dev->pdev, 0) + @@ -182,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) void mthca_stop_catas_poll(struct mthca_dev *dev) { - spin_lock_irq(&catas_lock); - dev->catas_err.stop = 1; - spin_unlock_irq(&catas_lock); - del_timer_sync(&dev->catas_err.timer); if (dev->catas_err.map) { diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 54d230ee7d6..c33e1c53c79 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/completion.h> diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 8928ca4a932..6efd3265f24 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CMD_H diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index afa56bfaab2..75671f75cac 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CONFIG_REG_H diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 20401d2ba6b..d9f4735c2b3 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $ */ #include <linux/hardirq.h> @@ -622,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev, case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV; break; case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; break; default: diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bc32f8e377..ee4d073c889 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_DEV_H @@ -279,7 +277,6 @@ struct mthca_mcg_table { struct mthca_catas_err { u64 addr; u32 __iomem *map; - unsigned long stop; u32 size; struct timer_list timer; struct list_head list; diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index b374dc395be..14f51ef97d7 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/types.h> diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8bde7f98e58..4e36aa7cb3d 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 8b7e83e6e88..640449582ab 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 9ebadd6e0cf..fb9f91b60f3 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $ */ #include <linux/module.h> @@ -45,6 +43,7 @@ #include "mthca_cmd.h" #include "mthca_profile.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); @@ -200,7 +199,18 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) mdev->limits.gid_table_len = dev_lim->max_gids; mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; - mdev->limits.max_sg = dev_lim->max_sg; + /* + * Need to allow for worst case send WQE overhead and check + * whether max_desc_sz imposes a lower limit than max_sg; UD + * send has the biggest overhead. + */ + mdev->limits.max_sg = min_t(int, dev_lim->max_sg, + (dev_lim->max_desc_sz - + sizeof (struct mthca_next_seg) - + (mthca_is_memfree(mdev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg))) / + sizeof (struct mthca_data_seg)); mdev->limits.max_wqes = dev_lim->max_qp_sz; mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a8ad072be07..3f5f9487920 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index b224079d4e1..1f7d1a29d2a 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <linux/mm.h> @@ -109,7 +107,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m { struct page *page; - page = alloc_pages(gfp_mask, order); + /* + * Use __GFP_ZERO because buggy firmware assumes ICM pages are + * cleared, and subtle failures are seen if they aren't. + */ + page = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!page) return -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index a1ab06847b7..da9b8f9b884 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #ifndef MTHCA_MEMFREE_H diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 820205dec56..8489b1e81c0 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/slab.h> diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index c1e950764bd..266f14e4740 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 605a8d57fac..d168c254061 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index e76cb62d8e3..62b009cc873 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROFILE_H diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index be34f99ca62..87ad889e367 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $ */ #include <rdma/ib_smi.h> diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 934bf954403..c621f8794b8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROVIDER_H diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 09dc3614cf2..f5081bfde6d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $ */ #include <linux/string.h> @@ -850,23 +848,6 @@ out: return err; } -static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 }; -static const int dummy_init_attr_mask[] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -928,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mthca_modify_qp(ibqp, &dummy_init_attr, - dummy_init_attr_mask[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1277,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, return -EINVAL; /* - * For MLX transport we need 2 extra S/G entries: + * For MLX transport we need 2 extra send gather entries: * one for the header and one for the checksum at the end */ - if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) + if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 91934f2d9db..acb6817f606 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/init.h> diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index a5ffff6e102..4fabe62aab8 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ #include <linux/slab.h> diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 8b728486410..ca5900c96fc 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <asm/page.h> /* PAGE_SHIFT */ diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index e1262c942db..5fe56e81073 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -29,7 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * */ #ifndef MTHCA_USER_H diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index b3551a8dea1..341a5ae881c 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $ */ #ifndef MTHCA_WQE_H diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a4e9269a29b..d2884e77809 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -328,7 +328,7 @@ void nes_rem_ref(struct ib_qp *ibqp) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); u64temp = (u64)nesqp->nesqp_context_pbase; set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 61b46e9c7d2..39bd897b40c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -94,9 +94,6 @@ #define MAX_DPC_ITERATIONS 128 -#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 -#define NES_CQP_REQUEST_RING_DOORBELL 1 - #define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 #define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 #define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 @@ -538,7 +535,11 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); -void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); int nes_arp_table(struct nes_device *, u32, u8 *, u32); void nes_mh_fix(unsigned long); void nes_clc(unsigned long); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9a4b40fae40..6aa531d5276 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1603,7 +1603,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, return NULL; } - memset(listener, 0, sizeof(struct nes_cm_listener)); listener->loc_addr = htonl(cm_info->loc_addr); listener->loc_port = htons(cm_info->loc_port); listener->reused_node = 0; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index d3278f111ca..85f26d19a32 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -398,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->base_pd = 1; nesadapter->device_cap_flags = - IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW; + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); @@ -2710,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) barrier(); cqp_request->request_done = 1; wake_up(&cqp_request->waitq); - if (atomic_dec_and_test(&cqp_request->refcount)) { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } else if (cqp_request->callback) { - /* Envoke the callback routine */ - cqp_request->cqp_callback(nesdev, cqp_request); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + nes_put_cqp_request(nesdev, cqp_request); } else { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + if (cqp_request->callback) + cqp_request->cqp_callback(nesdev, cqp_request); + nes_free_cqp_request(nesdev, cqp_request); } } else { wake_up(&nesdev->cqp.waitq); @@ -3149,7 +3121,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, { struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; - unsigned long flags; struct nes_cqp_request *cqp_request; int ret = 0; u16 major_code; @@ -3176,7 +3147,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (add_port == NES_MANAGE_APBVT_ADD) ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -3184,15 +3155,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -3252,7 +3217,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } @@ -3262,7 +3227,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, u32 which_wq, u32 wait_completion) { - unsigned long flags; struct nes_cqp_request *cqp_request; struct nes_hw_cqp_wqe *cqp_wqe; int ret; @@ -3285,7 +3249,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (wait_completion) { /* Wait for CQP */ @@ -3294,14 +3258,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," " CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 745bf94f3f0..7b81e0ae007 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1172,7 +1172,7 @@ struct nes_vnic { u32 mcrq_qp_id; struct nes_ucontext *mcrq_ucontext; struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); - void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int); + void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *); int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); struct net_device_stats netstats; /* used to put the netdev on the adapters logical port list */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index fe83d1b2b17..fb8cbd71a2e 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -567,12 +567,36 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) return cqp_request; } +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); + + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } +} + +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + if (atomic_dec_and_test(&cqp_request->refcount)) + nes_free_cqp_request(nesdev, cqp_request); +} /** * nes_post_cqp_request */ void nes_post_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request, int ring_doorbell) + struct nes_cqp_request *cqp_request) { struct nes_hw_cqp_wqe *cqp_wqe; unsigned long flags; @@ -600,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, cqp_request->waiting, atomic_read(&cqp_request->refcount)); barrier(); - if (ring_doorbell) { - /* Ring doorbell (1 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - } + + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); barrier(); } else { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 99b3c4ae86e..e3939d13484 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); * nes_alloc_mw */ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { - unsigned long flags; struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -119,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); if (!ret) { @@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { } else { return ERR_PTR(-ENOMEM); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); nesmr->ibmw.rkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MW; @@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; int err = 0; - unsigned long flags; int ret; /* Deallocate the window with the adapter */ @@ -194,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", @@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - err = -ETIME; - } else { - err = -EIO; - } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } + if (!ret) + err = -ETIME; + else if (cqp_request->major_code) + err = -EIO; + + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_mrs, (ibmw->rkey & 0x0fffff00) >> 8); @@ -516,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); ret = (!ret) ? -ETIME : -EIO; goto failed_leaf_vpbl_pages_alloc; - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } - + nes_put_cqp_request(nesdev, cqp_request); nesfmr->nesmr.ibfmr.lkey = stag; nesfmr->nesmr.ibfmr.rkey = stag; nesfmr->attr = *ibfmr_attr; @@ -1474,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", @@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); @@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } else { return ERR_PTR(-EIO); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); + if (ibpd->uobject) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.actual_sq_size = sq_size; @@ -1817,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", @@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EIO); - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (context) { /* free the nespbl */ @@ -1931,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", @@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) " CQP Major:Minor codes = 0x%04X:0x%04X.\n", nescq->hw_cq.cq_number, ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", + if (!ret) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", nescq->hw_cq.cq_number); - ret = -ETIME; - } else { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", + ret = -ETIME; + } else if (cqp_request->major_code) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", nescq->hw_cq.cq_number); - ret = -EIO; - } + ret = -EIO; } else { ret = 0; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (nescq->cq_mem_size) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, @@ -2096,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, barrier(); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), @@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -2456,10 +2340,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) goto enough_pages; if ((page_count&0x01FF) == 0) { - if (page_count>(1024*512)) { + if (page_count >= 1024 * 512) { ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); kfree(nesmr); @@ -2756,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); @@ -2773,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) major_code = cqp_request->major_code; minor_code = cqp_request->minor_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) { nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," " ib_mr=%p, rkey = 0x%08X\n", @@ -2906,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, /* struct iw_cm_id *cm_id = nesqp->cm_id; */ /* struct iw_cm_event cm_event; */ struct nes_cqp_request *cqp_request; - unsigned long flags; int ret; u16 major_code; @@ -2934,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ if (wait_completion) { @@ -2952,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, nesqp->hwqp.qp_id, cqp_request->major_code, cqp_request->minor_code, next_iwarp_state); } - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f..691525cf394 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" depends on NETDEVICES && INET && (IPV6 || IPV6=n) + select INET_LRO ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca126fc2b85..b0ffc9abe8c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ */ #ifndef _IPOIB_H @@ -52,9 +50,16 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> #include <rdma/ib_sa.h> +#include <linux/inet_lro.h> /* constants */ +enum ipoib_flush_level { + IPOIB_FLUSH_LIGHT, + IPOIB_FLUSH_NORMAL, + IPOIB_FLUSH_HEAVY +}; + enum { IPOIB_ENCAP_LEN = 4, @@ -65,8 +70,8 @@ enum { IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_RX_RING_SIZE = 256, + IPOIB_TX_RING_SIZE = 128, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_CM_MAX_CONN_QP = 4096, @@ -84,7 +89,6 @@ enum { IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_CSUM = 11, @@ -96,7 +100,11 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, + IPOIB_CM_COPYBREAK = 256, }; #define IPOIB_OP_RECV (1ul << 31) @@ -149,6 +157,11 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; +struct ipoib_cm_tx_buf { + struct sk_buff *skb; + u64 mapping; +}; + struct ib_cm_id; struct ipoib_cm_data { @@ -207,7 +220,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_tx_buf *tx_ring; + struct ipoib_cm_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -249,6 +262,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -264,7 +282,6 @@ struct ipoib_dev_priv { unsigned long flags; - struct mutex mcast_mutex; struct mutex vlan_mutex; struct rb_root path_tree; @@ -276,10 +293,11 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; - struct work_struct flush_task; + struct work_struct flush_light; + struct work_struct flush_normal; + struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - struct work_struct pkey_event_task; struct ib_device *ca; u8 port; @@ -335,6 +353,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { @@ -359,6 +379,7 @@ struct ipoib_path { struct rb_node rb_node; struct list_head list; + int valid; }; struct ipoib_neigh { @@ -423,11 +444,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_ib_dev_flush_light(struct work_struct *work); +void ipoib_ib_dev_flush_normal(struct work_struct *work); +void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); @@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); + union ib_gid *mgid, int set_qkey); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 97e67d36378..0f2d3045061 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <rdma/ib_cm.h> @@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) } static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, - struct ipoib_cm_rx *rx, int id) + struct ipoib_cm_rx *rx, + struct ib_recv_wr *wr, + struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < IPOIB_CM_RX_SG; ++i) - priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; + sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -322,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, return 0; } +static void ipoib_cm_init_rx_wr(struct net_device *dev, + struct ib_recv_wr *wr, + struct ib_sge *sge) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->cm.num_frags; ++i) + sge[i].lkey = priv->mr->lkey; + + sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + sge[i].length = PAGE_SIZE; + + wr->next = NULL; + wr->sg_list = priv->cm.rx_sge; + wr->num_sge = priv->cm.num_frags; +} + static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct { + struct ib_recv_wr wr; + struct ib_sge sge[IPOIB_CM_RX_SG]; + } *t; int ret; int i; @@ -333,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; + t = kmalloc(sizeof *t, GFP_KERNEL); + if (!t) { + ret = -ENOMEM; + goto err_free; + } + + ipoib_cm_init_rx_wr(dev, &t->wr, t->sge); + spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { @@ -351,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; - } - ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); + } + ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); @@ -363,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->recv_count = ipoib_recvq_size; + kfree(t); + return 0; err_count: @@ -371,6 +404,7 @@ err_count: spin_unlock_irq(&priv->lock); err_free: + kfree(t); ipoib_cm_free_rx_ring(dev, rx->rx_ring); return ret; @@ -525,6 +559,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) u64 mapping[IPOIB_CM_RX_SG]; int frags; int has_srq; + struct sk_buff *small_skb; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -579,6 +614,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } + if (wc->byte_len < IPOIB_CM_COPYBREAK) { + int dlen = wc->byte_len; + + small_skb = dev_alloc_skb(dlen + 12); + if (small_skb) { + skb_reserve(small_skb, 12); + ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_copy_from_linear_data(skb, small_skb->data, dlen); + ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_put(small_skb, dlen); + skb = small_skb; + goto copied; + } + } + frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; @@ -601,6 +653,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); +copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); @@ -620,7 +673,10 @@ repost: ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } else { - if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { + if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, + &priv->cm.rx_wr, + priv->cm.rx_sge, + wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); @@ -647,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; u64 addr; if (unlikely(skb->len > tx->mtu)) { @@ -678,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - tx_req->mapping[0] = addr; + tx_req->mapping = addr; if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), addr, skb->len))) { @@ -703,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -717,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1087,7 +1143,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; unsigned long begin; @@ -1115,7 +1171,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; @@ -1384,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + rtnl_lock(); dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); + rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); @@ -1393,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - dev->mtu = min(priv->mcast_mtu, dev->mtu); - ipoib_flush_paths(dev); + rtnl_lock(); if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } + dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + rtnl_unlock(); + ipoib_flush_paths(dev); return count; } @@ -1485,15 +1545,7 @@ int ipoib_cm_dev_init(struct net_device *dev) priv->cm.num_frags = IPOIB_CM_RX_SG; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; - - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = priv->cm.num_frags; + ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge); if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44..66af5c1a76e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, return 0; } +static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { + "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no desc" +}; + +static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); + break; + } +} + +static int ipoib_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ipoib_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ipoib_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int index = 0; + + /* Get LRO statistics */ + data[index++] = priv->lro.lro_mgr.stats.aggregated; + data[index++] = priv->lro.lro_mgr.stats.flushed; + if (priv->lro.lro_mgr.stats.flushed) + data[index++] = priv->lro.lro_mgr.stats.aggregated / + priv->lro.lro_mgr.stats.flushed; + else + data[index++] = 0; + data[index++] = priv->lro.lro_mgr.stats.no_desc; +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_tso = ethtool_op_get_tso, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = ipoib_get_strings, + .get_sset_count = ipoib_get_sset_count, + .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 8b882bbd1d0..961c585da21 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $ */ #include <linux/err.h> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f429bce24c2..66cafa20c24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $ */ #include <linux/delay.h> @@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_receive_skb(skb); + if (dev->features & NETIF_F_LRO) + lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -442,6 +443,9 @@ poll_more: } if (done < budget) { + if (dev->features & NETIF_F_LRO) + lro_flush_all(&priv->lro.lro_mgr); + netif_rx_complete(dev, napi); if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | @@ -898,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; @@ -911,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, pkey_event); + __ipoib_ib_dev_flush(cpriv, level); mutex_unlock(&priv->vlan_mutex); @@ -925,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) return; } - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); @@ -943,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) priv->pkey_index = new_index; } - ipoib_dbg(priv, "flushing\n"); + if (level == IPOIB_FLUSH_LIGHT) { + ipoib_mark_paths_invalid(dev); + ipoib_mcast_dev_flush(dev); + } - ipoib_ib_dev_down(dev, 0); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_down(dev, 0); - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_open(dev); } @@ -957,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * we get here, don't bring it back up if it's not configured up */ if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { - ipoib_ib_dev_up(dev); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_up(dev); ipoib_mcast_restart_task(&priv->restart_task); } } -void ipoib_ib_dev_flush(struct work_struct *work) +void ipoib_ib_dev_flush_light(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, flush_light); + + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); +} + +void ipoib_ib_dev_flush_normal(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, flush_task); + container_of(work, struct ipoib_dev_priv, flush_normal); - ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); } -void ipoib_pkey_event(struct work_struct *work) +void ipoib_ib_dev_flush_heavy(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_event_task); + container_of(work, struct ipoib_dev_priv, flush_heavy); - ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 1); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2442090ac8d..8be9ea0436e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ */ #include "ipoib.h" @@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +static int lro; +module_param(lro, bool, 0444); +MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); + +static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; +module_param(lro_max_aggr, int, 0644); +MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " + "(default = 64)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -350,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ +void ipoib_mark_paths_invalid(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path, *tp; + + spin_lock_irq(&priv->lock); + + list_for_each_entry_safe(path, tp, &priv->path_list, list) { + ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n", + be16_to_cpu(path->pathrec.dlid), + IPOIB_GID_ARG(path->pathrec.dgid)); + path->valid = 0; + } + + spin_unlock_irq(&priv->lock); +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -386,6 +410,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; + struct ipoib_ah *old_ah; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -409,6 +434,7 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); + old_ah = path->ah; path->ah = ah; if (ah) { @@ -421,6 +447,17 @@ static void path_rec_completion(int status, __skb_queue_tail(&skqueue, skb); list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { + if (neigh->ah) { + WARN_ON(neigh->ah != old_ah); + /* + * Dropping the ah reference inside + * priv->lock is safe here, because we + * will hold one more reference from + * the original value of path->ah (ie + * old_ah). + */ + ipoib_put_ah(neigh->ah); + } kref_get(&path->ah->ref); neigh->ah = path->ah; memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, @@ -443,6 +480,7 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } + path->valid = 1; } path->query = NULL; @@ -450,6 +488,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (old_ah) + ipoib_put_ah(old_ah); + while ((skb = __skb_dequeue(&skqueue))) { skb->dev = dev; if (dev_queue_xmit(skb)) @@ -623,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock(&priv->lock); path = __path_find(dev, phdr->hwaddr + 4); - if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + if (!path || !path->valid) { + if (!path) + path = path_rec_create(dev, phdr->hwaddr + 4); if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); @@ -938,6 +980,54 @@ static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + + if (unlikely(skb->protocol != htons(ETH_P_IP))) + return -1; + + /* + * In the future we may add an else clause that verifies the + * checksum and allows devices which do not calculate checksum + * to use LRO. + */ + if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) + return -1; + + /* Check for non-TCP packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if IP header and TCP header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ipoib_lro_setup(struct ipoib_dev_priv *priv) +{ + priv->lro.lro_mgr.max_aggr = lro_max_aggr; + priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; + priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; + priv->lro.lro_mgr.get_skb_header = get_skb_hdr; + priv->lro.lro_mgr.features = LRO_F_NAPI; + priv->lro.lro_mgr.dev = priv->dev; + priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; +} + static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -977,10 +1067,11 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; + ipoib_lro_setup(priv); + spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - mutex_init(&priv->mcast_mutex); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); @@ -989,9 +1080,10 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); - INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); - INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); + INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); + INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); + INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); } @@ -1154,6 +1246,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } + if (lro) + priv->dev->features |= NETIF_F_LRO; + /* * Set the full membership bit, so that we join the right * broadcast group, etc. @@ -1304,6 +1399,12 @@ static int __init ipoib_init_module(void) ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif + /* + * When copying small received packets, we only copy from the + * linear data part of the SKB, so we rely on this condition. + */ + BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); + ret = ipoib_register_debugfs(); if (ret) return ret; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d00a2c174ae..8950e9546f4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ */ #include <linux/skbuff.h> @@ -188,14 +186,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah; int ret; + int set_qkey = 0; mcast->mcmember = *mcmember; /* Set the cached Q_Key before we attach if it's the broadcast group */ if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, sizeof (union ib_gid))) { + spin_lock_irq(&priv->lock); + if (!priv->broadcast) { + spin_unlock_irq(&priv->lock); + return -EAGAIN; + } priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); + spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; } if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -208,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + &mcast->mcmember.mgid, set_qkey); if (ret < 0) { ipoib_warn(priv, "couldn't attach QP to multicast group " IPOIB_GID_FMT "\n", @@ -569,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - if (!ipoib_cm_admin_enabled(dev)) - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + if (!ipoib_cm_admin_enabled(dev)) { + rtnl_lock(); + dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); + rtnl_unlock(); + } ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); @@ -588,10 +597,6 @@ int ipoib_mcast_start_thread(struct net_device *dev) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - spin_lock_irq(&priv->lock); - set_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - return 0; } @@ -601,10 +606,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irq(&priv->lock); - clear_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - mutex_lock(&mcast_mutex); clear_bit(IPOIB_MCAST_RUN, &priv->flags); cancel_delayed_work(&priv->mcast_task); @@ -629,10 +630,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) IPOIB_GID_ARG(mcast->mcmember.mgid)); /* Remove ourselves from the multicast group */ - ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid)); if (ret) - ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); } return 0; @@ -768,7 +769,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_mcast_stop_thread(dev, 0); local_irq_save(flags); - netif_tx_lock(dev); + netif_addr_lock(dev); spin_lock(&priv->lock); /* @@ -845,7 +846,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) } spin_unlock(&priv->lock); - netif_tx_unlock(dev); + netif_addr_unlock(dev); local_irq_restore(flags); /* We have to cancel outside of the spinlock */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8766d29ce3b..68325119f74 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -29,24 +29,17 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "ipoib.h" -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_qp_attr *qp_attr; + struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; - ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) - goto out; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; @@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - /* set correct QKey for QP */ - qp_attr->qkey = priv->qkey; - ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); - if (ret) { - ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); - goto out; + if (set_qkey) { + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto out; + + /* set correct QKey for QP */ + qp_attr->qkey = priv->qkey; + ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); + if (ret) { + ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); + goto out; + } } /* attach QP to multicast group */ - mutex_lock(&priv->mcast_mutex); ret = ib_attach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); @@ -74,20 +72,6 @@ out: return ret; } -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - int ret; - - mutex_lock(&priv->mcast_mutex); - ret = ib_detach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); - if (ret) - ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); - - return ret; -} - int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) - init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; + init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + + if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; @@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler, if (record->element.port_num != priv->port) return; - if (record->event == IB_EVENT_PORT_ERR || - record->event == IB_EVENT_PORT_ACTIVE || - record->event == IB_EVENT_LID_CHANGE || - record->event == IB_EVENT_SM_CHANGE || + ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, + record->device->name, record->element.port_num); + + if (record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_CLIENT_REREGISTER) { - ipoib_dbg(priv, "Port state change event\n"); - queue_work(ipoib_workqueue, &priv->flush_task); + queue_work(ipoib_workqueue, &priv->flush_light); + } else if (record->event == IB_EVENT_PORT_ERR || + record->event == IB_EVENT_PORT_ACTIVE || + record->event == IB_EVENT_LID_CHANGE) { + queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); - queue_work(ipoib_workqueue, &priv->pkey_event_task); + queue_work(ipoib_workqueue, &priv->flush_heavy); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 1cdb5cfb0ff..b08eb56196d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index aeb58cae9a3..5a1cf2580e1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -42,9 +42,6 @@ * Zhenyu Wang * Modified by: * Erez Zilber - * - * - * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ */ #include <linux/types.h> @@ -74,6 +71,10 @@ #include "iscsi_iser.h" +static struct scsi_host_template iscsi_iser_sht; +static struct iscsi_transport iscsi_iser_transport; +static struct scsi_transport_template *iscsi_iser_scsi_transport; + static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); @@ -94,7 +95,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) { int rc = 0; - uint32_t ret_itt; int datalen; int ahslen; @@ -110,12 +110,7 @@ iscsi_iser_recv(struct iscsi_conn *conn, /* read AHS */ ahslen = hdr->hlength * 4; - /* verify itt (itt encoding: age+cid+itt) */ - rc = iscsi_verify_itt(conn, hdr, &ret_itt); - - if (!rc) - rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); - + rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) goto error; @@ -126,25 +121,33 @@ error: /** - * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands + * iscsi_iser_task_init - Initialize task + * @task: iscsi task * - **/ + * Initialize the task for the scsi command or mgmt command. + */ static int -iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) +iscsi_iser_task_init(struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; - iser_ctask->command_sent = 0; - iser_ctask->iser_conn = iser_conn; - iser_ctask_rdma_init(iser_ctask); + /* mgmt task */ + if (!task->sc) { + iser_task->desc.data = task->data; + return 0; + } + + iser_task->command_sent = 0; + iser_task->iser_conn = iser_conn; + iser_task_rdma_init(iser_task); return 0; } /** - * iscsi_mtask_xmit - xmit management(immediate) task + * iscsi_iser_mtask_xmit - xmit management(immediate) task * @conn: iscsi connection - * @mtask: task management task + * @task: task management task * * Notes: * The function can return -EAGAIN in which case caller must @@ -153,20 +156,19 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) * **/ static int -iscsi_iser_mtask_xmit(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) +iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; - debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); - error = iser_send_control(conn, mtask); + error = iser_send_control(conn, task); - /* since iser xmits control with zero copy, mtasks can not be recycled + /* since iser xmits control with zero copy, tasks can not be recycled * right after sending them. * The recycling scheme is based on whether a response is expected - * - if yes, the mtask is recycled at iscsi_complete_pdu - * - if no, the mtask is recycled at iser_snd_completion + * - if yes, the task is recycled at iscsi_complete_pdu + * - if no, the task is recycled at iser_snd_completion */ if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); @@ -175,97 +177,86 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, } static int -iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, + struct iscsi_task *task) { struct iscsi_data hdr; int error = 0; /* Send data-out PDUs while there's still unsolicited data to send */ - while (ctask->unsol_count > 0) { - iscsi_prep_unsolicit_data_pdu(ctask, &hdr); + while (task->unsol_count > 0) { + iscsi_prep_unsolicit_data_pdu(task, &hdr); debug_scsi("Sending data-out: itt 0x%x, data count %d\n", - hdr.itt, ctask->data_count); + hdr.itt, task->data_count); /* the buffer description has been passed with the command */ /* Send the command */ - error = iser_send_data_out(conn, ctask, &hdr); + error = iser_send_data_out(conn, task, &hdr); if (error) { - ctask->unsol_datasn--; - goto iscsi_iser_ctask_xmit_unsol_data_exit; + task->unsol_datasn--; + goto iscsi_iser_task_xmit_unsol_data_exit; } - ctask->unsol_count -= ctask->data_count; + task->unsol_count -= task->data_count; debug_scsi("Need to send %d more as data-out PDUs\n", - ctask->unsol_count); + task->unsol_count); } -iscsi_iser_ctask_xmit_unsol_data_exit: +iscsi_iser_task_xmit_unsol_data_exit: return error; } static int -iscsi_iser_ctask_xmit(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit(struct iscsi_task *task) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_conn *conn = task->conn; + struct iscsi_iser_task *iser_task = task->dd_data; int error = 0; - if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { - BUG_ON(scsi_bufflen(ctask->sc) == 0); + if (!task->sc) + return iscsi_iser_mtask_xmit(conn, task); + + if (task->sc->sc_data_direction == DMA_TO_DEVICE) { + BUG_ON(scsi_bufflen(task->sc) == 0); debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", - ctask->itt, scsi_bufflen(ctask->sc), - ctask->imm_count, ctask->unsol_count); + task->itt, scsi_bufflen(task->sc), + task->imm_count, task->unsol_count); } - debug_scsi("ctask deq [cid %d itt 0x%x]\n", - conn->id, ctask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", + conn->id, task->itt); /* Send the cmd PDU */ - if (!iser_ctask->command_sent) { - error = iser_send_command(conn, ctask); + if (!iser_task->command_sent) { + error = iser_send_command(conn, task); if (error) - goto iscsi_iser_ctask_xmit_exit; - iser_ctask->command_sent = 1; + goto iscsi_iser_task_xmit_exit; + iser_task->command_sent = 1; } /* Send unsolicited data-out PDU(s) if necessary */ - if (ctask->unsol_count) - error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); + if (task->unsol_count) + error = iscsi_iser_task_xmit_unsol_data(conn, task); - iscsi_iser_ctask_xmit_exit: + iscsi_iser_task_xmit_exit: if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } static void -iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; - - if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); - } -} + struct iscsi_iser_task *iser_task = task->dd_data; -static struct iser_conn * -iscsi_iser_ib_conn_lookup(__u64 ep_handle) -{ - struct iser_conn *ib_conn; - struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle; + /* mgmt tasks do not need special cleanup */ + if (!task->sc) + return; - mutex_lock(&ig.connlist_mutex); - list_for_each_entry(ib_conn, &ig.connlist, conn_list) { - if (ib_conn == uib_conn) { - mutex_unlock(&ig.connlist_mutex); - return ib_conn; - } + if (iser_task->status == ISER_TASK_STATUS_STARTED) { + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); } - mutex_unlock(&ig.connlist_mutex); - iser_err("no conn exists for eph %llx\n",(unsigned long long)ep_handle); - return NULL; } static struct iscsi_cls_conn * @@ -275,7 +266,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) struct iscsi_cls_conn *cls_conn; struct iscsi_iser_conn *iser_conn; - cls_conn = iscsi_conn_setup(cls_session, conn_idx); + cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); if (!cls_conn) return NULL; conn = cls_conn->dd_data; @@ -286,21 +277,11 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) */ conn->max_recv_dlength = 128; - iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); - if (!iser_conn) - goto conn_alloc_fail; - - /* currently this is the only field which need to be initiated */ - rwlock_init(&iser_conn->lock); - + iser_conn = conn->dd_data; conn->dd_data = iser_conn; iser_conn->iscsi_conn = conn; return cls_conn; - -conn_alloc_fail: - iscsi_conn_teardown(cls_conn); - return NULL; } static void @@ -308,11 +289,18 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; iscsi_conn_teardown(cls_conn); - if (iser_conn->ib_conn) - iser_conn->ib_conn->iser_conn = NULL; - kfree(iser_conn); + /* + * Userspace will normally call the stop callback and + * already have freed the ib_conn, but if it goofed up then + * we free it here. + */ + if (ib_conn) { + ib_conn->iser_conn = NULL; + iser_conn_put(ib_conn); + } } static int @@ -323,6 +311,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; int error; error = iscsi_conn_bind(cls_session, cls_conn, is_leading); @@ -331,12 +320,14 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* the transport ep handle comes from user space so it must be * verified against the global ib connections list */ - ib_conn = iscsi_iser_ib_conn_lookup(transport_eph); - if (!ib_conn) { + ep = iscsi_lookup_endpoint(transport_eph); + if (!ep) { iser_err("can't bind eph %llx\n", (unsigned long long)transport_eph); return -EINVAL; } + ib_conn = ep->dd_data; + /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ @@ -344,10 +335,30 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, iser_conn = conn->dd_data; ib_conn->iser_conn = iser_conn; iser_conn->ib_conn = ib_conn; + iser_conn_get(ib_conn); + return 0; +} - conn->recv_lock = &iser_conn->lock; +static void +iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; - return 0; + /* + * Userspace may have goofed up and not bound the connection or + * might have only partially setup the connection. + */ + if (ib_conn) { + iscsi_conn_stop(cls_conn, flag); + /* + * There is no unbind event so the stop callback + * must release the ref from the bind. + */ + iser_conn_put(ib_conn); + } + iser_conn->ib_conn = NULL; } static int @@ -363,55 +374,75 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) return iscsi_conn_start(cls_conn); } -static struct iscsi_transport iscsi_iser_transport; +static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +{ + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + + iscsi_host_remove(shost); + iscsi_host_free(shost); +} static struct iscsi_cls_session * -iscsi_iser_session_create(struct iscsi_transport *iscsit, - struct scsi_transport_template *scsit, - uint16_t cmds_max, uint16_t qdepth, - uint32_t initial_cmdsn, uint32_t *hostno) +iscsi_iser_session_create(struct iscsi_endpoint *ep, + uint16_t cmds_max, uint16_t qdepth, + uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; + struct Scsi_Host *shost; int i; - uint32_t hn; - struct iscsi_cmd_task *ctask; - struct iscsi_mgmt_task *mtask; - struct iscsi_iser_cmd_task *iser_ctask; - struct iser_desc *desc; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; + struct iser_conn *ib_conn; + + shost = iscsi_host_alloc(&iscsi_iser_sht, 0, ISCSI_MAX_CMD_PER_LUN); + if (!shost) + return NULL; + shost->transportt = iscsi_iser_scsi_transport; + shost->max_lun = iscsi_max_lun; + shost->max_id = 0; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + /* + * older userspace tools (before 2.0-870) did not pass us + * the leading conn's ep so this will be NULL; + */ + if (ep) + ib_conn = ep->dd_data; + + if (iscsi_host_add(shost, + ep ? ib_conn->device->ib_device->dma_device : NULL)) + goto free_host; + *hostno = shost->host_no; /* * we do not support setting can_queue cmd_per_lun from userspace yet * because we preallocate so many resources */ - cls_session = iscsi_session_setup(iscsit, scsit, + cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, - ISCSI_MAX_CMD_PER_LUN, - sizeof(struct iscsi_iser_cmd_task), - sizeof(struct iser_desc), - initial_cmdsn, &hn); + sizeof(struct iscsi_iser_task), + initial_cmdsn, 0); if (!cls_session) - return NULL; - - *hostno = hn; - session = class_to_transport_session(cls_session); + goto remove_host; + session = cls_session->dd_data; + shost->can_queue = session->scsi_cmds_max; /* libiscsi setup itts, data and pool so just set desc fields */ for (i = 0; i < session->cmds_max; i++) { - ctask = session->cmds[i]; - iser_ctask = ctask->dd_data; - ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; - ctask->hdr_max = sizeof(iser_ctask->desc.iscsi_header); - } - - for (i = 0; i < session->mgmtpool_max; i++) { - mtask = session->mgmt_cmds[i]; - desc = mtask->dd_data; - mtask->hdr = &desc->iscsi_header; - desc->data = mtask->data; + task = session->cmds[i]; + iser_task = task->dd_data; + task->hdr = (struct iscsi_cmd *)&iser_task->desc.iscsi_header; + task->hdr_max = sizeof(iser_task->desc.iscsi_header); } - return cls_session; + +remove_host: + iscsi_host_remove(shost); +free_host: + iscsi_host_free(shost); + return NULL; } static int @@ -484,34 +515,37 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->custom[3].value = conn->fmr_unalign_cnt; } -static int -iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking, - __u64 *ep_handle) +static struct iscsi_endpoint * +iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking) { int err; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; - err = iser_conn_init(&ib_conn); - if (err) - goto out; + ep = iscsi_create_endpoint(sizeof(*ib_conn)); + if (!ep) + return ERR_PTR(-ENOMEM); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); - if (!err) - *ep_handle = (__u64)(unsigned long)ib_conn; + ib_conn = ep->dd_data; + ib_conn->ep = ep; + iser_conn_init(ib_conn); -out: - return err; + err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, + non_blocking); + if (err) { + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); + } + return ep; } static int -iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) +iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + struct iser_conn *ib_conn; int rc; - if (!ib_conn) - return -EINVAL; - + ib_conn = ep->dd_data; rc = wait_event_interruptible_timeout(ib_conn->wait, ib_conn->state == ISER_CONN_UP, msecs_to_jiffies(timeout_ms)); @@ -533,13 +567,21 @@ iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) } static void -iscsi_iser_ep_disconnect(__u64 ep_handle) +iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { struct iser_conn *ib_conn; - ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); - if (!ib_conn) - return; + ib_conn = ep->dd_data; + if (ib_conn->iser_conn) + /* + * Must suspend xmit path if the ep is bound to the + * iscsi_conn, so we know we are not accessing the ib_conn + * when we free it. + * + * This may not be bound if the ep poll failed. + */ + iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); + iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); @@ -550,7 +592,6 @@ static struct scsi_host_template iscsi_iser_sht = { .name = "iSCSI Initiator over iSER, v." DRV_VER, .queuecommand = iscsi_queuecommand, .change_queue_depth = iscsi_change_queue_depth, - .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, @@ -584,17 +625,14 @@ static struct iscsi_transport iscsi_iser_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_PING_TMO | ISCSI_RECV_TMO, + ISCSI_PING_TMO | ISCSI_RECV_TMO | + ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, - .host_template = &iscsi_iser_sht, - .conndata_size = sizeof(struct iscsi_conn), - .max_lun = ISCSI_ISER_MAX_LUN, - .max_cmd_len = ISCSI_ISER_MAX_CMD_LEN, /* session management */ .create_session = iscsi_iser_session_create, - .destroy_session = iscsi_session_teardown, + .destroy_session = iscsi_iser_session_destroy, /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, @@ -603,17 +641,16 @@ static struct iscsi_transport iscsi_iser_transport = { .get_conn_param = iscsi_conn_get_param, .get_session_param = iscsi_session_get_param, .start_conn = iscsi_iser_conn_start, - .stop_conn = iscsi_conn_stop, + .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, .set_host_param = iscsi_host_set_param, /* IO */ .send_pdu = iscsi_conn_send_pdu, .get_stats = iscsi_iser_conn_get_stats, - .init_cmd_task = iscsi_iser_cmd_init, - .xmit_cmd_task = iscsi_iser_ctask_xmit, - .xmit_mgmt_task = iscsi_iser_mtask_xmit, - .cleanup_cmd_task = iscsi_iser_cleanup_ctask, + .init_task = iscsi_iser_task_init, + .xmit_task = iscsi_iser_task_xmit, + .cleanup_task = iscsi_iser_cleanup_task, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, @@ -633,8 +670,6 @@ static int __init iser_init(void) return -EINVAL; } - iscsi_iser_transport.max_lun = iscsi_max_lun; - memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", @@ -650,7 +685,9 @@ static int __init iser_init(void) mutex_init(&ig.connlist_mutex); INIT_LIST_HEAD(&ig.connlist); - if (!iscsi_register_transport(&iscsi_iser_transport)) { + iscsi_iser_scsi_transport = iscsi_register_transport( + &iscsi_iser_transport); + if (!iscsi_iser_scsi_transport) { iser_err("iscsi_register_transport failed\n"); err = -EINVAL; goto register_transport_failure; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a8c1b300e34..81a82628a5f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -36,8 +36,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ */ #ifndef __ISCSI_ISER_H__ #define __ISCSI_ISER_H__ @@ -96,7 +94,6 @@ /* support upto 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISCSI_ISER_MAX_LUN 256 -#define ISCSI_ISER_MAX_CMD_LEN 16 /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ @@ -174,7 +171,8 @@ struct iser_data_buf { /* fwd declarations */ struct iser_device; struct iscsi_iser_conn; -struct iscsi_iser_cmd_task; +struct iscsi_iser_task; +struct iscsi_endpoint; struct iser_mem_reg { u32 lkey; @@ -198,7 +196,7 @@ struct iser_regd_buf { #define MAX_REGD_BUF_VECTOR_LEN 2 struct iser_dto { - struct iscsi_iser_cmd_task *ctask; + struct iscsi_iser_task *task; struct iser_conn *ib_conn; int notify_enable; @@ -242,7 +240,9 @@ struct iser_device { struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + struct iscsi_endpoint *ep; enum iser_ib_conn_state state; /* rdma connection state */ + atomic_t refcount; spinlock_t lock; /* used for state changes */ struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ @@ -261,11 +261,9 @@ struct iser_conn { struct iscsi_iser_conn { struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ struct iser_conn *ib_conn; /* iSER IB conn */ - - rwlock_t lock; }; -struct iscsi_iser_cmd_task { +struct iscsi_iser_task { struct iser_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; @@ -298,22 +296,26 @@ extern int iser_debug_level; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); -int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask); +int iser_send_control(struct iscsi_conn *conn, + struct iscsi_task *task); -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask); +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_task *task); -int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, - struct iscsi_data *hdr); +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_task *task, + struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *rx_data, int rx_data_len); -int iser_conn_init(struct iser_conn **ib_conn); +void iser_conn_init(struct iser_conn *ib_conn); + +void iser_conn_get(struct iser_conn *ib_conn); + +void iser_conn_put(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); @@ -322,9 +324,9 @@ void iser_rcv_completion(struct iser_desc *desc, void iser_snd_completion(struct iser_desc *desc); -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_init(struct iscsi_iser_task *task); -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_dto_buffs_release(struct iser_dto *dto); @@ -334,10 +336,10 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, @@ -357,10 +359,10 @@ int iser_post_send(struct iser_desc *tx_desc); int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 08dc81c46f4..cdd28318904 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include <linux/kernel.h> #include <linux/slab.h> @@ -66,46 +64,46 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * iser_ctask->data[ISER_DIR_IN].data_len + * iser_task->data[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, +static int iser_prepare_read_cmd(struct iscsi_task *task, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN]; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) { + if (edtl > iser_task->data[ISER_DIR_IN].data_len) { iser_err("Total data length: %ld, less than EDTL: " "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_ctask->data[ISER_DIR_IN].data_len, edtl, - ctask->itt, iser_ctask->iser_conn); + iser_task->data[ISER_DIR_IN].data_len, edtl, + task->itt, iser_task->iser_conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; hdr->flags |= ISER_RSV; hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->read_va = cpu_to_be64(regd_buf->reg.va); iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va); return 0; @@ -113,43 +111,43 @@ static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * ctask->data[ISER_DIR_OUT].data_len + * task->data[ISER_DIR_OUT].data_len */ static int -iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, +iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, unsigned int unsol_sz, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_dto *send_dto = &iser_ctask->desc.dto; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT]; + struct iser_dto *send_dto = &iser_task->desc.dto; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Total data length: %ld, less than EDTL: %d, " "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_ctask->data[ISER_DIR_OUT].data_len, - edtl, ctask->itt, ctask->conn); + iser_task->data[ISER_DIR_OUT].data_len, + edtl, task->itt, task->conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; if (unsol_sz < edtl) { hdr->flags |= ISER_WSV; @@ -158,13 +156,13 @@ iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " "VA:%#llX + unsol:%d\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va, unsol_sz); } if (imm_sz > 0) { iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", - ctask->itt, imm_sz); + task->itt, imm_sz); iser_dto_add_regd_buff(send_dto, regd_buf, 0, @@ -316,38 +314,38 @@ iser_check_xmit(struct iscsi_conn *conn, void *task) /** * iser_send_command - send command PDU */ -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_dto *send_dto = NULL; unsigned long edtl; int err = 0; struct iser_data_buf *data_buf; - struct iscsi_cmd *hdr = ctask->hdr; - struct scsi_cmnd *sc = ctask->sc; + struct iscsi_cmd *hdr = task->hdr; + struct scsi_cmnd *sc = task->sc; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ - iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND; - send_dto = &iser_ctask->desc.dto; - send_dto->ctask = iser_ctask; - iser_create_send_desc(iser_conn, &iser_ctask->desc); + iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; + send_dto = &iser_task->desc.dto; + send_dto->task = iser_task; + iser_create_send_desc(iser_conn, &iser_task->desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) - data_buf = &iser_ctask->data[ISER_DIR_IN]; + data_buf = &iser_task->data[ISER_DIR_IN]; else - data_buf = &iser_ctask->data[ISER_DIR_OUT]; + data_buf = &iser_task->data[ISER_DIR_OUT]; if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); @@ -357,15 +355,15 @@ int iser_send_command(struct iscsi_conn *conn, data_buf->data_len = scsi_bufflen(sc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(ctask, edtl); + err = iser_prepare_read_cmd(task, edtl); if (err) goto send_command_error; } if (hdr->flags & ISCSI_FLAG_CMD_WRITE) { - err = iser_prepare_write_cmd(ctask, - ctask->imm_count, - ctask->imm_count + - ctask->unsol_count, + err = iser_prepare_write_cmd(task, + task->imm_count, + task->imm_count + + task->unsol_count, edtl); if (err) goto send_command_error; @@ -380,27 +378,27 @@ int iser_send_command(struct iscsi_conn *conn, goto send_command_error; } - iser_ctask->status = ISER_TASK_STATUS_STARTED; + iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_ctask->desc); + err = iser_post_send(&iser_task->desc); if (!err) return 0; send_command_error: iser_dto_buffs_release(send_dto); - iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err); + iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); return err; } /** * iser_send_data_out - send data out PDU */ -int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_task *task, struct iscsi_data *hdr) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_desc *tx_desc = NULL; struct iser_dto *send_dto = NULL; unsigned long buf_offset; @@ -413,7 +411,7 @@ int iser_send_data_out(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; itt = (__force uint32_t)hdr->itt; @@ -434,7 +432,7 @@ int iser_send_data_out(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ send_dto = &tx_desc->dto; - send_dto->ctask = iser_ctask; + send_dto->task = iser_task; iser_create_send_desc(iser_conn, tx_desc); iser_reg_single(iser_conn->ib_conn->device, @@ -442,15 +440,15 @@ int iser_send_data_out(struct iscsi_conn *conn, /* all data was registered for RDMA, we can use the lkey */ iser_dto_add_regd_buff(send_dto, - &iser_ctask->rdma_regd[ISER_DIR_OUT], + &iser_task->rdma_regd[ISER_DIR_OUT], buf_offset, data_seg_len); - if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " "inconsistent with total len:%ld, itt:%d\n", buf_offset, data_seg_len, - iser_ctask->data[ISER_DIR_OUT].data_len, itt); + iser_task->data[ISER_DIR_OUT].data_len, itt); err = -EINVAL; goto send_data_out_error; } @@ -470,10 +468,11 @@ send_data_out_error: } int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_desc *mdesc = mtask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_desc *mdesc = &iser_task->desc; struct iser_dto *send_dto = NULL; unsigned long data_seg_len; int err = 0; @@ -485,27 +484,27 @@ int iser_send_control(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn,mtask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; send_dto = &mdesc->dto; - send_dto->ctask = NULL; + send_dto->task = NULL; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn->device; iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - data_seg_len = ntoh24(mtask->hdr->dlength); + data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { regd_buf = &mdesc->data_regd_buf; memset(regd_buf, 0, sizeof(struct iser_regd_buf)); regd_buf->device = device; - regd_buf->virt_addr = mtask->data; - regd_buf->data_size = mtask->data_count; + regd_buf->virt_addr = task->data; + regd_buf->data_size = task->data_count; iser_reg_single(device, regd_buf, DMA_TO_DEVICE); iser_dto_add_regd_buff(send_dto, regd_buf, @@ -535,15 +534,13 @@ send_control_error: void iser_rcv_completion(struct iser_desc *rx_desc, unsigned long dto_xfer_len) { - struct iser_dto *dto = &rx_desc->dto; + struct iser_dto *dto = &rx_desc->dto; struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; - struct iscsi_session *session = conn->iscsi_conn->session; - struct iscsi_cmd_task *ctask; - struct iscsi_iser_cmd_task *iser_ctask; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; struct iscsi_hdr *hdr; char *rx_data = NULL; int rx_data_len = 0; - unsigned int itt; unsigned char opcode; hdr = &rx_desc->iscsi_header; @@ -559,19 +556,24 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - itt = get_itt(hdr->itt); /* mask out cid and age bits */ - if (!(itt < session->cmds_max)) + spin_lock(&conn->iscsi_conn->session->lock); + task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (task) + __iscsi_get_task(task); + spin_unlock(&conn->iscsi_conn->session->lock); + + if (!task) iser_err("itt can't be matched to task!!! " - "conn %p opcode %d cmds_max %d itt %d\n", - conn->iscsi_conn,opcode,session->cmds_max,itt); - /* use the mapping given with the cmds array indexed by itt */ - ctask = (struct iscsi_cmd_task *)session->cmds[itt]; - iser_ctask = ctask->dd_data; - iser_dbg("itt %d ctask %p\n",itt,ctask); - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); + "conn %p opcode %d itt %d\n", + conn->iscsi_conn, opcode, hdr->itt); + else { + iser_task = task->dd_data; + iser_dbg("itt %d task %p\n",hdr->itt, task); + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); + iscsi_put_task(task); + } } - iser_dto_buffs_release(dto); iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); @@ -592,7 +594,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iser_conn *ib_conn = dto->ib_conn; struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; - struct iscsi_mgmt_task *mtask; + struct iscsi_task *task; int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -615,36 +617,31 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ - mtask = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_mgmt_task)); - if (mtask->hdr->itt == RESERVED_ITT) { - struct iscsi_session *session = conn->session; - - spin_lock(&conn->session->lock); - iscsi_free_mgmt_task(conn, mtask); - spin_unlock(&session->lock); - } + task = (void *) ((long)(void *)tx_desc - + sizeof(struct iscsi_task)); + if (task->hdr->itt == RESERVED_ITT) + iscsi_put_task(task); } } -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_init(struct iscsi_iser_task *iser_task) { - iser_ctask->status = ISER_TASK_STATUS_INIT; + iser_task->status = ISER_TASK_STATUS_INIT; - iser_ctask->dir[ISER_DIR_IN] = 0; - iser_ctask->dir[ISER_DIR_OUT] = 0; + iser_task->dir[ISER_DIR_IN] = 0; + iser_task->dir[ISER_DIR_OUT] = 0; - iser_ctask->data[ISER_DIR_IN].data_len = 0; - iser_ctask->data[ISER_DIR_OUT].data_len = 0; + iser_task->data[ISER_DIR_IN].data_len = 0; + iser_task->data[ISER_DIR_OUT].data_len = 0; - memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0, + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); - memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0, + memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, sizeof(struct iser_regd_buf)); } -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { int deferred; int is_rdma_aligned = 1; @@ -653,17 +650,17 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); } - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); } - if (iser_ctask->dir[ISER_DIR_IN]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + regd = &iser_task->rdma_regd[ISER_DIR_IN]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-IN rdma reg\n", @@ -671,8 +668,8 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) } } - if (iser_ctask->dir[ISER_DIR_OUT]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + regd = &iser_task->rdma_regd[ISER_DIR_OUT]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-OUT rdma reg\n", @@ -682,7 +679,7 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); } void iser_dto_buffs_release(struct iser_dto *dto) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index cac50c4dc15..b9453d068e9 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include <linux/module.h> #include <linux/kernel.h> @@ -101,13 +99,13 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; char *mem = NULL; - struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *data = &iser_task->data[cmd_dir]; unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) @@ -140,37 +138,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_ctask->data_copy[cmd_dir].buf = - &iser_ctask->data_copy[cmd_dir].sg_single; - iser_ctask->data_copy[cmd_dir].size = 1; + sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); + iser_task->data_copy[cmd_dir].buf = + &iser_task->data_copy[cmd_dir].sg_single; + iser_task->data_copy[cmd_dir].size = 1; - iser_ctask->data_copy[cmd_dir].copy_buf = mem; + iser_task->data_copy[cmd_dir].copy_buf = mem; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; dma_nents = ib_dma_map_sg(dev, - &iser_ctask->data_copy[cmd_dir].sg_single, + &iser_task->data_copy[cmd_dir].sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; + iser_task->data_copy[cmd_dir].dma_nents = dma_nents; return 0; } /** * iser_finalize_rdma_unaligned_sg */ -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct ib_device *dev; struct iser_data_buf *mem_copy; unsigned long cmd_data_len; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; - mem_copy = &iser_ctask->data_copy[cmd_dir]; + dev = iser_task->iser_conn->ib_conn->device->ib_device; + mem_copy = &iser_task->data_copy[cmd_dir]; ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? @@ -186,8 +184,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, /* copy back read RDMA to unaligned sg */ mem = mem_copy->copy_buf; - sgl = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; - sg_size = iser_ctask->data[ISER_DIR_IN].size; + sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; + sg_size = iser_task->data[ISER_DIR_IN].size; p = mem; for_each_sg(sgl, sg, sg_size, i) { @@ -200,7 +198,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - cmd_data_len = iser_ctask->data[cmd_dir].data_len; + cmd_data_len = iser_task->data[cmd_dir].data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) free_pages((unsigned long)mem_copy->copy_buf, @@ -378,15 +376,15 @@ static void iser_page_vec_build(struct iser_data_buf *data, } } -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) { struct ib_device *dev; - iser_ctask->dir[iser_dir] = 1; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + iser_task->dir[iser_dir] = 1; + dev = iser_task->iser_conn->ib_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -396,20 +394,20 @@ int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, return 0; } -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) { struct ib_device *dev; struct iser_data_buf *data; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + data = &iser_task->data[ISER_DIR_IN]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + data = &iser_task->data[ISER_DIR_OUT]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); } } @@ -420,21 +418,21 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn; - struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; - struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; int err; int i; struct scatterlist *sg; - regd_buf = &iser_ctask->rdma_regd[cmd_dir]; + regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { @@ -444,13 +442,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) + if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) return -ENOMEM; - mem = &iser_ctask->data_copy[cmd_dir]; + mem = &iser_task->data_copy[cmd_dir]; } /* if there a single dma entry, FMR is not needed */ @@ -474,8 +472,9 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); if (err) { iser_data_buf_dump(mem, ibdev); - iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, - ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", + mem->dma_nents, + ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", ib_conn->page_vec->data_size, ib_conn->page_vec->length, ib_conn->page_vec->offset); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d19cfe605eb..3a917c1f796 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ */ #include <linux/kernel.h> #include <linux/module.h> @@ -325,7 +323,18 @@ static void iser_conn_release(struct iser_conn *ib_conn) iser_device_try_release(device); if (ib_conn->iser_conn) ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); + iscsi_destroy_endpoint(ib_conn->ep); +} + +void iser_conn_get(struct iser_conn *ib_conn) +{ + atomic_inc(&ib_conn->refcount); +} + +void iser_conn_put(struct iser_conn *ib_conn) +{ + if (atomic_dec_and_test(&ib_conn->refcount)) + iser_conn_release(ib_conn); } /** @@ -349,7 +358,7 @@ void iser_conn_terminate(struct iser_conn *ib_conn) wait_event_interruptible(ib_conn->wait, ib_conn->state == ISER_CONN_DOWN); - iser_conn_release(ib_conn); + iser_conn_put(ib_conn); } static void iser_connect_error(struct rdma_cm_id *cma_id) @@ -483,24 +492,15 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve return ret; } -int iser_conn_init(struct iser_conn **ibconn) +void iser_conn_init(struct iser_conn *ib_conn) { - struct iser_conn *ib_conn; - - ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); - if (!ib_conn) { - iser_err("can't alloc memory for struct iser_conn\n"); - return -ENOMEM; - } ib_conn->state = ISER_CONN_INIT; init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); - - *ibconn = ib_conn; - return 0; } /** diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 435145709dd..ed7c5f72cb8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ */ #include <linux/module.h> @@ -49,8 +47,6 @@ #include <scsi/srp.h> #include <scsi/scsi_transport_srp.h> -#include <rdma/ib_cache.h> - #include "ib_srp.h" #define DRV_NAME "ib_srp" @@ -183,10 +179,10 @@ static int srp_init_qp(struct srp_target_port *target, if (!attr) return -ENOMEM; - ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, - target->srp_host->port, - be16_to_cpu(target->path.pkey), - &attr->pkey_index); + ret = ib_find_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); if (ret) goto out; @@ -1883,8 +1879,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; - ib_get_cached_gid(host->srp_dev->dev, host->port, 0, - &target->path.sgid); + ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 63d2ae72406..e185b907fc1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $ */ #ifndef IB_SRP_H |