Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile           |    9
-rw-r--r--  drivers/infiniband/core/addr.c             |  213
-rw-r--r--  drivers/infiniband/core/agent.c            |    3
-rw-r--r--  drivers/infiniband/core/cache.c            |   50
-rw-r--r--  drivers/infiniband/core/cm.c               |  165
-rw-r--r--  drivers/infiniband/core/cm_msgs.h          |   45
-rw-r--r--  drivers/infiniband/core/cma.c              | 1822
-rw-r--r--  drivers/infiniband/core/core_priv.h        |    2
-rw-r--r--  drivers/infiniband/core/device.c           |   56
-rw-r--r--  drivers/infiniband/core/fmr_pool.c         |    4
-rw-r--r--  drivers/infiniband/core/iwcm.c             |   43
-rw-r--r--  drivers/infiniband/core/iwpm_msg.c         |  685
-rw-r--r--  drivers/infiniband/core/iwpm_util.c        |  607
-rw-r--r--  drivers/infiniband/core/iwpm_util.h        |  238
-rw-r--r--  drivers/infiniband/core/mad.c              |   76
-rw-r--r--  drivers/infiniband/core/multicast.c        |    1
-rw-r--r--  drivers/infiniband/core/netlink.c          |  216
-rw-r--r--  drivers/infiniband/core/packer.c           |    1
-rw-r--r--  drivers/infiniband/core/sa_query.c         |  173
-rw-r--r--  drivers/infiniband/core/sysfs.c            |  128
-rw-r--r--  drivers/infiniband/core/ucm.c              |   22
-rw-r--r--  drivers/infiniband/core/ucma.c             |  538
-rw-r--r--  drivers/infiniband/core/ud_header.c        |   31
-rw-r--r--  drivers/infiniband/core/umem.c             |  131
-rw-r--r--  drivers/infiniband/core/user_mad.c         |   87
-rw-r--r--  drivers/infiniband/core/uverbs.h           |   62
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c       | 1297
-rw-r--r--  drivers/infiniband/core/uverbs_main.c      |  190
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c  |    5
-rw-r--r--  drivers/infiniband/core/verbs.c            |  599
30 files changed, 6074 insertions(+), 1425 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index cb1ab3ea499..ffd0af6734a 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,14 +1,15 @@
-infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
- ib_cm.o iw_cm.o $(infiniband-y)
+ ib_cm.o iw_cm.o ib_addr.o \
+ $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
- device.o fmr_pool.o cache.o
+ device.o fmr_pool.o cache.o netlink.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
@@ -17,7 +18,7 @@ ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
-iw_cm-y := iwcm.o
+iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8aba0ba57de..8172d37f9ad 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,6 +37,7 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -44,6 +45,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
@@ -69,6 +71,23 @@ static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
+int rdma_addr_size(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_IB:
+ return sizeof(struct sockaddr_ib);
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
+static struct rdma_addr_client self;
+
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@@ -102,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
@@ -125,10 +145,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
@@ -136,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
@@ -151,13 +175,11 @@ static void set_timeout(unsigned long time)
{
unsigned long delay;
- cancel_delayed_work(&work);
-
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
- queue_delayed_work(addr_wq, &work, delay);
+ mod_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
@@ -177,31 +199,53 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+{
+ struct neighbour *n;
+ int ret;
+
+ n = dst_neigh_lookup(dst, daddr);
+
+ rcu_read_lock();
+ if (!n || !(n->nud_state & NUD_VALID)) {
+ if (n)
+ neigh_event_send(n, NULL);
+ ret = -ENODATA;
+ } else {
+ ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ }
+ rcu_read_unlock();
+
+ if (n)
+ neigh_release(n);
+
+ return ret;
+}
+
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
- struct flowi fl;
struct rtable *rt;
- struct neighbour *neigh;
+ struct flowi4 fl4;
int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- fl.nl_u.ip4_u.saddr = src_ip;
- fl.oif = addr->bound_dev_if;
-
- ret = ip_route_output_key(&init_net, &rt, &fl);
- if (ret)
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = dst_ip;
+ fl4.saddr = src_ip;
+ fl4.flowi4_oif = addr->bound_dev_if;
+ rt = ip_route_output_key(&init_net, &fl4);
+ if (IS_ERR(rt)) {
+ ret = PTR_ERR(rt);
goto out;
-
+ }
src_in->sin_family = AF_INET;
- src_in->sin_addr.s_addr = rt->rt_src;
+ src_in->sin_addr.s_addr = fl4.saddr;
if (rt->dst.dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@@ -209,59 +253,47 @@ static int addr4_resolve(struct sockaddr_in *src_in,
/* If the device does ARP internally, return 'done' */
if (rt->dst.dev->flags & IFF_NOARP) {
- rdma_copy_addr(addr, rt->dst.dev, NULL);
- goto put;
- }
-
- neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev);
- if (!neigh || !(neigh->nud_state & NUD_VALID)) {
- neigh_event_send(rt->dst.neighbour, NULL);
- ret = -ENODATA;
- if (neigh)
- goto release;
+ ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
goto put;
}
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
-release:
- neigh_release(neigh);
+ ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
put:
ip_rt_put(rt);
out:
return ret;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
- struct flowi fl;
- struct neighbour *neigh;
+ struct flowi6 fl6;
struct dst_entry *dst;
int ret;
- memset(&fl, 0, sizeof fl);
- ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
- ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
- fl.oif = addr->bound_dev_if;
+ memset(&fl6, 0, sizeof fl6);
+ fl6.daddr = dst_in->sin6_addr;
+ fl6.saddr = src_in->sin6_addr;
+ fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(&init_net, NULL, &fl);
+ dst = ip6_route_output(&init_net, NULL, &fl6);
if ((ret = dst->error))
goto put;
- if (ipv6_addr_any(&fl.fl6_src)) {
+ if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
- &fl.fl6_dst, 0, &fl.fl6_src);
+ &fl6.daddr, 0, &fl6.saddr);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
- ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+ src_in->sin6_addr = fl6.saddr;
}
if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@@ -273,14 +305,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
goto put;
}
- neigh = dst->neighbour;
- if (!neigh || !(neigh->nud_state & NUD_VALID)) {
- neigh_event_send(dst->neighbour, NULL);
- ret = -ENODATA;
- goto put;
- }
-
- ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+ ret = dst_fetch_ha(dst, addr, &fl6.daddr);
put:
dst_release(dst);
return ret;
@@ -367,12 +392,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
goto err;
}
- memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
- memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+ memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
@@ -419,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
+struct resolve_cb_context {
+ struct rdma_dev_addr *addr;
+ struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context)
+{
+ memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
+ rdma_dev_addr));
+ complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+ u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ struct resolve_cb_context ctx;
+ struct net_device *dev;
+
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+
+ ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ if (ret)
+ return ret;
+
+ ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ if (ret)
+ return ret;
+
+ memset(&dev_addr, 0, sizeof(dev_addr));
+
+ ctx.addr = &dev_addr;
+ init_completion(&ctx.comp);
+ ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+ &dev_addr, 1000, resolve_cb, &ctx);
+ if (ret)
+ return ret;
+
+ wait_for_completion(&ctx.comp);
+
+ memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+ dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } gid_addr;
+
+ ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+ if (ret)
+ return ret;
+ memset(&dev_addr, 0, sizeof(dev_addr));
+ ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+ if (ret)
+ return ret;
+
+ memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
@@ -443,11 +550,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
+ rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
+ rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
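
The most interesting addition in addr.c is rdma_addr_find_dmac_by_grh(), which turns the asynchronous rdma_resolve_ip() into a blocking call: the caller passes a small context holding a completion, resolve_cb() copies the result into it and completes, and the caller sleeps in wait_for_completion(). Below is a minimal userspace sketch of the same pattern, with a pthread condition variable standing in for the kernel completion; resolver_thread() and its fixed MAC are made-up stand-ins, not part of any real API.

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    struct resolve_ctx {
        unsigned char mac[6];        /* result filled in by the callback */
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
    };

    /* callback: runs on the resolver side, copies the result, signals completion */
    static void resolve_cb(const unsigned char *mac, void *context)
    {
        struct resolve_ctx *ctx = context;

        memcpy(ctx->mac, mac, sizeof(ctx->mac));
        pthread_mutex_lock(&ctx->lock);
        ctx->done = 1;
        pthread_cond_signal(&ctx->cond);
        pthread_mutex_unlock(&ctx->lock);
    }

    /* stand-in for the async resolver: here it "resolves" immediately */
    static void *resolver_thread(void *arg)
    {
        static const unsigned char fake_mac[6] = { 0, 0x1b, 0x21, 0xab, 0xcd, 0xef };

        resolve_cb(fake_mac, arg);
        return NULL;
    }

    int main(void)
    {
        struct resolve_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER,
                                   .cond = PTHREAD_COND_INITIALIZER };
        pthread_t t;

        pthread_create(&t, NULL, resolver_thread, &ctx);  /* ~ rdma_resolve_ip() */
        pthread_mutex_lock(&ctx.lock);
        while (!ctx.done)                                 /* ~ wait_for_completion() */
            pthread_cond_wait(&ctx.cond, &ctx.lock);
        pthread_mutex_unlock(&ctx.lock);
        pthread_join(t, NULL);

        printf("dmac: %02x:%02x:%02x:%02x:%02x:%02x\n",
               ctx.mac[0], ctx.mac[1], ctx.mac[2],
               ctx.mac[3], ctx.mac[4], ctx.mac[5]);
        return 0;
    }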
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 91916a8d5de..2bc7f5af64f 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -101,7 +101,8 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n");
+ printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ PTR_ERR(ah));
return;
}
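
The agent.c hunk only extends the error message, but it leans on the kernel's ERR_PTR convention: ib_create_ah_from_wc() returns either a valid pointer or a negative errno encoded in the pointer itself, which is why PTR_ERR(ah) can be printed directly. A standalone sketch of that encoding, assuming the usual MAX_ERRNO bound of 4095:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095  /* errno values occupy the top page of the address space */

    static inline void *ERR_PTR(long error)      { return (void *)error; }
    static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int   IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* toy constructor: either a real object or an encoded errno */
    static void *create_thing(int fail)
    {
        static int thing;

        return fail ? ERR_PTR(-ENOMEM) : &thing;
    }

    int main(void)
    {
        void *p = create_thing(1);

        if (IS_ERR(p))
            printf("create_thing error %ld\n", PTR_ERR(p));  /* prints -12 */
        return 0;
    }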
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 68883565b72..80f6cf2449f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -167,6 +167,7 @@ int ib_find_cached_pkey(struct ib_device *device,
unsigned long flags;
int i;
int ret = -ENOENT;
+ int partial_ix = -1;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@@ -179,6 +180,46 @@ int ib_find_cached_pkey(struct ib_device *device,
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ if (cache->table[i] & 0x8000) {
+ *index = i;
+ ret = 0;
+ break;
+ } else
+ partial_ix = i;
+ }
+
+ if (ret && partial_ix >= 0) {
+ *index = partial_ix;
+ ret = 0;
+ }
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_pkey);
+
+int ib_find_exact_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int i;
+ int ret = -ENOENT;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ *index = -1;
+
+ for (i = 0; i < cache->table_len; ++i)
+ if (cache->table[i] == pkey) {
*index = i;
ret = 0;
break;
@@ -188,7 +229,7 @@ int ib_find_cached_pkey(struct ib_device *device,
return ret;
}
-EXPORT_SYMBOL(ib_find_cached_pkey);
+EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
@@ -302,13 +343,14 @@ static void ib_cache_event(struct ib_event_handler *handler,
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
if (work) {
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
- schedule_work(&work->work);
+ queue_work(ib_wq, &work->work);
}
}
}
@@ -368,7 +410,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
int p;
ib_unregister_event_handler(&device->cache.event_handler);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
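
ib_find_cached_pkey() now prefers a full-membership P_Key (bit 15 set) and falls back to the last partial match it saw, while the new ib_find_exact_cached_pkey() compares all 16 bits. A sketch of the search over a plain array, with the cache locking and port-range checks omitted:

    #include <stdio.h>

    /* Prefer a full-member P_Key (bit 15 set); else take a partial match. */
    static int find_pkey(const unsigned short *table, int len,
                         unsigned short pkey, int *index)
    {
        int partial_ix = -1;

        for (int i = 0; i < len; ++i) {
            if ((table[i] & 0x7fff) != (pkey & 0x7fff))
                continue;
            if (table[i] & 0x8000) {  /* full membership: done */
                *index = i;
                return 0;
            }
            partial_ix = i;           /* remember a partial match */
        }
        if (partial_ix >= 0) {
            *index = partial_ix;
            return 0;
        }
        return -1;                    /* -ENOENT in the kernel */
    }

    int main(void)
    {
        unsigned short table[] = { 0x7fff, 0xffff };  /* partial, then full */
        int ix;

        if (!find_pkey(table, 2, 0xffff, &ix))
            printf("index %d\n", ix);  /* prints 1: full member preferred */
        return 0;
    }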
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 64e0903091a..c3239170d8b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -36,6 +36,7 @@
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
+#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
@@ -46,6 +47,7 @@
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
@@ -176,6 +178,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
+ u8 valid;
+ u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -375,26 +379,27 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
+ memcpy(av->smac, path->smac, sizeof(av->smac));
+
+ av->valid = 1;
return 0;
}
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
- int ret, id;
- static int next_id;
+ int id;
- do {
- spin_lock_irqsave(&cm.lock, flags);
- ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
- next_id, &id);
- if (!ret)
- next_id = ((unsigned) id + 1) & MAX_ID_MASK;
- spin_unlock_irqrestore(&cm.lock, flags);
- } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+ idr_preload(GFP_KERNEL);
+ spin_lock_irqsave(&cm.lock, flags);
+
+ id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+ idr_preload_end();
cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return ret;
+ return id < 0 ? id : 0;
}
static void cm_free_id(__be32 local_id)
@@ -889,6 +894,8 @@ retest:
break;
case IB_CM_ESTABLISHED:
spin_unlock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ break;
ib_send_cm_dreq(cm_id, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
@@ -1008,7 +1015,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,
req_msg->service_id = param->service_id;
req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
- cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_init_depth(req_msg, param->initiator_depth);
cm_req_set_remote_resp_timeout(req_msg,
param->remote_cm_response_timeout);
@@ -1017,12 +1023,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
cm_req_set_local_resp_timeout(req_msg,
param->local_cm_response_timeout);
- cm_req_set_retry_count(req_msg, param->retry_count);
req_msg->pkey = param->primary_path->pkey;
cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
- cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
- cm_req_set_srq(req_msg, param->srq);
+
+ if (param->qp_type != IB_QPT_XRC_INI) {
+ cm_req_set_resp_res(req_msg, param->responder_resources);
+ cm_req_set_retry_count(req_msg, param->retry_count);
+ cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+ cm_req_set_srq(req_msg, param->srq);
+ }
if (pri_path->hop_limit <= 1) {
req_msg->primary_local_lid = pri_path->slid;
@@ -1080,7 +1090,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
if (!param->primary_path)
return -EINVAL;
- if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+ if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
+ param->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
if (param->private_data &&
@@ -1549,6 +1560,9 @@ static int cm_req_handler(struct cm_work *work)
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+
+ memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
+ work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1601,18 +1615,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
rep_msg->local_comm_id = cm_id_priv->id.local_id;
rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
rep_msg->resp_resources = param->responder_resources;
- rep_msg->initiator_depth = param->initiator_depth;
cm_rep_set_target_ack_delay(rep_msg,
cm_id_priv->av.port->cm_dev->ack_delay);
cm_rep_set_failover(rep_msg, param->failover_accepted);
- cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
- cm_rep_set_srq(rep_msg, param->srq);
rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
+ rep_msg->initiator_depth = param->initiator_depth;
+ cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+ cm_rep_set_srq(rep_msg, param->srq);
+ cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ } else {
+ cm_rep_set_srq(rep_msg, 1);
+ cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+ }
+
if (param->private_data && param->private_data_len)
memcpy(rep_msg->private_data, param->private_data,
param->private_data_len);
@@ -1660,7 +1680,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
- cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
@@ -1731,7 +1751,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_rtu);
-static void cm_format_rep_event(struct cm_work *work)
+static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
{
struct cm_rep_msg *rep_msg;
struct ib_cm_rep_event_param *param;
@@ -1740,7 +1760,7 @@ static void cm_format_rep_event(struct cm_work *work)
param = &work->cm_event.param.rep_rcvd;
param->remote_ca_guid = rep_msg->local_ca_guid;
param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
- param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+ param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
param->responder_resources = rep_msg->initiator_depth;
param->initiator_depth = rep_msg->resp_resources;
@@ -1808,7 +1828,7 @@ static int cm_rep_handler(struct cm_work *work)
return -EINVAL;
}
- cm_format_rep_event(work);
+ cm_format_rep_event(work, cm_id_priv->qp_type);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
@@ -1823,7 +1843,7 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
- cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
/* Check for duplicate REP. */
@@ -1850,7 +1870,7 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv->id.state = IB_CM_REP_RCVD;
cm_id_priv->id.remote_id = rep_msg->local_comm_id;
- cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
cm_id_priv->initiator_depth = rep_msg->resp_resources;
cm_id_priv->responder_resources = rep_msg->initiator_depth;
cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@@ -1988,6 +2008,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
goto out;
}
+ if (cm_id->lap_state == IB_CM_LAP_SENT ||
+ cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2153,10 @@ static int cm_dreq_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ break;
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
@@ -2349,9 +2377,18 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- case IB_CM_ESTABLISHED:
cm_enter_timewait(cm_id_priv);
break;
+ case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
+ cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg);
+ cm_enter_timewait(cm_id_priv);
+ break;
+ }
+ /* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
@@ -2989,6 +3026,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* No match. */
}
atomic_inc(&cur_cm_id_priv->refcount);
+ atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
@@ -3471,10 +3509,37 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if (!cm_id_priv->av.valid) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return -EINVAL;
+ }
+ if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
+ memcpy(qp_attr->smac, cm_id_priv->av.smac,
+ sizeof(qp_attr->smac));
+ *qp_attr_mask |= IB_QP_SMAC;
+ }
+ if (cm_id_priv->alt_av.valid) {
+ if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->alt_vlan_id =
+ cm_id_priv->alt_av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_ALT_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
+ memcpy(qp_attr->alt_smac,
+ cm_id_priv->alt_av.smac,
+ sizeof(qp_attr->alt_smac));
+ *qp_attr_mask |= IB_QP_ALT_SMAC;
+ }
+ }
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
- if (cm_id_priv->qp_type == IB_QPT_RC) {
+ if (cm_id_priv->qp_type == IB_QPT_RC ||
+ cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER;
qp_attr->max_dest_rd_atomic =
@@ -3519,15 +3584,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
- if (cm_id_priv->qp_type == IB_QPT_RC) {
- *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
+ switch (cm_id_priv->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_XRC_INI:
+ *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC;
- qp_attr->timeout = cm_id_priv->av.timeout;
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
- qp_attr->max_rd_atomic =
- cm_id_priv->initiator_depth;
+ qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ /* fall through */
+ case IB_QPT_XRC_TGT:
+ *qp_attr_mask |= IB_QP_TIMEOUT;
+ qp_attr->timeout = cm_id_priv->av.timeout;
+ break;
+ default:
+ break;
}
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
@@ -3621,8 +3692,17 @@ static struct kobj_type cm_port_obj_type = {
.release = cm_release_port_obj
};
+static char *cm_devnode(struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
struct class cm_class = {
+ .owner = THIS_MODULE,
.name = "infiniband_cm",
+ .devnode = cm_devnode,
};
EXPORT_SYMBOL(cm_class);
@@ -3797,28 +3877,31 @@ static int __init ib_cm_init(void)
cm.remote_sidr_table = RB_ROOT;
idr_init(&cm.local_id_table);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
- idr_pre_get(&cm.local_id_table, GFP_KERNEL);
INIT_LIST_HEAD(&cm.timewait_list);
ret = class_register(&cm_class);
- if (ret)
- return -ENOMEM;
+ if (ret) {
+ ret = -ENOMEM;
+ goto error1;
+ }
cm.wq = create_workqueue("ib_cm");
if (!cm.wq) {
ret = -ENOMEM;
- goto error1;
+ goto error2;
}
ret = ib_register_client(&cm_client);
if (ret)
- goto error2;
+ goto error3;
return 0;
-error2:
+error3:
destroy_workqueue(cm.wq);
-error1:
+error2:
class_unregister(&cm_class);
+error1:
+ idr_destroy(&cm.local_id_table);
return ret;
}
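
In cm_alloc_id(), the idr_pre_get()/idr_get_new_above() retry loop becomes idr_preload() plus idr_alloc_cyclic(), which hands out IDs starting just past the last one allocated so recently freed IDs are not reused immediately. A toy allocator over a fixed slot array showing only that allocation-order behaviour (the kernel IDR itself is a radix tree):

    #include <stdio.h>

    #define NSLOTS 8

    static void *slots[NSLOTS];
    static int next_id;

    /* Allocate the first free slot at or after next_id, wrapping once:
     * the ordering idr_alloc_cyclic() provides. Returns the id, or -1. */
    static int alloc_cyclic(void *ptr)
    {
        for (int n = 0; n < NSLOTS; ++n) {
            int id = (next_id + n) % NSLOTS;

            if (!slots[id]) {
                slots[id] = ptr;
                next_id = id + 1;
                return id;
            }
        }
        return -1;
    }

    int main(void)
    {
        int a = alloc_cyclic(&a);  /* 0 */
        int b = alloc_cyclic(&b);  /* 1 */

        slots[a] = NULL;           /* free id 0 */
        /* cyclic: the next allocation is 2, not the freed 0 */
        printf("%d %d %d\n", a, b, alloc_cyclic(&a));
        return 0;
    }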
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 7e63c08f697..be068f47e47 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
@@ -44,18 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
-#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
-#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
-#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
-#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
-#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
-#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
-#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
-#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
-#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
-#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
-
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
@@ -86,7 +74,7 @@ struct cm_req_msg {
__be16 pkey;
/* path MTU:4, RDC exists:1, RNR retry count:3. */
u8 offset50;
- /* max CM Retries:4, SRQ:1, rsvd:3 */
+ /* max CM Retries:4, SRQ:1, extended transport type:3 */
u8 offset51;
__be16 primary_local_lid;
@@ -175,6 +163,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
+ case 3:
+ switch (req_msg->offset51 & 0x7) {
+ case 1: return IB_QPT_XRC_TGT;
+ default: return 0;
+ }
default: return 0;
}
}
@@ -188,6 +181,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
break;
+ case IB_QPT_XRC_INI:
+ req_msg->offset40 = cpu_to_be32((be32_to_cpu(
+ req_msg->offset40) &
+ 0xFFFFFFF9) | 0x6);
+ req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+ break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
@@ -527,6 +526,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
(be32_to_cpu(rep_msg->offset12) & 0x000000FF));
}
+static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
+{
+ return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
+}
+
+static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
+{
+ rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
+ (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
+}
+
+static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
+{
+ return (qp_type == IB_QPT_XRC_INI) ?
+ cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
+}
+
static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
@@ -771,6 +787,7 @@ struct cm_apr_msg {
u8 info_length;
u8 ap_status;
+ __be16 rsvd;
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
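
The new cm_rep_get/set_local_eecn() accessors follow this header's usual idiom for packed big-endian wire words: convert to host order, shift and mask, convert back. A standalone sketch packing a 24-bit value into bits 31..8 of a 32-bit big-endian word (simplified to take the value in host order, where the kernel helpers take __be32):

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>  /* htonl/ntohl */

    /* Pack a 24-bit value into bits 31..8, preserving bits 7..0. */
    static uint32_t set_upper24(uint32_t word_be, uint32_t val)
    {
        return htonl((val << 8) | (ntohl(word_be) & 0x000000FF));
    }

    static uint32_t get_upper24(uint32_t word_be)
    {
        return ntohl(word_be) >> 8;
    }

    int main(void)
    {
        uint32_t w = htonl(0x000000AB);  /* low byte already in use */

        w = set_upper24(w, 0x123456);
        printf("%06x %02x\n", (unsigned)get_upper24(w),
               (unsigned)(ntohl(w) & 0xFF));  /* prints: 123456 ab */
        return 0;
    }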
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde..d570030d899 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -41,12 +41,16 @@
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/route.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
@@ -76,10 +80,10 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
+static DEFINE_IDR(ib_ps);
struct cma_device {
struct list_head list;
@@ -89,26 +93,16 @@ struct cma_device {
struct list_head id_list;
};
-enum cma_state {
- CMA_IDLE,
- CMA_ADDR_QUERY,
- CMA_ADDR_RESOLVED,
- CMA_ROUTE_QUERY,
- CMA_ROUTE_RESOLVED,
- CMA_CONNECT,
- CMA_DISCONNECT,
- CMA_ADDR_BOUND,
- CMA_LISTEN,
- CMA_DEVICE_REMOVAL,
- CMA_DESTROYING
-};
-
struct rdma_bind_list {
struct idr *ps;
struct hlist_head owners;
unsigned short port;
};
+enum {
+ CMA_OPTION_AFONLY,
+};
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -126,7 +120,7 @@ struct rdma_id_private {
struct list_head mc_list;
int internal_id;
- enum cma_state state;
+ enum rdma_cm_state state;
spinlock_t lock;
struct mutex qp_mutex;
@@ -146,8 +140,12 @@ struct rdma_id_private {
u32 seq_num;
u32 qkey;
u32 qp_num;
+ pid_t owner;
+ u32 options;
u8 srq;
u8 tos;
+ u8 reuseaddr;
+ u8 afonly;
};
struct cma_multicast {
@@ -164,8 +162,8 @@ struct cma_multicast {
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
- enum cma_state old_state;
- enum cma_state new_state;
+ enum rdma_cm_state old_state;
+ enum rdma_cm_state new_state;
struct rdma_cm_event event;
};
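
The private enum cma_state is replaced throughout by the shared enum rdma_cm_state, and every transition still funnels through spinlock-guarded compare-and-exchange helpers such as cma_comp_exch(), shown just below. A userspace sketch of that idiom, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdio.h>

    enum rdma_cm_state { RDMA_CM_IDLE, RDMA_CM_ADDR_QUERY, RDMA_CM_DESTROYING };

    struct id_priv {
        enum rdma_cm_state state;
        pthread_mutex_t lock;
    };

    /* Move to 'exch' only if the current state is 'comp'; nonzero on success. */
    static int comp_exch(struct id_priv *id, enum rdma_cm_state comp,
                         enum rdma_cm_state exch)
    {
        int ret;

        pthread_mutex_lock(&id->lock);
        if ((ret = (id->state == comp)))
            id->state = exch;
        pthread_mutex_unlock(&id->lock);
        return ret;
    }

    int main(void)
    {
        struct id_priv id = { RDMA_CM_IDLE, PTHREAD_MUTEX_INITIALIZER };

        printf("%d\n", comp_exch(&id, RDMA_CM_IDLE, RDMA_CM_ADDR_QUERY)); /* 1 */
        printf("%d\n", comp_exch(&id, RDMA_CM_IDLE, RDMA_CM_DESTROYING)); /* 0 */
        return 0;
    }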
@@ -197,26 +195,9 @@ struct cma_hdr {
union cma_ip_addr dst_addr;
};
-struct sdp_hh {
- u8 bsdh[16];
- u8 sdp_version; /* Major version: 7:4 */
- u8 ip_version; /* IP version: 7:4 */
- u8 sdp_specific1[10];
- __be16 port;
- __be16 sdp_specific2;
- union cma_ip_addr src_addr;
- union cma_ip_addr dst_addr;
-};
-
-struct sdp_hah {
- u8 bsdh[16];
- u8 sdp_version;
-};
-
#define CMA_VERSION 0x00
-#define SDP_MAJ_VERSION 0x2
-static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
unsigned long flags;
int ret;
@@ -228,7 +209,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
}
static int cma_comp_exch(struct rdma_id_private *id_priv,
- enum cma_state comp, enum cma_state exch)
+ enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
@@ -240,11 +221,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum cma_state cma_exch(struct rdma_id_private *id_priv,
- enum cma_state exch)
+static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
+ enum rdma_cm_state exch)
{
unsigned long flags;
- enum cma_state old;
+ enum rdma_cm_state old;
spin_lock_irqsave(&id_priv->lock, flags);
old = id_priv->state;
@@ -263,26 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static inline u8 sdp_get_majv(u8 sdp_version)
-{
- return sdp_version >> 4;
-}
-
-static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
-{
- return hh->ip_version >> 4;
-}
-
-static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
-{
- hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
-}
-
-static inline int cma_is_ud_ps(enum rdma_port_space ps)
-{
- return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
-}
-
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -308,23 +269,49 @@ static inline void release_mc(struct kref *kref)
kfree(mc);
}
-static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+static void cma_release_dev(struct rdma_id_private *id_priv)
{
+ mutex_lock(&lock);
list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ mutex_unlock(&lock);
+}
+
+static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+}
+
+static inline unsigned short cma_family(struct rdma_id_private *id_priv)
+{
+ return id_priv->id.route.addr.src_addr.ss_family;
}
-static int cma_set_qkey(struct rdma_id_private *id_priv)
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey)
+ if (id_priv->qkey) {
+ if (qkey && id_priv->qkey != qkey)
+ return -EINVAL;
return 0;
+ }
+
+ if (qkey) {
+ id_priv->qkey = qkey;
+ return 0;
+ }
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
+ case RDMA_PS_IB:
id_priv->qkey = RDMA_UDP_QKEY;
break;
case RDMA_PS_IPOIB:
@@ -341,55 +328,83 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)
return ret;
}
-static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
+static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
- int i;
- int err;
- struct ib_port_attr props;
- union ib_gid tmp;
+ dev_addr->dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
+ ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
+}
- err = ib_query_port(device, port_num, &props);
- if (err)
- return 1;
+static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+ int ret;
- for (i = 0; i < props.gid_tbl_len; ++i) {
- err = ib_query_gid(device, port_num, i, &tmp);
- if (err)
- return 1;
- if (!memcmp(&tmp, gid, sizeof tmp))
- return 0;
+ if (addr->sa_family != AF_IB) {
+ ret = rdma_translate_ip(addr, dev_addr, NULL);
+ } else {
+ cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
+ ret = 0;
}
- return -EAGAIN;
+ return ret;
}
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv,
+ struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid;
int ret = -ENODEV;
- u8 port;
+ u8 port, found_port;
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
- iboe_addr_get_sgid(dev_addr, &iboe_gid);
+ if (dev_ll != IB_LINK_LAYER_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ mutex_lock(&lock);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &iboe_gid);
+
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
+ if (listen_id_priv &&
+ rdma_port_get_link_layer(listen_id_priv->id.device,
+ listen_id_priv->id.port_num) == dev_ll) {
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+ &found_port, NULL);
+ else
+ ret = ib_find_cached_gid(cma_dev->device, &gid,
+ &found_port, NULL);
+
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
+ goto out;
+ }
+ }
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ if (listen_id_priv &&
+ listen_id_priv->cma_dev == cma_dev &&
+ listen_id_priv->id.port_num == port)
+ continue;
if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
- ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
else
- ret = find_gid_port(cma_dev->device, &gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
- if (!ret) {
- id_priv->id.port_num = port;
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
goto out;
- } else if (ret == 1)
- break;
+ }
}
}
}
@@ -398,9 +413,64 @@ out:
if (!ret)
cma_attach_to_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
return ret;
}
+/*
+ * Select the source IB device and address to reach the destination IB address.
+ */
+static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+{
+ struct cma_device *cma_dev, *cur_dev;
+ struct sockaddr_ib *addr;
+ union ib_gid gid, sgid, *dgid;
+ u16 pkey, index;
+ u8 p;
+ int i;
+
+ cma_dev = NULL;
+ addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
+ dgid = (union ib_gid *) &addr->sib_addr;
+ pkey = ntohs(addr->sib_pkey);
+
+ list_for_each_entry(cur_dev, &dev_list, list) {
+ if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ continue;
+
+ for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
+ continue;
+
+ for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+ if (!memcmp(&gid, dgid, sizeof(gid))) {
+ cma_dev = cur_dev;
+ sgid = gid;
+ id_priv->id.port_num = p;
+ goto found;
+ }
+
+ if (!cma_dev && (gid.global.subnet_prefix ==
+ dgid->global.subnet_prefix)) {
+ cma_dev = cur_dev;
+ sgid = gid;
+ id_priv->id.port_num = p;
+ }
+ }
+ }
+ }
+
+ if (!cma_dev)
+ return -ENODEV;
+
+found:
+ cma_attach_to_dev(id_priv, cma_dev);
+ addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+ cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
+ return 0;
+}
+
static void cma_deref_id(struct rdma_id_private *id_priv)
{
if (atomic_dec_and_test(&id_priv->refcount))
@@ -408,7 +478,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
}
static int cma_disable_callback(struct rdma_id_private *id_priv,
- enum cma_state state)
+ enum rdma_cm_state state)
{
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state != state) {
@@ -418,13 +488,9 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
return 0;
}
-static int cma_has_cm_dev(struct rdma_id_private *id_priv)
-{
- return (id_priv->id.device && id_priv->cm_id.ib);
-}
-
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps)
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type)
{
struct rdma_id_private *id_priv;
@@ -432,10 +498,12 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->state = CMA_IDLE;
+ id_priv->owner = task_pid_nr(current);
+ id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
+ id_priv->id.qp_type = qp_type;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -503,7 +571,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (IS_ERR(qp))
return PTR_ERR(qp);
- if (cma_is_ud_ps(id_priv->id.ps))
+ if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
@@ -537,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
+ union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@@ -559,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;
+ ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
+ qp_attr.ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+
+ if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
+ == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
+ == IB_LINK_LAYER_ETHERNET) {
+ ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
+
+ if (ret)
+ goto out;
+ }
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -631,8 +714,8 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
- if (cma_is_ud_ps(id_priv->id.ps)) {
- ret = cma_set_qkey(id_priv);
+ if (id_priv->id.qp_type == IB_QPT_UD) {
+ ret = cma_set_qkey(id_priv, 0);
if (ret)
return ret;
@@ -654,11 +737,12 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
id_priv = container_of(id, struct rdma_id_private, id);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+ if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
+
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
@@ -681,26 +765,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr);
static inline int cma_zero_addr(struct sockaddr *addr)
{
- struct in6_addr *ip6;
-
- if (addr->sa_family == AF_INET)
- return ipv4_is_zeronet(
- ((struct sockaddr_in *)addr)->sin_addr.s_addr);
- else {
- ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
- return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
- ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+ switch (addr->sa_family) {
+ case AF_INET:
+ return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
+ case AF_INET6:
+ return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ case AF_IB:
+ return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+ default:
+ return 0;
}
}
static inline int cma_loopback_addr(struct sockaddr *addr)
{
- if (addr->sa_family == AF_INET)
- return ipv4_is_loopback(
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
- else
- return ipv6_addr_loopback(
- &((struct sockaddr_in6 *) addr)->sin6_addr);
+ switch (addr->sa_family) {
+ case AF_INET:
+ return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ case AF_INET6:
+ return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ case AF_IB:
+ return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+ default:
+ return 0;
+ }
}
static inline int cma_any_addr(struct sockaddr *addr)
@@ -708,12 +796,40 @@ static inline int cma_any_addr(struct sockaddr *addr)
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
-static inline __be16 cma_port(struct sockaddr *addr)
+static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
- if (addr->sa_family == AF_INET)
+ if (src->sa_family != dst->sa_family)
+ return -1;
+
+ switch (src->sa_family) {
+ case AF_INET:
+ return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ case AF_INET6:
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ default:
+ return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
+ &((struct sockaddr_ib *) dst)->sib_addr);
+ }
+}
+
+static __be16 cma_port(struct sockaddr *addr)
+{
+ struct sockaddr_ib *sib;
+
+ switch (addr->sa_family) {
+ case AF_INET:
return ((struct sockaddr_in *) addr)->sin_port;
- else
+ case AF_INET6:
return ((struct sockaddr_in6 *) addr)->sin6_port;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) addr;
+ return htons((u16) (be64_to_cpu(sib->sib_sid) &
+ be64_to_cpu(sib->sib_sid_mask)));
+ default:
+ return 0;
+ }
}
static inline int cma_any_port(struct sockaddr *addr)
@@ -721,83 +837,93 @@ static inline int cma_any_port(struct sockaddr *addr)
return !cma_port(addr);
}
-static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
- u8 *ip_ver, __be16 *port,
- union cma_ip_addr **src, union cma_ip_addr **dst)
+static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct ib_sa_path_rec *path)
{
- switch (ps) {
- case RDMA_PS_SDP:
- if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
- SDP_MAJ_VERSION)
- return -EINVAL;
-
- *ip_ver = sdp_get_ip_ver(hdr);
- *port = ((struct sdp_hh *) hdr)->port;
- *src = &((struct sdp_hh *) hdr)->src_addr;
- *dst = &((struct sdp_hh *) hdr)->dst_addr;
- break;
- default:
- if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
- return -EINVAL;
+ struct sockaddr_ib *listen_ib, *ib;
- *ip_ver = cma_get_ip_ver(hdr);
- *port = ((struct cma_hdr *) hdr)->port;
- *src = &((struct cma_hdr *) hdr)->src_addr;
- *dst = &((struct cma_hdr *) hdr)->dst_addr;
- break;
- }
+ listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
+ ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
+ ib->sib_family = listen_ib->sib_family;
+ ib->sib_pkey = path->pkey;
+ ib->sib_flowinfo = path->flow_label;
+ memcpy(&ib->sib_addr, &path->sgid, 16);
+ ib->sib_sid = listen_ib->sib_sid;
+ ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
+ ib->sib_scope_id = listen_ib->sib_scope_id;
- if (*ip_ver != 4 && *ip_ver != 6)
- return -EINVAL;
- return 0;
+ ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
+ ib->sib_family = listen_ib->sib_family;
+ ib->sib_pkey = path->pkey;
+ ib->sib_flowinfo = path->flow_label;
+ memcpy(&ib->sib_addr, &path->dgid, 16);
}
-static void cma_save_net_info(struct rdma_addr *addr,
- struct rdma_addr *listen_addr,
- u8 ip_ver, __be16 port,
- union cma_ip_addr *src, union cma_ip_addr *dst)
+static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct cma_hdr *hdr)
{
struct sockaddr_in *listen4, *ip4;
+
+ listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
+ ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
+ ip4->sin_family = listen4->sin_family;
+ ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
+ ip4->sin_port = listen4->sin_port;
+
+ ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
+ ip4->sin_family = listen4->sin_family;
+ ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
+ ip4->sin_port = hdr->port;
+}
+
+static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct cma_hdr *hdr)
+{
struct sockaddr_in6 *listen6, *ip6;
- switch (ip_ver) {
+ listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
+ ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
+ ip6->sin6_family = listen6->sin6_family;
+ ip6->sin6_addr = hdr->dst_addr.ip6;
+ ip6->sin6_port = listen6->sin6_port;
+
+ ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
+ ip6->sin6_family = listen6->sin6_family;
+ ip6->sin6_addr = hdr->src_addr.ip6;
+ ip6->sin6_port = hdr->port;
+}
+
+static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct ib_cm_event *ib_event)
+{
+ struct cma_hdr *hdr;
+
+ if ((listen_id->route.addr.src_addr.ss_family == AF_IB) &&
+ (ib_event->event == IB_CM_REQ_RECEIVED)) {
+ cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+ return 0;
+ }
+
+ hdr = ib_event->private_data;
+ if (hdr->cma_version != CMA_VERSION)
+ return -EINVAL;
+
+ switch (cma_get_ip_ver(hdr)) {
case 4:
- listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
- ip4 = (struct sockaddr_in *) &addr->src_addr;
- ip4->sin_family = listen4->sin_family;
- ip4->sin_addr.s_addr = dst->ip4.addr;
- ip4->sin_port = listen4->sin_port;
-
- ip4 = (struct sockaddr_in *) &addr->dst_addr;
- ip4->sin_family = listen4->sin_family;
- ip4->sin_addr.s_addr = src->ip4.addr;
- ip4->sin_port = port;
+ cma_save_ip4_info(id, listen_id, hdr);
break;
case 6:
- listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
- ip6 = (struct sockaddr_in6 *) &addr->src_addr;
- ip6->sin6_family = listen6->sin6_family;
- ip6->sin6_addr = dst->ip6;
- ip6->sin6_port = listen6->sin6_port;
-
- ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
- ip6->sin6_family = listen6->sin6_family;
- ip6->sin6_addr = src->ip6;
- ip6->sin6_port = port;
+ cma_save_ip6_info(id, listen_id, hdr);
break;
default:
- break;
+ return -EINVAL;
}
+ return 0;
}
-static inline int cma_user_data_offset(enum rdma_port_space ps)
+static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
{
- switch (ps) {
- case RDMA_PS_SDP:
- return 0;
- default:
- return sizeof(struct cma_hdr);
- }
+ return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -838,18 +964,17 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
}
static void cma_cancel_operation(struct rdma_id_private *id_priv,
- enum cma_state state)
+ enum rdma_cm_state state)
{
switch (state) {
- case CMA_ADDR_QUERY:
+ case RDMA_CM_ADDR_QUERY:
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
- case CMA_ROUTE_QUERY:
+ case RDMA_CM_ROUTE_QUERY:
cma_cancel_route(id_priv);
break;
- case CMA_LISTEN:
- if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
- && !id_priv->cma_dev)
+ case RDMA_CM_LISTEN:
+ if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
cma_cancel_listens(id_priv);
break;
default:
@@ -898,32 +1023,35 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
- enum cma_state state;
+ enum rdma_cm_state state;
id_priv = container_of(id, struct rdma_id_private, id);
- state = cma_exch(id_priv, CMA_DESTROYING);
+ state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- mutex_lock(&lock);
+ /*
+ * Wait for any active callback to finish. New callbacks will find
+ * the id_priv state set to destroying and abort.
+ */
+ mutex_lock(&id_priv->handler_mutex);
+ mutex_unlock(&id_priv->handler_mutex);
+
if (id_priv->cma_dev) {
- mutex_unlock(&lock);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+ if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
break;
case RDMA_TRANSPORT_IWARP:
- if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+ if (id_priv->cm_id.iw)
iw_destroy_cm_id(id_priv->cm_id.iw);
break;
default:
break;
}
cma_leave_mc_groups(id_priv);
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
+ cma_release_dev(id_priv);
}
- mutex_unlock(&lock);
cma_release_port(id_priv);
cma_deref_id(id_priv);
@@ -961,16 +1089,6 @@ reject:
return ret;
}
-static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
-{
- if (id_priv->id.ps == RDMA_PS_SDP &&
- sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
- SDP_MAJ_VERSION)
- return -EINVAL;
-
- return 0;
-}
-
static void cma_set_rep_event_data(struct rdma_cm_event *event,
struct ib_cm_rep_event_param *rep_data,
void *private_data)
@@ -992,9 +1110,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
int ret = 0;
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- cma_disable_callback(id_priv, CMA_CONNECT)) ||
+ cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- cma_disable_callback(id_priv, CMA_DISCONNECT)))
+ cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
return 0;
memset(&event, 0, sizeof event);
@@ -1005,15 +1123,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- event.status = cma_verify_rep(id_priv, ib_event->private_data);
- if (event.status)
- event.event = RDMA_CM_EVENT_CONNECT_ERROR;
- else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
+ if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
RDMA_CM_EVENT_ESTABLISHED;
- } else
+ } else {
event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+ }
cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
ib_event->private_data);
break;
@@ -1025,7 +1141,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT; /* fall through */
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
- if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+ if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT))
goto out;
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
@@ -1052,7 +1169,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -1068,53 +1185,44 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
- union cma_ip_addr *src, *dst;
- __be16 port;
- u8 ip_ver;
int ret;
- if (cma_get_net_info(ib_event->private_data, listen_id->ps,
- &ip_ver, &port, &src, &dst))
- goto err;
-
id = rdma_create_id(listen_id->event_handler, listen_id->context,
- listen_id->ps);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type);
if (IS_ERR(id))
- goto err;
+ return NULL;
- cma_save_net_info(&id->route.addr, &listen_id->route.addr,
- ip_ver, port, src, dst);
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (cma_save_net_info(id, listen_id, ib_event))
+ goto err;
rt = &id->route;
rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
GFP_KERNEL);
if (!rt->path_rec)
- goto destroy_id;
+ goto err;
rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
- if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ if (cma_any_addr(cma_src_addr(id_priv))) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
- ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
} else {
- ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
- &rt->addr.dev_addr);
+ ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
if (ret)
- goto destroy_id;
+ goto err;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
- id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->state = CMA_CONNECT;
+ id_priv->state = RDMA_CM_CONNECT;
return id_priv;
-destroy_id:
- rdma_destroy_id(id);
err:
+ rdma_destroy_id(id);
return NULL;
}
@@ -1123,33 +1231,24 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
{
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
- union cma_ip_addr *src, *dst;
- __be16 port;
- u8 ip_ver;
int ret;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
- listen_id->ps);
+ listen_id->ps, IB_QPT_UD);
if (IS_ERR(id))
return NULL;
-
- if (cma_get_net_info(ib_event->private_data, listen_id->ps,
- &ip_ver, &port, &src, &dst))
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (cma_save_net_info(id, listen_id, ib_event))
goto err;
- cma_save_net_info(&id->route.addr, &listen_id->route.addr,
- ip_ver, port, src, dst);
-
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
+ ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
if (ret)
goto err;
}
- id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->state = CMA_CONNECT;
+ id_priv->state = RDMA_CM_CONNECT;
return id_priv;
err:
rdma_destroy_id(id);
@@ -1171,6 +1270,15 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = req_data->remote_qpn;
}
+static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+{
+ return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
+ (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
+ ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
+ (id->qp_type == IB_QPT_UD)) ||
+ (!id->qp_type));
+}
+
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id;
@@ -1178,13 +1286,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
int offset, ret;
listen_id = cm_id->context;
- if (cma_disable_callback(listen_id, CMA_LISTEN))
+ if (!cma_check_req_qp_type(&listen_id->id, ib_event))
+ return -EINVAL;
+
+ if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
memset(&event, 0, sizeof event);
- offset = cma_user_data_offset(listen_id->id.ps);
+ offset = cma_user_data_offset(listen_id);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- if (cma_is_ud_ps(listen_id->id.ps)) {
+ if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
@@ -1196,93 +1307,89 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
if (!conn_id) {
ret = -ENOMEM;
- goto out;
+ goto err1;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- mutex_lock(&lock);
- ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret)
- goto release_conn_id;
+ goto err2;
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (!ret) {
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, CMA_CONNECT) &&
- !cma_is_ud_ps(conn_id->id.ps))
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
- mutex_unlock(&lock);
- mutex_unlock(&conn_id->handler_mutex);
- goto out;
- }
+ if (ret)
+ goto err3;
+ /*
+ * Acquire mutex to prevent user executing rdma_destroy_id()
+ * while we're accessing the cm_id.
+ */
+ mutex_lock(&lock);
+ if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
+ (conn_id->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ mutex_unlock(&lock);
+ mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
+ cma_deref_id(conn_id);
+ return 0;
+err3:
+ cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
-
-release_conn_id:
- cma_exch(conn_id, CMA_DESTROYING);
+err2:
+ cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
mutex_unlock(&listen_id->handler_mutex);
+ if (conn_id)
+ rdma_destroy_id(&conn_id->id);
return ret;
}
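
The reworked cma_req_handler() pins conn_id with atomic_inc(&conn_id->refcount) before invoking the user's event handler and drops it with cma_deref_id() afterwards, so a concurrent rdma_destroy_id() from another thread cannot free the id mid-callback. A sketch of the same pin-across-callback pattern using C11 atomics (illustrative userspace analogue, not the kernel API):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refcount;
		/* ... payload ... */
	};

	static void obj_put(struct obj *o)
	{
		/* free only when the last reference drops */
		if (atomic_fetch_sub(&o->refcount, 1) == 1)
			free(o);
	}

	static void deliver_event(struct obj *o, int (*handler)(struct obj *))
	{
		atomic_fetch_add(&o->refcount, 1);	/* pin across the callback */
		handler(o);				/* may ask to destroy o elsewhere */
		obj_put(o);				/* drop our pin */
	}
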
-static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
{
- return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
+ if (addr->sa_family == AF_IB)
+ return ((struct sockaddr_ib *) addr)->sib_sid;
+
+ return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
}
+EXPORT_SYMBOL(rdma_get_service_id);
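
For AF_IB the service ID is carried verbatim in sib_sid; for IP addressing it is synthesized by placing the port-space value above the 16-bit port, as the function above shows. A worked example, assuming RDMA_PS_TCP's on-the-wire value of 0x0106 from rdma/rdma_cm.h of this era:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t ps   = 0x0106;		/* assumed: RDMA_PS_TCP */
		uint16_t port = 20049;

		uint64_t sid = (ps << 16) + port;
		printf("service id: 0x%016llx\n", (unsigned long long)sid);
		/* prints 0x0000000001064e51: port space above the 16-bit port */
		return 0;
	}
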
static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
struct ib_cm_compare_data *compare)
{
struct cma_hdr *cma_data, *cma_mask;
- struct sdp_hh *sdp_data, *sdp_mask;
__be32 ip4_addr;
struct in6_addr ip6_addr;
memset(compare, 0, sizeof *compare);
cma_data = (void *) compare->data;
cma_mask = (void *) compare->mask;
- sdp_data = (void *) compare->data;
- sdp_mask = (void *) compare->mask;
switch (addr->sa_family) {
case AF_INET:
ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
- if (ps == RDMA_PS_SDP) {
- sdp_set_ip_ver(sdp_data, 4);
- sdp_set_ip_ver(sdp_mask, 0xF);
- sdp_data->dst_addr.ip4.addr = ip4_addr;
- sdp_mask->dst_addr.ip4.addr = htonl(~0);
- } else {
- cma_set_ip_ver(cma_data, 4);
- cma_set_ip_ver(cma_mask, 0xF);
+ cma_set_ip_ver(cma_data, 4);
+ cma_set_ip_ver(cma_mask, 0xF);
+ if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip4.addr = ip4_addr;
cma_mask->dst_addr.ip4.addr = htonl(~0);
}
break;
case AF_INET6:
ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
- if (ps == RDMA_PS_SDP) {
- sdp_set_ip_ver(sdp_data, 6);
- sdp_set_ip_ver(sdp_mask, 0xF);
- sdp_data->dst_addr.ip6 = ip6_addr;
- memset(&sdp_mask->dst_addr.ip6, 0xFF,
- sizeof sdp_mask->dst_addr.ip6);
- } else {
- cma_set_ip_ver(cma_data, 6);
- cma_set_ip_ver(cma_mask, 0xF);
+ cma_set_ip_ver(cma_data, 6);
+ cma_set_ip_ver(cma_mask, 0xF);
+ if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip6 = ip6_addr;
memset(&cma_mask->dst_addr.ip6, 0xFF,
sizeof cma_mask->dst_addr.ip6);
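
The change above wildcards the destination address (leaving it zero in both data and mask) when the listener is bound to ANY, so only the IP-version nibble is matched. As I understand ib_cm's compare_data semantics, a request matches a listener when every byte agrees under the mask — roughly this check (a sketch, not the kernel's implementation):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Masked byte-wise comparison: unmasked bytes are "don't care". */
	static bool cm_compare_match(const uint8_t *req, const uint8_t *data,
				     const uint8_t *mask, size_t len)
	{
		for (size_t i = 0; i < len; i++)
			if ((req[i] & mask[i]) != (data[i] & mask[i]))
				return false;
		return true;
	}
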
@@ -1297,10 +1404,11 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
struct rdma_cm_event event;
- struct sockaddr_in *sin;
int ret = 0;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
- if (cma_disable_callback(id_priv, CMA_CONNECT))
+ if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
@@ -1309,13 +1417,15 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
case IW_CM_EVENT_CONNECT_REPLY:
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
- *sin = iw_event->remote_addr;
+ memcpy(cma_src_addr(id_priv), laddr,
+ rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(id_priv), raddr,
+ rdma_addr_size(raddr));
switch (iw_event->status) {
case 0:
event.event = RDMA_CM_EVENT_ESTABLISHED;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -1331,6 +1441,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
break;
case IW_CM_EVENT_ESTABLISHED:
event.event = RDMA_CM_EVENT_ESTABLISHED;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
break;
default:
BUG_ON(1);
@@ -1343,7 +1455,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -1358,45 +1470,36 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct sockaddr_in *sin;
- struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
listen_id = cm_id->context;
- if (cma_disable_callback(listen_id, CMA_LISTEN))
+ if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
new_cm_id = rdma_create_id(listen_id->id.event_handler,
listen_id->id.context,
- RDMA_PS_TCP);
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
}
conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- conn_id->state = CMA_CONNECT;
+ conn_id->state = RDMA_CM_CONNECT;
- dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
- if (!dev) {
- ret = -EADDRNOTAVAIL;
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
- ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
- mutex_lock(&lock);
- ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1407,10 +1510,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
cm_id->context = conn_id;
cm_id->cm_handler = cma_iw_handler;
- sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
- *sin = iw_event->remote_addr;
+ memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
ret = ib_query_device(conn_id->id.device, &attr);
if (ret) {
@@ -1423,23 +1524,29 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
- event.param.conn.responder_resources = attr.max_qp_rd_atom;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
+
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, CMA_DESTROYING);
+ cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
goto out;
}
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
out:
- if (dev)
- dev_put(dev);
mutex_unlock(&listen_id->handler_mutex);
return ret;
}
@@ -1448,17 +1555,19 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
{
struct ib_cm_compare_data compare_data;
struct sockaddr *addr;
+ struct ib_cm_id *id;
__be64 svc_id;
int ret;
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
- id_priv);
- if (IS_ERR(id_priv->cm_id.ib))
- return PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
+ if (IS_ERR(id))
+ return PTR_ERR(id);
- addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
- svc_id = cma_get_service_id(id_priv->id.ps, addr);
- if (cma_any_addr(addr))
+ id_priv->cm_id.ib = id;
+
+ addr = cma_src_addr(id_priv);
+ svc_id = rdma_get_service_id(&id_priv->id, addr);
+ if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
@@ -1476,16 +1585,18 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
int ret;
- struct sockaddr_in *sin;
+ struct iw_cm_id *id;
+
+ id = iw_create_cm_id(id_priv->id.device,
+ iw_conn_req_handler,
+ id_priv);
+ if (IS_ERR(id))
+ return PTR_ERR(id);
- id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
- iw_conn_req_handler,
- id_priv);
- if (IS_ERR(id_priv->cm_id.iw))
- return PTR_ERR(id_priv->cm_id.iw);
+ id_priv->cm_id.iw = id;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- id_priv->cm_id.iw->local_addr = *sin;
+ memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -1514,20 +1625,26 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
struct rdma_cm_id *id;
int ret;
- id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
+ if (cma_family(id_priv) == AF_IB &&
+ rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type);
if (IS_ERR(id))
return;
dev_id_priv = container_of(id, struct rdma_id_private, id);
- dev_id_priv->state = CMA_ADDR_BOUND;
- memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
- ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+ dev_id_priv->state = RDMA_CM_ADDR_BOUND;
+ memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
+ dev_id_priv->afonly = id_priv->afonly;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
@@ -1546,50 +1663,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-int rdma_listen(struct rdma_cm_id *id, int backlog)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == CMA_IDLE) {
- ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
- ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
- if (ret)
- return ret;
- }
-
- if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
- return -EINVAL;
-
- id_priv->backlog = backlog;
- if (id->device) {
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- ret = cma_ib_listen(id_priv);
- if (ret)
- goto err;
- break;
- case RDMA_TRANSPORT_IWARP:
- ret = cma_iw_listen(id_priv, backlog);
- if (ret)
- goto err;
- break;
- default:
- ret = -ENOSYS;
- goto err;
- }
- } else
- cma_listen_on_all(id_priv);
-
- return 0;
-err:
- id_priv->backlog = 0;
- cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
- return ret;
-}
-EXPORT_SYMBOL(rdma_listen);
-
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
struct rdma_id_private *id_priv;
@@ -1611,8 +1684,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
route->num_paths = 1;
*route->path_rec = *path_rec;
} else {
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ADDR_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
}
@@ -1623,31 +1696,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
- struct rdma_addr *addr = &id_priv->id.route.addr;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct ib_sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
+ struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
- rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
- rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
- path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
+ rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
+ path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &addr->dst_addr);
+ path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
- if (addr->src_addr.ss_family == AF_INET) {
+ switch (cma_family(id_priv)) {
+ case AF_INET:
path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
- } else {
- sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+ break;
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+ break;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
+ comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+ break;
}
id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
@@ -1670,7 +1751,7 @@ static void cma_work_handler(struct work_struct *_work)
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
out:
@@ -1688,12 +1769,12 @@ static void cma_ndev_work_handler(struct work_struct *_work)
int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == CMA_DESTROYING ||
- id_priv->state == CMA_DEVICE_REMOVAL)
+ if (id_priv->state == RDMA_CM_DESTROYING ||
+ id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -1717,8 +1798,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
@@ -1747,7 +1828,8 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
@@ -1760,7 +1842,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
id->route.num_paths = num_paths;
return 0;
err:
- cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_set_ib_paths);
@@ -1775,26 +1857,41 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
}
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+ int prio;
+ struct net_device *dev;
+
+ prio = rt_tos2priority(tos);
+ dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
+ vlan_dev_real_dev(ndev) : ndev;
+
+ if (dev->num_tc)
+ return netdev_get_prio_tc_map(dev, prio);
+
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ return (vlan_dev_get_egress_qos_mask(ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+#endif
+ return 0;
+}
+
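
The removed line further down ("route->path_rec->sl = id_priv->tos >> 5;") shows the old mapping: the RoCE service level was taken straight from the three TOS precedence bits. The new helper above instead consults the netdev's priority-to-traffic-class map, then a VLAN egress QoS map, falling back to 0. The legacy arithmetic, for reference:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t tos = 0xa0;			/* IP TOS, precedence 5 */
		printf("legacy sl = %u\n", tos >> 5);	/* prints 5 */
		return 0;
	}
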
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
struct rdma_addr *addr = &route->addr;
struct cma_work *work;
int ret;
- struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
- struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
struct net_device *ndev = NULL;
- u16 vid;
- if (src_addr->sin_family != dst_addr->sin_family)
- return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@@ -1818,17 +1915,20 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- vid = rdma_vlan_dev_vlan_id(ndev);
+ route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
+ memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
- iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
- iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &route->path_rec->sgid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
+ &route->path_rec->dgid);
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = id_priv->tos >> 5;
-
+ route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -1840,8 +1940,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
work->event.status = 0;
@@ -1863,7 +1963,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
@@ -1892,34 +1992,63 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
+static void cma_set_loopback(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
+ 0, 0, 0, htonl(1));
+ break;
+ default:
+ ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
+ 0, 0, 0, htonl(1));
+ break;
+ }
+}
+
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
- struct cma_device *cma_dev;
+ struct cma_device *cma_dev, *cur_dev;
struct ib_port_attr port_attr;
union ib_gid gid;
u16 pkey;
int ret;
u8 p;
+ cma_dev = NULL;
mutex_lock(&lock);
- if (list_empty(&dev_list)) {
+ list_for_each_entry(cur_dev, &dev_list, list) {
+ if (cma_family(id_priv) == AF_IB &&
+ rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ continue;
+
+ if (!cma_dev)
+ cma_dev = cur_dev;
+
+ for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ if (!ib_query_port(cur_dev->device, p, &port_attr) &&
+ port_attr.state == IB_PORT_ACTIVE) {
+ cma_dev = cur_dev;
+ goto port_found;
+ }
+ }
+ }
+
+ if (!cma_dev) {
ret = -ENODEV;
goto out;
}
- list_for_each_entry(cma_dev, &dev_list, list)
- for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
- if (!ib_query_port(cma_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE)
- goto port_found;
p = 1;
- cma_dev = list_entry(dev_list.next, struct cma_device, list);
port_found:
ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
@@ -1938,6 +2067,7 @@ port_found:
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
return ret;
@@ -1951,34 +2081,25 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
-
- /*
- * Grab mutex to block rdma_destroy_id() from removing the device while
- * we're trying to acquire it.
- */
- mutex_lock(&lock);
- if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
- mutex_unlock(&lock);
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED))
goto out;
- }
+ memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
- status = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
+ status = cma_acquire_dev(id_priv, NULL);
if (status) {
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ADDR_BOUND))
goto out;
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = status;
- } else {
- memcpy(&id_priv->id.route.addr.src_addr, src_addr,
- ip_addr_size(src_addr));
+ } else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- }
if (id_priv->id.event_handler(&id_priv->id, &event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
@@ -1992,7 +2113,6 @@ out:
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
- struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@@ -2009,22 +2129,40 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
- if (cma_zero_addr(src)) {
- dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
- if ((src->sa_family = dst->sa_family) == AF_INET) {
- ((struct sockaddr_in *) src)->sin_addr.s_addr =
- ((struct sockaddr_in *) dst)->sin_addr.s_addr;
- } else {
- ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
- &((struct sockaddr_in6 *) dst)->sin6_addr);
- }
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ queue_work(cma_wq, &work->work);
+ return 0;
+err:
+ kfree(work);
+ return ret;
+}
+
+static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
+{
+ struct cma_work *work;
+ int ret;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ if (!id_priv->cma_dev) {
+ ret = cma_resolve_ib_dev(id_priv);
+ if (ret)
+ goto err;
}
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
+ &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ADDR_QUERY;
- work->new_state = CMA_ADDR_RESOLVED;
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
@@ -2038,9 +2176,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
{
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+ src_addr->sa_family = dst_addr->sa_family;
+ if (dst_addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *) src_addr)->sib_pkey =
+ ((struct sockaddr_ib *) dst_addr)->sib_pkey;
}
}
return rdma_bind_addr(id, src_addr);
@@ -2053,41 +2195,107 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == CMA_IDLE) {
+ if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
if (ret)
return ret;
}
- if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+ if (cma_family(id_priv) != dst_addr->sa_family)
+ return -EINVAL;
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
- memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
- if (cma_any_addr(dst_addr))
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
- else
- ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
- dst_addr, &id->route.addr.dev_addr,
- timeout_ms, addr_handler, id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+ dst_addr, &id->route.addr.dev_addr,
+ timeout_ms, addr_handler, id_priv);
+ }
+ }
if (ret)
goto err;
return 0;
err:
- cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_addr);
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
+{
+ struct rdma_id_private *id_priv;
+ unsigned long flags;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ id_priv->reuseaddr = reuse;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_reuseaddr);
+
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
+{
+ struct rdma_id_private *id_priv;
+ unsigned long flags;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
+ id_priv->options |= (1 << CMA_OPTION_AFONLY);
+ id_priv->afonly = afonly;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_afonly);
+
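
Both setters above gate the option on the id's state: reuseaddr may always be enabled but only disabled while the id is still IDLE, and afonly may change until the id moves past ADDR_BOUND. A compact mirror of those two rules:

	#include <stdbool.h>

	enum state { IDLE, ADDR_BOUND, LISTEN /* ... */ };

	/* Enabling reuseaddr is always legal; disabling only before binding. */
	static bool may_set_reuseaddr(enum state s, bool reuse)
	{
		return reuse || s == IDLE;
	}

	/* afonly may change until the id starts listening. */
	static bool may_set_afonly(enum state s)
	{
		return s == IDLE || s == ADDR_BOUND;
	}
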
static void cma_bind_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv)
{
- struct sockaddr_in *sin;
+ struct sockaddr *addr;
+ struct sockaddr_ib *sib;
+ u64 sid, mask;
+ __be16 port;
+
+ addr = cma_src_addr(id_priv);
+ port = htons(bind_list->port);
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- sin->sin_port = htons(bind_list->port);
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *) addr)->sin_port = port;
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *) addr)->sin6_port = port;
+ break;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) addr;
+ sid = be64_to_cpu(sib->sib_sid);
+ mask = be64_to_cpu(sib->sib_sid_mask);
+ sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
+ sib->sib_sid_mask = cpu_to_be64(~0ULL);
+ break;
+ }
id_priv->bind_list = bind_list;
hlist_add_head(&id_priv->node, &bind_list->owners);
}
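
For AF_IB the port lives in the low 16 bits of the 64-bit service ID, so binding ORs the allocated port into the masked SID and widens the mask to all ones, as cma_bind_port() now does. A worked example, assuming the RDMA_IB_IP_PS_TCP prefix and RDMA_IB_IP_PS_MASK values from rdma/ib.h of this era:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t sid  = 0x0000000001060000ULL;	/* assumed: TCP PS prefix */
		uint64_t mask = 0xFFFFFFFFFFFF0000ULL;	/* assumed: RDMA_IB_IP_PS_MASK */
		uint16_t port = 5000;

		sid = (sid & mask) | port;
		printf("bound sid: 0x%016llx\n", (unsigned long long)sid);
		/* prints 0x0000000001061388 */
		return 0;
	}
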
@@ -2096,33 +2304,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
unsigned short snum)
{
struct rdma_bind_list *bind_list;
- int port, ret;
+ int ret;
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
- do {
- ret = idr_get_new_above(ps, bind_list, snum, &port);
- } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
-
- if (ret)
- goto err1;
-
- if (port != snum) {
- ret = -EADDRNOTAVAIL;
- goto err2;
- }
+ ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL);
+ if (ret < 0)
+ goto err;
bind_list->ps = ps;
- bind_list->port = (unsigned short) port;
+ bind_list->port = (unsigned short)ret;
cma_bind_port(bind_list, id_priv);
return 0;
-err2:
- idr_remove(ps, port);
-err1:
+err:
kfree(bind_list);
- return ret;
+ return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
@@ -2131,9 +2329,9 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
int low, high, remaining;
unsigned int rover;
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(&init_net, &low, &high);
remaining = (high - low) + 1;
- rover = net_random() % remaining + low;
+ rover = prandom_u32() % remaining + low;
retry:
if (last_used_port != rover &&
!idr_find(ps, (unsigned short) rover)) {
@@ -2156,67 +2354,135 @@ retry:
return -EADDRNOTAVAIL;
}
-static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+/*
+ * Check that the requested port is available. This is called when trying to
+ * bind to a specific port, or when trying to listen on a bound port. In
+ * the latter case, the provided id_priv may already be on the bind_list, but
+ * we still need to check that it's okay to start listening.
+ */
+static int cma_check_port(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv, uint8_t reuseaddr)
{
struct rdma_id_private *cur_id;
- struct sockaddr_in *sin, *cur_sin;
- struct rdma_bind_list *bind_list;
- struct hlist_node *node;
- unsigned short snum;
+ struct sockaddr *addr, *cur_addr;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- snum = ntohs(sin->sin_port);
- if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
- return -EACCES;
+ addr = cma_src_addr(id_priv);
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ if (id_priv == cur_id)
+ continue;
- bind_list = idr_find(ps, snum);
- if (!bind_list)
- return cma_alloc_port(ps, id_priv, snum);
+ if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
+ cur_id->reuseaddr)
+ continue;
- /*
- * We don't support binding to any address if anyone is bound to
- * a specific address on the same port.
- */
- if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
- return -EADDRNOTAVAIL;
+ cur_addr = cma_src_addr(cur_id);
+ if (id_priv->afonly && cur_id->afonly &&
+ (addr->sa_family != cur_addr->sa_family))
+ continue;
- hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
- if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
+ if (cma_any_addr(addr) || cma_any_addr(cur_addr))
return -EADDRNOTAVAIL;
- cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
- if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+ if (!cma_addr_cmp(addr, cur_addr))
return -EADDRINUSE;
}
-
- cma_bind_port(bind_list, id_priv);
return 0;
}
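
cma_check_port() encodes the sharing policy: an owner that is not yet listening may share the port if both sides opted into reuseaddr; with afonly set on both sides, different address families may coexist on one port; otherwise a wildcard on either side refuses the bind with -EADDRNOTAVAIL and an exact address match with -EADDRINUSE. A condensed, illustrative decision for a single existing owner:

	#include <stdbool.h>

	static int check_one_owner(bool owner_listening, bool owner_reuse,
				   bool my_reuse, bool both_afonly,
				   bool families_differ, bool any_wildcard,
				   bool same_addr)
	{
		if (!owner_listening && my_reuse && owner_reuse)
			return 0;	/* shared via mutual reuseaddr opt-in */
		if (both_afonly && families_differ)
			return 0;	/* e.g. v4 and v6 may share the port */
		if (any_wildcard)
			return -1;	/* -EADDRNOTAVAIL */
		if (same_addr)
			return -2;	/* -EADDRINUSE */
		return 0;
	}
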
-static int cma_get_port(struct rdma_id_private *id_priv)
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
{
- struct idr *ps;
+ struct rdma_bind_list *bind_list;
+ unsigned short snum;
int ret;
+ snum = ntohs(cma_port(cma_src_addr(id_priv)));
+ if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ bind_list = idr_find(ps, snum);
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, snum);
+ } else {
+ ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
+ return ret;
+}
+
+static int cma_bind_listen(struct rdma_id_private *id_priv)
+{
+ struct rdma_bind_list *bind_list = id_priv->bind_list;
+ int ret = 0;
+
+ mutex_lock(&lock);
+ if (bind_list->owners.first->next)
+ ret = cma_check_port(bind_list, id_priv, 0);
+ mutex_unlock(&lock);
+ return ret;
+}
+
+static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
+{
switch (id_priv->id.ps) {
- case RDMA_PS_SDP:
- ps = &sdp_ps;
- break;
case RDMA_PS_TCP:
- ps = &tcp_ps;
- break;
+ return &tcp_ps;
case RDMA_PS_UDP:
- ps = &udp_ps;
- break;
+ return &udp_ps;
case RDMA_PS_IPOIB:
- ps = &ipoib_ps;
- break;
+ return &ipoib_ps;
+ case RDMA_PS_IB:
+ return &ib_ps;
default:
- return -EPROTONOSUPPORT;
+ return NULL;
}
+}
+
+static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
+{
+ struct idr *ps = NULL;
+ struct sockaddr_ib *sib;
+ u64 sid_ps, mask, sid;
+
+ sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
+ sid = be64_to_cpu(sib->sib_sid) & mask;
+
+ if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
+ sid_ps = RDMA_IB_IP_PS_IB;
+ ps = &ib_ps;
+ } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
+ (sid == (RDMA_IB_IP_PS_TCP & mask))) {
+ sid_ps = RDMA_IB_IP_PS_TCP;
+ ps = &tcp_ps;
+ } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
+ (sid == (RDMA_IB_IP_PS_UDP & mask))) {
+ sid_ps = RDMA_IB_IP_PS_UDP;
+ ps = &udp_ps;
+ }
+
+ if (ps) {
+ sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
+ sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
+ be64_to_cpu(sib->sib_sid_mask));
+ }
+ return ps;
+}
+
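
An AF_IB bind carries the desired port space inside the SID itself: the helper above masks the SID down to its prefix, checks that prefix against the port spaces compatible with the id, and then canonicalizes the SID to prefix | port with a full prefix mask. A worked decode, with the same assumed rdma/ib.h constants as above:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint64_t PS_MASK = 0xFFFFFFFFFFFF0000ULL;	/* assumed */
		const uint64_t PS_TCP  = 0x0000000001060000ULL;	/* assumed */

		uint64_t sib_sid      = 0x0000000001061388ULL;	/* TCP prefix, port 5000 */
		uint64_t sib_sid_mask = PS_MASK;

		uint64_t mask = sib_sid_mask & PS_MASK;
		uint64_t sid  = sib_sid & mask;

		printf("selects tcp_ps: %s\n",
		       sid == (PS_TCP & mask) ? "yes" : "no");	/* yes */
		return 0;
	}
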
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+ struct idr *ps;
+ int ret;
+
+ if (cma_family(id_priv) != AF_IB)
+ ps = cma_select_inet_ps(id_priv);
+ else
+ ps = cma_select_ib_ps(id_priv);
+ if (!ps)
+ return -EPROTONOSUPPORT;
mutex_lock(&lock);
- if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
+ if (cma_any_port(cma_src_addr(id_priv)))
ret = cma_alloc_any_port(ps, id_priv);
else
ret = cma_use_port(ps, id_priv);
@@ -2228,15 +2494,18 @@ static int cma_get_port(struct rdma_id_private *id_priv)
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct sockaddr_in6 *sin6;
if (addr->sa_family != AF_INET6)
return 0;
sin6 = (struct sockaddr_in6 *) addr;
- if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
- !sin6->sin6_scope_id)
+
+ if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+ return 0;
+
+ if (!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
@@ -2244,108 +2513,132 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
return 0;
}
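
A bind to an IPv6 link-local address now fails cleanly unless the caller supplies a scope id, which pins the bound device. A userspace analogue of building such an address ("eth0" is a placeholder interface name):

	#include <arpa/inet.h>
	#include <net/if.h>
	#include <netinet/in.h>
	#include <string.h>

	static int fill_linklocal(struct sockaddr_in6 *sin6, const char *ifname)
	{
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		inet_pton(AF_INET6, "fe80::1", &sin6->sin6_addr);
		/* 0 here is what cma_check_linklocal() rejects with -EINVAL */
		sin6->sin6_scope_id = if_nametoindex(ifname);
		return sin6->sin6_scope_id ? 0 : -1;
	}
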
+int rdma_listen(struct rdma_cm_id *id, int backlog)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id_priv->state == RDMA_CM_IDLE) {
+ id->route.addr.src_addr.ss_family = AF_INET;
+ ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ if (ret)
+ return ret;
+ }
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
+ return -EINVAL;
+
+ if (id_priv->reuseaddr) {
+ ret = cma_bind_listen(id_priv);
+ if (ret)
+ goto err;
+ }
+
+ id_priv->backlog = backlog;
+ if (id->device) {
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_ib_listen(id_priv);
+ if (ret)
+ goto err;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ret = cma_iw_listen(id_priv, backlog);
+ if (ret)
+ goto err;
+ break;
+ default:
+ ret = -ENOSYS;
+ goto err;
+ }
+ } else
+ cma_listen_on_all(id_priv);
+
+ return 0;
+err:
+ id_priv->backlog = 0;
+ cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_listen);
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
- if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
+ addr->sa_family != AF_IB)
return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+ if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
+ memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+ ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
- mutex_lock(&lock);
- ret = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
+ ret = cma_acquire_dev(id_priv, NULL);
if (ret)
goto err1;
}
- memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
+ if (addr->sa_family == AF_INET)
+ id_priv->afonly = 1;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (addr->sa_family == AF_INET6)
+ id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+#endif
+ }
ret = cma_get_port(id_priv);
if (ret)
goto err2;
return 0;
err2:
- if (id_priv->cma_dev) {
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
- mutex_unlock(&lock);
- }
+ if (id_priv->cma_dev)
+ cma_release_dev(id_priv);
err1:
- cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
EXPORT_SYMBOL(rdma_bind_addr);
-static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
- struct rdma_route *route)
+static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
{
struct cma_hdr *cma_hdr;
- struct sdp_hh *sdp_hdr;
- if (route->addr.src_addr.ss_family == AF_INET) {
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ if (cma_family(id_priv) == AF_INET) {
struct sockaddr_in *src4, *dst4;
- src4 = (struct sockaddr_in *) &route->addr.src_addr;
- dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
-
- switch (ps) {
- case RDMA_PS_SDP:
- sdp_hdr = hdr;
- if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
- return -EINVAL;
- sdp_set_ip_ver(sdp_hdr, 4);
- sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
- sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
- sdp_hdr->port = src4->sin_port;
- break;
- default:
- cma_hdr = hdr;
- cma_hdr->cma_version = CMA_VERSION;
- cma_set_ip_ver(cma_hdr, 4);
- cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
- cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
- cma_hdr->port = src4->sin_port;
- break;
- }
- } else {
+ src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
+ dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
+
+ cma_set_ip_ver(cma_hdr, 4);
+ cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+ cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+ cma_hdr->port = src4->sin_port;
+ } else if (cma_family(id_priv) == AF_INET6) {
struct sockaddr_in6 *src6, *dst6;
- src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
- dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
-
- switch (ps) {
- case RDMA_PS_SDP:
- sdp_hdr = hdr;
- if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
- return -EINVAL;
- sdp_set_ip_ver(sdp_hdr, 6);
- sdp_hdr->src_addr.ip6 = src6->sin6_addr;
- sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
- sdp_hdr->port = src6->sin6_port;
- break;
- default:
- cma_hdr = hdr;
- cma_hdr->cma_version = CMA_VERSION;
- cma_set_ip_ver(cma_hdr, 6);
- cma_hdr->src_addr.ip6 = src6->sin6_addr;
- cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
- cma_hdr->port = src6->sin6_port;
- break;
- }
+ src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
+ dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
+
+ cma_set_ip_ver(cma_hdr, 6);
+ cma_hdr->src_addr.ip6 = src6->sin6_addr;
+ cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+ cma_hdr->port = src6->sin6_port;
}
return 0;
}
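
With SDP gone, every IP-based connect sends the one fixed cma_hdr private-data header. Its wire layout, sketched from the definition in cma.c of this period (36 bytes; the real struct uses union cma_ip_addr, which nests an IPv4 address in the last 4 bytes of the 16-byte slot):

	#include <stdint.h>

	/* Illustrative layout only; the authoritative definition lives in
	 * drivers/infiniband/core/cma.c. */
	struct cma_hdr_sketch {
		uint8_t  cma_version;	/* CMA_VERSION */
		uint8_t  ip_version;	/* IP version in the top 4 bits */
		uint16_t port;		/* big-endian source port */
		uint8_t  src_addr[16];	/* v4 occupies the trailing 4 bytes */
		uint8_t  dst_addr[16];
	};
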
@@ -2358,7 +2651,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
int ret = 0;
- if (cma_disable_callback(id_priv, CMA_CONNECT))
+ if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
@@ -2375,15 +2668,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ib_event->param.sidr_rep_rcvd.status;
break;
}
- ret = cma_set_qkey(id_priv);
+ ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
event.event = RDMA_CM_EVENT_ADDR_ERROR;
- event.status = -EINVAL;
- break;
- }
- if (id_priv->qkey != rep->qkey) {
- event.event = RDMA_CM_EVENT_UNREACHABLE;
- event.status = -EINVAL;
+ event.status = ret;
break;
}
ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
@@ -2404,7 +2692,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -2418,34 +2706,45 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_cm_sidr_req_param req;
- struct rdma_route *route;
- int ret;
+ struct ib_cm_id *id;
+ void *private_data;
+ int offset, ret;
- req.private_data_len = sizeof(struct cma_hdr) +
- conn_param->private_data_len;
- req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
- if (!req.private_data)
- return -ENOMEM;
+ memset(&req, 0, sizeof req);
+ offset = cma_user_data_offset(id_priv);
+ req.private_data_len = offset + conn_param->private_data_len;
+ if (req.private_data_len < conn_param->private_data_len)
+ return -EINVAL;
+
+ if (req.private_data_len) {
+ private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!private_data)
+ return -ENOMEM;
+ } else {
+ private_data = NULL;
+ }
if (conn_param->private_data && conn_param->private_data_len)
- memcpy((void *) req.private_data + sizeof(struct cma_hdr),
- conn_param->private_data, conn_param->private_data_len);
+ memcpy(private_data + offset, conn_param->private_data,
+ conn_param->private_data_len);
- route = &id_priv->id.route;
- ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
- if (ret)
- goto out;
+ if (private_data) {
+ ret = cma_format_hdr(private_data, id_priv);
+ if (ret)
+ goto out;
+ req.private_data = private_data;
+ }
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
- cma_sidr_rep_handler, id_priv);
- if (IS_ERR(id_priv->cm_id.ib)) {
- ret = PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
+ id_priv);
+ if (IS_ERR(id)) {
+ ret = PTR_ERR(id);
goto out;
}
+ id_priv->cm_id.ib = id;
- req.path = route->path_rec;
- req.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &route->addr.dst_addr);
+ req.path = id_priv->id.route.path_rec;
+ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -2455,7 +2754,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
id_priv->cm_id.ib = NULL;
}
out:
- kfree(req.private_data);
+ kfree(private_data);
return ret;
}
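
private_data_len in the CM request parameters is a u8, so offset + user length can silently wrap; the new "req.private_data_len < conn_param->private_data_len" test catches exactly that wrap before any allocation. A worked example of the failure it prevents:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t offset   = 36;			/* sizeof(struct cma_hdr) */
		uint8_t user_len = 230;
		uint8_t total    = offset + user_len;	/* wraps: 266 mod 256 = 10 */

		if (total < user_len)
			printf("overflow detected, reject with -EINVAL\n");
		return 0;
	}
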
@@ -2465,46 +2764,55 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
struct ib_cm_req_param req;
struct rdma_route *route;
void *private_data;
+ struct ib_cm_id *id;
int offset, ret;
memset(&req, 0, sizeof req);
- offset = cma_user_data_offset(id_priv->id.ps);
+ offset = cma_user_data_offset(id_priv);
req.private_data_len = offset + conn_param->private_data_len;
- private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
- if (!private_data)
- return -ENOMEM;
+ if (req.private_data_len < conn_param->private_data_len)
+ return -EINVAL;
+
+ if (req.private_data_len) {
+ private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!private_data)
+ return -ENOMEM;
+ } else {
+ private_data = NULL;
+ }
if (conn_param->private_data && conn_param->private_data_len)
memcpy(private_data + offset, conn_param->private_data,
conn_param->private_data_len);
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
- id_priv);
- if (IS_ERR(id_priv->cm_id.ib)) {
- ret = PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
+ if (IS_ERR(id)) {
+ ret = PTR_ERR(id);
goto out;
}
+ id_priv->cm_id.ib = id;
route = &id_priv->id.route;
- ret = cma_format_hdr(private_data, id_priv->id.ps, route);
- if (ret)
- goto out;
- req.private_data = private_data;
+ if (private_data) {
+ ret = cma_format_hdr(private_data, id_priv);
+ if (ret)
+ goto out;
+ req.private_data = private_data;
+ }
req.primary_path = &route->path_rec[0];
if (route->num_paths == 2)
req.alternate_path = &route->path_rec[1];
- req.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &route->addr.dst_addr);
+ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.qp_num = id_priv->qp_num;
- req.qp_type = IB_QPT_RC;
+ req.qp_type = id_priv->id.qp_type;
req.starting_psn = id_priv->seq_num;
req.responder_resources = conn_param->responder_resources;
req.initiator_depth = conn_param->initiator_depth;
req.flow_control = conn_param->flow_control;
- req.retry_count = conn_param->retry_count;
- req.rnr_retry_count = conn_param->rnr_retry_count;
+ req.retry_count = min_t(u8, 7, conn_param->retry_count);
+ req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -2512,8 +2820,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
- if (ret && !IS_ERR(id_priv->cm_id.ib)) {
- ib_destroy_cm_id(id_priv->cm_id.ib);
+ if (ret && !IS_ERR(id)) {
+ ib_destroy_cm_id(id);
id_priv->cm_id.ib = NULL;
}
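
The retry counters in an IB CM REQ are 3-bit fields on the wire, so user-supplied values above 7 are now clamped with min_t() rather than being truncated into adjacent bits; the accept path below applies the same clamp to rnr_retry_count. The clamp itself:

	#include <stdint.h>

	static inline uint8_t clamp_retry(uint8_t requested)
	{
		return requested > 7 ? 7 : requested;	/* 3-bit wire field */
	}
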
@@ -2525,39 +2833,37 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct iw_cm_id *cm_id;
- struct sockaddr_in* sin;
int ret;
struct iw_cm_conn_param iw_param;
cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
- if (IS_ERR(cm_id)) {
- ret = PTR_ERR(cm_id);
- goto out;
- }
+ if (IS_ERR(cm_id))
+ return PTR_ERR(cm_id);
id_priv->cm_id.iw = cm_id;
- sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
- cm_id->local_addr = *sin;
-
- sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
- cm_id->remote_addr = *sin;
+ memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
+ memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
+ rdma_addr_size(cma_dst_addr(id_priv)));
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
goto out;
- iw_param.ord = conn_param->initiator_depth;
- iw_param.ird = conn_param->responder_resources;
- iw_param.private_data = conn_param->private_data;
- iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp)
+ if (conn_param) {
+ iw_param.ord = conn_param->initiator_depth;
+ iw_param.ird = conn_param->responder_resources;
+ iw_param.private_data = conn_param->private_data;
+ iw_param.private_data_len = conn_param->private_data_len;
+ iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
+ } else {
+ memset(&iw_param, 0, sizeof iw_param);
iw_param.qpn = id_priv->qp_num;
- else
- iw_param.qpn = conn_param->qp_num;
+ }
ret = iw_cm_connect(cm_id, &iw_param);
out:
- if (ret && !IS_ERR(cm_id)) {
+ if (ret) {
iw_destroy_cm_id(cm_id);
id_priv->cm_id.iw = NULL;
}
@@ -2570,7 +2876,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp) {
@@ -2580,7 +2886,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
+ if (id->qp_type == IB_QPT_UD)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
@@ -2597,7 +2903,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
return 0;
err:
- cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_connect);
@@ -2625,7 +2931,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.initiator_depth = conn_param->initiator_depth;
rep.failover_accepted = 0;
rep.flow_control = conn_param->flow_control;
- rep.rnr_retry_count = conn_param->rnr_retry_count;
+ rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
@@ -2656,7 +2962,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
}
static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
- enum ib_cm_sidr_status status,
+ enum ib_cm_sidr_status status, u32 qkey,
const void *private_data, int private_data_len)
{
struct ib_cm_sidr_rep_param rep;
@@ -2665,7 +2971,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv);
+ ret = cma_set_qkey(id_priv, qkey);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
@@ -2683,7 +2989,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_CONNECT))
+
+ id_priv->owner = task_pid_nr(current);
+
+ if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp && conn_param) {
@@ -2693,14 +3002,21 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
- ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
- conn_param->private_data,
- conn_param->private_data_len);
- else if (conn_param)
- ret = cma_accept_ib(id_priv, conn_param);
- else
- ret = cma_rep_recv(id_priv);
+ if (id->qp_type == IB_QPT_UD) {
+ if (conn_param)
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+ conn_param->qkey,
+ conn_param->private_data,
+ conn_param->private_data_len);
+ else
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+ 0, NULL, 0);
+ } else {
+ if (conn_param)
+ ret = cma_accept_ib(id_priv, conn_param);
+ else
+ ret = cma_rep_recv(id_priv);
+ }
break;
case RDMA_TRANSPORT_IWARP:
ret = cma_accept_iw(id_priv, conn_param);
@@ -2727,7 +3043,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (id->device->node_type) {
@@ -2749,13 +3065,13 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
- ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
+ if (id->qp_type == IB_QPT_UD)
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
else
ret = ib_send_cm_rej(id_priv->cm_id.ib,
@@ -2780,7 +3096,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2812,14 +3128,16 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
int ret;
id_priv = mc->id_priv;
- if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
- cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
+ if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
+ cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
return 0;
+ if (!status)
+ status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- multicast->rec.mlid);
+ be16_to_cpu(multicast->rec.mlid));
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
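
rec.mlid in the SA multicast record is big-endian (__be16), while ib_attach_mcast()/ib_detach_mcast() take a host-order LID — hence the added be16_to_cpu() here and in rdma_leave_multicast() further down; the qkey is likewise now taken from the joined record. The userspace spelling of the same conversion:

	#include <arpa/inet.h>
	#include <stdint.h>

	/* __be16 -> host order, i.e. be16_to_cpu() outside the kernel */
	static inline uint16_t be16_to_host(uint16_t be)
	{
		return ntohs(be);
	}
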
@@ -2837,7 +3155,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return 0;
@@ -2862,6 +3180,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if (addr->sa_family == AF_IB) {
+ memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6)) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2889,9 +3209,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (ret)
return ret;
+ ret = cma_set_qkey(id_priv, 0);
+ if (ret)
+ return ret;
+
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
- if (id_priv->id.ps == RDMA_PS_UDP)
- rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ rec.qkey = cpu_to_be32(id_priv->qkey);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@@ -2904,16 +3227,16 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (id_priv->id.ps == RDMA_PS_IPOIB)
comp_mask |= IB_SA_MCMEMBER_REC_RATE |
- IB_SA_MCMEMBER_REC_RATE_SELECTOR;
+ IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+ IB_SA_MCMEMBER_REC_MTU_SELECTOR |
+ IB_SA_MCMEMBER_REC_MTU |
+ IB_SA_MCMEMBER_REC_HOP_LIMIT;
mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
id_priv->id.port_num, &rec,
comp_mask, GFP_KERNEL,
cma_ib_mc_handler, mc);
- if (IS_ERR(mc->multicast.ib))
- return PTR_ERR(mc->multicast.ib);
-
- return 0;
+ return PTR_ERR_OR_ZERO(mc->multicast.ib);
}
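
PTR_ERR_OR_ZERO() above folds the usual IS_ERR()/PTR_ERR() tail into a
single call; for reference, it behaves like this open-coded sketch:

	static inline int example_ptr_err_or_zero(const void *ptr)
	{
		return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
	}
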
static void iboe_mcast_work_handler(struct work_struct *work)
@@ -2996,7 +3319,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -EINVAL;
goto out2;
}
- iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &mc->multicast.ib->rec.port_gid);
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);
@@ -3020,15 +3344,15 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
- !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
+ !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
mc = kmalloc(sizeof *mc, GFP_KERNEL);
if (!mc)
return -ENOMEM;
- memcpy(&mc->addr, addr, ip_addr_size(addr));
+ memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
@@ -3073,14 +3397,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
list_del(&mc->list);
spin_unlock_irq(&id_priv->lock);
if (id->qp)
ib_detach_mcast(id->qp,
&mc->multicast.ib->rec.mgid,
- mc->multicast.ib->rec.mlid);
+ be16_to_cpu(mc->multicast.ib->rec.mlid));
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
switch (rdma_port_get_link_layer(id->device, id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
@@ -3127,9 +3451,9 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
}
static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
- void *ctx)
+ void *ptr)
{
- struct net_device *ndev = (struct net_device *)ctx;
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
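
The netdev notifier now hands callbacks a struct netdev_notifier_info
rather than the net_device itself, hence the
netdev_notifier_info_to_dev() unwrap above. A minimal sketch of the new
calling convention (handler name illustrative):

	static int example_netdev_event(struct notifier_block *self,
					unsigned long event, void *ptr)
	{
		struct net_device *ndev = netdev_notifier_info_to_dev(ptr);

		if (event == NETDEV_BONDING_FAILOVER)
			pr_debug("failover on %s\n", ndev->name);
		return NOTIFY_DONE;
	}
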
@@ -3186,19 +3510,19 @@ static void cma_add_one(struct ib_device *device)
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event;
- enum cma_state state;
+ enum rdma_cm_state state;
int ret = 0;
/* Record that we want to remove the device */
- state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
- if (state == CMA_DESTROYING)
+ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
+ if (state == RDMA_CM_DESTROYING)
return 0;
cma_cancel_operation(id_priv, state);
mutex_lock(&id_priv->handler_mutex);
/* Check for destruction from another callback. */
- if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
goto out;
memset(&event, 0, sizeof event);
@@ -3253,6 +3577,85 @@ static void cma_remove_one(struct ib_device *device)
kfree(cma_dev);
}
+static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlmsghdr *nlh;
+ struct rdma_cm_id_stats *id_stats;
+ struct rdma_id_private *id_priv;
+ struct rdma_cm_id *id = NULL;
+ struct cma_device *cma_dev;
+ int i_dev = 0, i_id = 0;
+
+ /*
+ * We export all of the IDs as a sequence of messages. Each
+ * ID gets its own netlink message.
+ */
+ mutex_lock(&lock);
+
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ if (i_dev < cb->args[0]) {
+ i_dev++;
+ continue;
+ }
+
+ i_id = 0;
+ list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ if (i_id < cb->args[1]) {
+ i_id++;
+ continue;
+ }
+
+ id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
+ sizeof *id_stats, RDMA_NL_RDMA_CM,
+ RDMA_NL_RDMA_CM_ID_STATS,
+ NLM_F_MULTI);
+ if (!id_stats)
+ goto out;
+
+ memset(id_stats, 0, sizeof *id_stats);
+ id = &id_priv->id;
+ id_stats->node_type = id->route.addr.dev_addr.dev_type;
+ id_stats->port_num = id->port_num;
+ id_stats->bound_dev_if =
+ id->route.addr.dev_addr.bound_dev_if;
+
+ if (ibnl_put_attr(skb, nlh,
+ rdma_addr_size(cma_src_addr(id_priv)),
+ cma_src_addr(id_priv),
+ RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
+ goto out;
+ if (ibnl_put_attr(skb, nlh,
+				  rdma_addr_size(cma_dst_addr(id_priv)),
+ cma_dst_addr(id_priv),
+ RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
+ goto out;
+
+ id_stats->pid = id_priv->owner;
+ id_stats->port_space = id->ps;
+ id_stats->cm_state = id_priv->state;
+ id_stats->qp_num = id_priv->qp_num;
+ id_stats->qp_type = id->qp_type;
+
+ i_id++;
+ }
+
+ cb->args[1] = 0;
+ i_dev++;
+ }
+
+out:
+ mutex_unlock(&lock);
+ cb->args[0] = i_dev;
+ cb->args[1] = i_id;
+
+ return skb->len;
+}
+
+static const struct ibnl_client_cbs cma_cb_table[] = {
+ [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
+ .module = THIS_MODULE },
+};
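
The dump uses cb->args[0]/args[1] as a (device, id) cursor so a partial
dump resumes where the previous skb filled up. From user space the table
can be requested over NETLINK_RDMA with a standard netlink dump; a hedged
sketch, assuming the RDMA_NL_* constants from <rdma/rdma_netlink.h> are
visible to the application (error handling trimmed):

	#include <linux/netlink.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int example_dump_cm_ids(void)
	{
		struct nlmsghdr nlh;
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RDMA);

		if (fd < 0)
			return -1;
		memset(&nlh, 0, sizeof(nlh));
		nlh.nlmsg_len = NLMSG_LENGTH(0);
		nlh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_RDMA_CM,
						  RDMA_NL_RDMA_CM_ID_STATS);
		nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
		send(fd, &nlh, nlh.nlmsg_len, 0);
		/* a recv() loop would parse the NLM_F_MULTI
		 * rdma_cm_id_stats records here */
		close(fd);
		return 0;
	}
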
+
static int __init cma_init(void)
{
int ret;
@@ -3268,6 +3671,10 @@ static int __init cma_init(void)
ret = ib_register_client(&cma_client);
if (ret)
goto err;
+
+ if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
+ printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+
return 0;
err:
@@ -3280,15 +3687,16 @@ err:
static void __exit cma_cleanup(void)
{
+ ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
destroy_workqueue(cma_wq);
- idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
idr_destroy(&udp_ps);
idr_destroy(&ipoib_ps);
+ idr_destroy(&ib_ps);
}
module_init(cma_init);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a565af5c2d2..87d1936f5c1 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_cleanup(void);
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a19effad081..18c1ece765f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -38,7 +38,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
+#include <rdma/rdma_netlink.h>
#include "core_priv.h"
@@ -52,6 +52,9 @@ struct ib_client_data {
void * data;
};
+struct workqueue_struct *ib_wq;
+EXPORT_SYMBOL_GPL(ib_wq);
+
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
@@ -624,6 +627,9 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
+ if (!device->modify_device)
+ return -ENOSYS;
+
return device->modify_device(device, device_modify_mask,
device_modify);
}
@@ -644,6 +650,9 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
+ if (!device->modify_port)
+ return -ENOSYS;
+
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@@ -698,18 +707,28 @@ int ib_find_pkey(struct ib_device *device,
{
int ret, i;
u16 tmp_pkey;
+ int partial_ix = -1;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
-
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
- *index = i;
- return 0;
+			/* if there is a full-member pkey, take it */
+ if (tmp_pkey & 0x8000) {
+ *index = i;
+ return 0;
+ }
+ if (partial_ix < 0)
+ partial_ix = i;
}
}
+	/* no full-member pkey; fall back to a limited-member one, if any */
+ if (partial_ix >= 0) {
+ *index = partial_ix;
+ return 0;
+ }
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
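
The top bit of a P_Key encodes membership (per the IBTA spec: 1 = full
member, 0 = limited member), which is why the loop above prefers a
full-member match and only then returns a remembered limited-member
index. For example, the default P_Keys 0xFFFF (full) and 0x7FFF (limited)
compare equal under the 0x7fff base-value mask, but only the former
passes the membership test, which reduces to:

	/* sketch: the membership test behind the 0x8000 checks above */
	static inline bool example_pkey_is_full_member(u16 pkey)
	{
		return pkey & 0x8000;
	}
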
@@ -718,25 +737,48 @@ static int __init ib_core_init(void)
{
int ret;
+ ib_wq = alloc_workqueue("infiniband", 0, 0);
+ if (!ib_wq)
+ return -ENOMEM;
+
ret = ib_sysfs_setup();
- if (ret)
+ if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+ goto err;
+ }
+
+ ret = ibnl_init();
+ if (ret) {
+ printk(KERN_WARNING "Couldn't init IB netlink interface\n");
+ goto err_sysfs;
+ }
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
- ib_sysfs_cleanup();
+ goto err_nl;
}
+ return 0;
+
+err_nl:
+ ibnl_cleanup();
+
+err_sysfs:
+ ib_sysfs_cleanup();
+
+err:
+ destroy_workqueue(ib_wq);
return ret;
}
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
+ ibnl_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
- flush_scheduled_work();
+ destroy_workqueue(ib_wq);
}
module_init(ib_core_init);
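
With the dedicated ib_wq exported above, IB clients queue deferred work
there instead of on the system workqueue, and module unload flushes it
via destroy_workqueue(). A hedged sketch of client-side usage (names
illustrative):

	static void example_fn(struct work_struct *work)
	{
		pr_debug("deferred InfiniBand work ran\n");
	}
	static DECLARE_WORK(example_work, example_fn);

	static void example_defer(void)
	{
		queue_work(ib_wq, &example_work);
	}
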
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 4507043d24c..9f5ad7cc33c 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/spinlock.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
@@ -117,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
{
struct hlist_head *bucket;
struct ib_pool_fmr *fmr;
- struct hlist_node *pos;
if (!pool->cache_bucket)
return NULL;
bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
- hlist_for_each_entry(fmr, pos, bucket, cache_node)
+ hlist_for_each_entry(fmr, bucket, cache_node)
if (io_virtual_address == fmr->io_virtual_address &&
page_list_len == fmr->page_list_len &&
!memcmp(page_list, fmr->page_list,
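
This hunk follows the tree-wide hlist_for_each_entry() conversion: the
macro now derives its cursor from the entry itself, so the separate
struct hlist_node *pos argument is gone. A kernel-context sketch of the
new form, with hypothetical types:

	struct example_item {
		struct hlist_node node;
		u64 key;
	};

	static struct example_item *example_lookup(struct hlist_head *head,
						   u64 key)
	{
		struct example_item *item;

		hlist_for_each_entry(item, head, node)	/* no 'pos' cursor */
			if (item->key == key)
				return item;
		return NULL;
	}
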
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 2a1e9ae134b..3d2e489ab73 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -45,6 +45,7 @@
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@@ -180,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id)
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ int cb_destroy;
+
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- if (iwcm_deref_id(cm_id_priv) &&
- test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+
+ /*
+ * Test bit before deref in case the cm_id gets freed on another
+ * thread.
+ */
+ cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+ if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
}
@@ -326,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
- int ret;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
@@ -342,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
- ret = cm_id->device->iwcm->destroy_listen(cm_id);
+ cm_id->device->iwcm->destroy_listen(cm_id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
@@ -623,17 +630,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
*/
BUG_ON(iw_event->status);
- /*
- * We could be destroying the listening id. If so, ignore this
- * upcall.
- */
- spin_lock_irqsave(&listen_id_priv->lock, flags);
- if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
- spin_unlock_irqrestore(&listen_id_priv->lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&listen_id_priv->lock, flags);
-
cm_id = iw_create_cm_id(listen_id_priv->id.device,
listen_id_priv->id.cm_handler,
listen_id_priv->id.context);
@@ -648,6 +644,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+ /*
+ * We could be destroying the listening id. If so, ignore this
+ * upcall.
+ */
+ spin_lock_irqsave(&listen_id_priv->lock, flags);
+ if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+ iw_cm_reject(cm_id, NULL, 0);
+ iw_destroy_cm_id(cm_id);
+ goto out;
+ }
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
ret = alloc_work_entries(cm_id_priv, 3);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
@@ -725,7 +734,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
*/
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
- if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+ if (iw_event->status == 0) {
cm_id_priv->id.local_addr = iw_event->local_addr;
cm_id_priv->id.remote_addr = iw_event->remote_addr;
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
@@ -875,6 +884,8 @@ static void cm_work_handler(struct work_struct *_work)
}
return;
}
+ if (empty)
+ return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
new file mode 100644
index 00000000000..b85ddbc979e
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+static const char iwpm_ulib_name[] = "iWarpPortMapperUser";
+static int iwpm_ulib_version = 3;
+static int iwpm_user_pid = IWPM_PID_UNDEFINED;
+static atomic_t echo_nlmsg_seq;
+
+int iwpm_valid_pid(void)
+{
+ return iwpm_user_pid > 0;
+}
+EXPORT_SYMBOL(iwpm_valid_pid);
+
+/*
+ * iwpm_register_pid - Send a netlink query to user space
+ * for the iwarp port mapper pid
+ *
+ * nlmsg attributes:
+ * [IWPM_NLA_REG_PID_SEQ]
+ * [IWPM_NLA_REG_IF_NAME]
+ * [IWPM_NLA_REG_IBDEV_NAME]
+ * [IWPM_NLA_REG_ULIB_NAME]
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto pid_query_error;
+ }
+ if (iwpm_registered_client(nl_client))
+ return 0;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
+ if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to create a nlmsg";
+ goto pid_query_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+ if (!nlmsg_request) {
+ err_str = "Unable to allocate netlink request";
+ goto pid_query_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+
+ /* fill in the pid request message */
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
+ if (ret)
+ goto pid_query_error;
+ ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
+ pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+ if (ret)
+ goto pid_query_error;
+ ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
+ pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
+ if (ret)
+ goto pid_query_error;
+ ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
+ (char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
+ if (ret)
+ goto pid_query_error;
+
+ pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
+ __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
+
+ ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
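+		/*
+		 * No port mapper is listening: mark the client registered
+		 * anyway, with an unavailable pid, so later mapping calls
+		 * become no-ops instead of retrying this query.
+		 */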
+ iwpm_set_registered(nl_client, 1);
+ iwpm_user_pid = IWPM_PID_UNAVAILABLE;
+ err_str = "Unable to send a nlmsg";
+ goto pid_query_error;
+ }
+ nlmsg_request->req_buffer = pm_msg;
+ ret = iwpm_wait_complete_req(nlmsg_request);
+ return ret;
+pid_query_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb(skb);
+ if (nlmsg_request)
+ iwpm_free_nlmsg_request(&nlmsg_request->kref);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_register_pid);
+
+/*
+ * iwpm_add_mapping - Send a netlink add mapping message
+ * to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ * [IWPM_NLA_MANAGE_ADDR]
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto add_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto add_mapping_error;
+ }
+ if (!iwpm_valid_pid())
+ return 0;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
+ if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to create a nlmsg";
+ goto add_mapping_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+ if (!nlmsg_request) {
+ err_str = "Unable to allocate netlink request";
+ goto add_mapping_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+ /* fill in the add mapping message */
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_MANAGE_MAPPING_SEQ);
+ if (ret)
+ goto add_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
+ if (ret)
+ goto add_mapping_error;
+ nlmsg_request->req_buffer = pm_msg;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ iwpm_user_pid = IWPM_PID_UNDEFINED;
+ err_str = "Unable to send a nlmsg";
+ goto add_mapping_error;
+ }
+ ret = iwpm_wait_complete_req(nlmsg_request);
+ return ret;
+add_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb(skb);
+ if (nlmsg_request)
+ iwpm_free_nlmsg_request(&nlmsg_request->kref);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_add_mapping);
+
+/*
+ * iwpm_add_and_query_mapping - Send a netlink add and query
+ * mapping message to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_QUERY_MAPPING_SEQ]
+ * [IWPM_NLA_QUERY_LOCAL_ADDR]
+ * [IWPM_NLA_QUERY_REMOTE_ADDR]
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto query_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto query_mapping_error;
+ }
+ if (!iwpm_valid_pid())
+ return 0;
+ ret = -ENOMEM;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto query_mapping_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq,
+ nl_client, GFP_KERNEL);
+ if (!nlmsg_request) {
+ err_str = "Unable to allocate netlink request";
+ goto query_mapping_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+
+ /* fill in the query message */
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_QUERY_MAPPING_SEQ);
+ if (ret)
+ goto query_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
+ if (ret)
+ goto query_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
+ if (ret)
+ goto query_mapping_error;
+ nlmsg_request->req_buffer = pm_msg;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ err_str = "Unable to send a nlmsg";
+ goto query_mapping_error;
+ }
+ ret = iwpm_wait_complete_req(nlmsg_request);
+ return ret;
+query_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb(skb);
+ if (nlmsg_request)
+ iwpm_free_nlmsg_request(&nlmsg_request->kref);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping);
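+
+/*
+ * Sketch of the expected driver-side sequence (illustrative, not part
+ * of this API); RDMA_NL_C4IW and all names are placeholders:
+ *
+ *	struct iwpm_dev_data pm_reg_msg;
+ *	struct iwpm_sa_data pm_msg;
+ *
+ *	iwpm_init(RDMA_NL_C4IW);
+ *	strcpy(pm_reg_msg.dev_name, "cxgb4_0");
+ *	strcpy(pm_reg_msg.if_name, "eth2");
+ *	if (!iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW) &&
+ *	    iwpm_valid_pid()) {
+ *		pm_msg.loc_addr = local;	(struct sockaddr_storage)
+ *		pm_msg.rem_addr = remote;
+ *		if (!iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW))
+ *			use pm_msg.mapped_loc_addr / pm_msg.mapped_rem_addr;
+ *	}
+ */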
+
+/*
+ * iwpm_remove_mapping - Send a netlink remove mapping message
+ * to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ * [IWPM_NLA_MANAGE_ADDR]
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto remove_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto remove_mapping_error;
+ }
+ if (!iwpm_valid_pid())
+ return 0;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client);
+ if (!skb) {
+ ret = -ENOMEM;
+ err_str = "Unable to create a nlmsg";
+ goto remove_mapping_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_MANAGE_MAPPING_SEQ);
+ if (ret)
+ goto remove_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ local_addr, IWPM_NLA_MANAGE_ADDR);
+ if (ret)
+ goto remove_mapping_error;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ iwpm_user_pid = IWPM_PID_UNDEFINED;
+ err_str = "Unable to send a nlmsg";
+ goto remove_mapping_error;
+ }
+ iwpm_print_sockaddr(local_addr,
+ "remove_mapping: Local sockaddr:");
+ return 0;
+remove_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapping);
+
+/* netlink attribute policy for the received response to register pid request */
+static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
+ [IWPM_NLA_RREG_PID_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_RREG_IBDEV_NAME] = { .type = NLA_STRING,
+ .len = IWPM_DEVNAME_SIZE - 1 },
+ [IWPM_NLA_RREG_ULIB_NAME] = { .type = NLA_STRING,
+ .len = IWPM_ULIBNAME_SIZE - 1 },
+ [IWPM_NLA_RREG_ULIB_VER] = { .type = NLA_U16 },
+ [IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_register_pid_cb - Process a port mapper response to
+ * iwpm_register_pid()
+ */
+int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
+ struct iwpm_dev_data *pm_msg;
+ char *dev_name, *iwpm_name;
+ u32 msg_seq;
+ u8 nl_client;
+ u16 iwpm_version;
+ const char *msg_type = "Register Pid response";
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
+ resp_reg_policy, nltb, msg_type))
+ return -EINVAL;
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ pm_msg = nlmsg_request->req_buffer;
+ nl_client = nlmsg_request->nl_client;
+ dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
+ iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
+ iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
+
+ /* check device name, ulib name and version */
+ if (strcmp(pm_msg->dev_name, dev_name) ||
+ strcmp(iwpm_ulib_name, iwpm_name) ||
+ iwpm_version != iwpm_ulib_version) {
+
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ __func__, dev_name, iwpm_name, iwpm_version);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto register_pid_response_exit;
+ }
+ iwpm_user_pid = cb->nlh->nlmsg_pid;
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+ __func__, iwpm_user_pid);
+ if (iwpm_valid_client(nl_client))
+ iwpm_set_registered(nl_client, 1);
+register_pid_response_exit:
+ nlmsg_request->request_done = 1;
+	/* drop the reference taken by iwpm_find_nlmsg_request() */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_register_pid_cb);
+
+/* netlink attribute policy for the received response to add mapping request */
+static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
+ [IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_mapping_cb - Process a port mapper response to
+ * iwpm_add_mapping()
+ */
+int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_sa_data *pm_msg;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
+ struct sockaddr_storage *local_sockaddr;
+ struct sockaddr_storage *mapped_sockaddr;
+ const char *msg_type;
+ u32 msg_seq;
+
+ msg_type = "Add Mapping response";
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
+ resp_add_policy, nltb, msg_type))
+ return -EINVAL;
+
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ pm_msg = nlmsg_request->req_buffer;
+ local_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+ mapped_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+
+ if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto add_mapping_response_exit;
+ }
+ if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
+ pr_info("%s: Sockaddr family doesn't match the requested one\n",
+ __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto add_mapping_response_exit;
+ }
+ memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
+ sizeof(*mapped_sockaddr));
+ iwpm_print_sockaddr(&pm_msg->loc_addr,
+ "add_mapping: Local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+ "add_mapping: Mapped local sockaddr:");
+
+add_mapping_response_exit:
+ nlmsg_request->request_done = 1;
+	/* drop the reference taken by iwpm_find_nlmsg_request() */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_add_mapping_cb);
+
+/* netlink attribute policy for the response to add and query mapping request */
+static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
+ [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_and_query_mapping_cb - Process a port mapper response to
+ * iwpm_add_and_query_mapping()
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct iwpm_sa_data *pm_msg;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+ struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+ struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+ const char *msg_type;
+ u32 msg_seq;
+ u16 err_code;
+
+ msg_type = "Query Mapping response";
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+ resp_query_policy, nltb, msg_type))
+ return -EINVAL;
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ pm_msg = nlmsg_request->req_buffer;
+ local_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+ remote_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+ mapped_loc_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+ mapped_rem_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+ err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
+ if (err_code == IWPM_REMOTE_QUERY_REJECT) {
+ pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
+ __func__, cb->nlh->nlmsg_pid, msg_seq);
+ nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
+ }
+ if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
+ iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
+		pr_info("%s: Incorrect local or remote sockaddr\n", __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto query_mapping_response_exit;
+ }
+ if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+ mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+ pr_info("%s: Sockaddr family doesn't match the requested one\n",
+ __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto query_mapping_response_exit;
+ }
+ memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
+ sizeof(*mapped_loc_sockaddr));
+ memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
+ sizeof(*mapped_rem_sockaddr));
+
+ iwpm_print_sockaddr(&pm_msg->loc_addr,
+ "query_mapping: Local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+ "query_mapping: Mapped local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->rem_addr,
+ "query_mapping: Remote sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
+ "query_mapping: Mapped remote sockaddr:");
+query_mapping_response_exit:
+ nlmsg_request->request_done = 1;
+	/* drop the reference taken by iwpm_find_nlmsg_request() */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
+
+/* netlink attribute policy for the received request for mapping info */
+static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
+ [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
+ .len = IWPM_ULIBNAME_SIZE - 1 },
+ [IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+ */
+int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX];
+ const char *msg_type = "Mapping Info response";
+ int iwpm_pid;
+ u8 nl_client;
+ char *iwpm_name;
+ u16 iwpm_version;
+ int ret = -EINVAL;
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
+ resp_mapinfo_policy, nltb, msg_type)) {
+ pr_info("%s: Unable to parse nlmsg\n", __func__);
+ return ret;
+ }
+ iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
+ iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
+ if (strcmp(iwpm_ulib_name, iwpm_name) ||
+ iwpm_version != iwpm_ulib_version) {
+ pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ __func__, iwpm_name, iwpm_version);
+ return ret;
+ }
+ nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+ if (!iwpm_valid_client(nl_client)) {
+ pr_info("%s: Invalid port mapper client = %d\n",
+ __func__, nl_client);
+ return ret;
+ }
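+	/* the port mapper has (re)started: force this client to re-register */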
+ iwpm_set_registered(nl_client, 0);
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ if (!iwpm_mapinfo_available())
+ return 0;
+ iwpm_pid = cb->nlh->nlmsg_pid;
+ pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+ __func__, iwpm_pid);
+ ret = iwpm_send_mapinfo(nl_client, iwpm_pid);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_mapping_info_cb);
+
+/* netlink attribute policy for the received mapping info ack */
+static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
+ [IWPM_NLA_MAPINFO_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
+ [IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 }
+};
+
+/*
+ * iwpm_ack_mapping_info_cb - Process a port mapper ack for
+ * the provided mapping info records
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX];
+ u32 mapinfo_send, mapinfo_ack;
+ const char *msg_type = "Mapping Info Ack";
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
+ ack_mapinfo_policy, nltb, msg_type))
+ return -EINVAL;
+ mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]);
+ mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]);
+ if (mapinfo_ack != mapinfo_send)
+ pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
+ __func__, mapinfo_send, mapinfo_ack);
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
+
+/* netlink attribute policy for the received port mapper error message */
+static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
+ [IWPM_NLA_ERR_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_ERR_CODE] = { .type = NLA_U16 },
+};
+
+/*
+ * iwpm_mapping_error_cb - Process a port mapper error message
+ */
+int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+ struct nlattr *nltb[IWPM_NLA_ERR_MAX];
+ u32 msg_seq;
+ u16 err_code;
+ const char *msg_type = "Mapping Error Msg";
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
+ map_error_policy, nltb, msg_type))
+ return -EINVAL;
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
+ err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
+ pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
+ __func__, msg_seq, err_code, nl_client);
+ /* look for nlmsg_request */
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ /* not all errors have associated requests */
+ pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
+ return 0;
+ }
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ nlmsg_request->err_code = err_code;
+ nlmsg_request->request_done = 1;
+	/* drop the reference taken by iwpm_find_nlmsg_request() */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
new file mode 100644
index 00000000000..69e9f84c160
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+#define IWPM_HASH_BUCKET_SIZE 512
+#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1)
+
+static LIST_HEAD(iwpm_nlmsg_req_list);
+static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
+
+static struct hlist_head *iwpm_hash_bucket;
+static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
+
+static DEFINE_MUTEX(iwpm_admin_lock);
+static struct iwpm_admin_data iwpm_admin;
+
+int iwpm_init(u8 nl_client)
+{
+ if (iwpm_valid_client(nl_client))
+ return -EINVAL;
+ mutex_lock(&iwpm_admin_lock);
+ if (atomic_read(&iwpm_admin.refcount) == 0) {
+ iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_hash_bucket) {
+ mutex_unlock(&iwpm_admin_lock);
+ pr_err("%s Unable to create mapinfo hash table\n", __func__);
+ return -ENOMEM;
+ }
+ }
+ atomic_inc(&iwpm_admin.refcount);
+ mutex_unlock(&iwpm_admin_lock);
+ iwpm_set_valid(nl_client, 1);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_init);
+
+static void free_hash_bucket(void);
+
+int iwpm_exit(u8 nl_client)
+{
+ if (!iwpm_valid_client(nl_client))
+ return -EINVAL;
+ mutex_lock(&iwpm_admin_lock);
+ if (atomic_read(&iwpm_admin.refcount) == 0) {
+ mutex_unlock(&iwpm_admin_lock);
+		pr_err("%s Incorrect usage - refcount is already zero\n", __func__);
+ return -EINVAL;
+ }
+ if (atomic_dec_and_test(&iwpm_admin.refcount)) {
+ free_hash_bucket();
+ pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+ }
+ mutex_unlock(&iwpm_admin_lock);
+ iwpm_set_valid(nl_client, 0);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_exit);
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+ struct sockaddr_storage *);
+
+int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
+ struct sockaddr_storage *mapped_sockaddr,
+ u8 nl_client)
+{
+ struct hlist_head *hash_bucket_head;
+ struct iwpm_mapping_info *map_info;
+ unsigned long flags;
+
+ if (!iwpm_valid_client(nl_client))
+ return -EINVAL;
+ map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
+ if (!map_info) {
+ pr_err("%s: Unable to allocate a mapping info\n", __func__);
+ return -ENOMEM;
+ }
+ memcpy(&map_info->local_sockaddr, local_sockaddr,
+ sizeof(struct sockaddr_storage));
+ memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
+ sizeof(struct sockaddr_storage));
+ map_info->nl_client = nl_client;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		hash_bucket_head = get_hash_bucket_head(
+					&map_info->local_sockaddr,
+					&map_info->mapped_sockaddr);
+		if (hash_bucket_head) {
+			hlist_add_head(&map_info->hlist_node,
+				       hash_bucket_head);
+			spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+			return 0;
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	kfree(map_info);	/* nothing was hashed; don't leak the record */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(iwpm_create_mapinfo);
+
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
+ struct sockaddr_storage *mapped_local_addr)
+{
+ struct hlist_node *tmp_hlist_node;
+ struct hlist_head *hash_bucket_head;
+ struct iwpm_mapping_info *map_info = NULL;
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ if (iwpm_hash_bucket) {
+ hash_bucket_head = get_hash_bucket_head(
+ local_sockaddr,
+ mapped_local_addr);
+ hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+ hash_bucket_head, hlist_node) {
+
+ if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr,
+ mapped_local_addr)) {
+
+ hlist_del_init(&map_info->hlist_node);
+ kfree(map_info);
+ ret = 0;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapinfo);
+
+static void free_hash_bucket(void)
+{
+ struct hlist_node *tmp_hlist_node;
+ struct iwpm_mapping_info *map_info;
+ unsigned long flags;
+ int i;
+
+ /* remove all the mapinfo data from the list */
+ spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+ &iwpm_hash_bucket[i], hlist_node) {
+
+ hlist_del_init(&map_info->hlist_node);
+ kfree(map_info);
+ }
+ }
+ /* free the hash list */
+ kfree(iwpm_hash_bucket);
+ iwpm_hash_bucket = NULL;
+ spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+}
+
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+ u8 nl_client, gfp_t gfp)
+{
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ unsigned long flags;
+
+ nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
+ if (!nlmsg_request) {
+ pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+ return NULL;
+ }
+ spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+ list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
+ spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+ kref_init(&nlmsg_request->kref);
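+	/* two refs: one for the waiting sender, one for the response handler */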
+ kref_get(&nlmsg_request->kref);
+ nlmsg_request->nlmsg_seq = nlmsg_seq;
+ nlmsg_request->nl_client = nl_client;
+ nlmsg_request->request_done = 0;
+	nlmsg_request->err_code = 0;
+	init_waitqueue_head(&nlmsg_request->waitq);	/* ready before any response can wake us */
+ return nlmsg_request;
+}
+
+void iwpm_free_nlmsg_request(struct kref *kref)
+{
+ struct iwpm_nlmsg_request *nlmsg_request;
+ unsigned long flags;
+
+ nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref);
+
+ spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+ list_del_init(&nlmsg_request->inprocess_list);
+ spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+ if (!nlmsg_request->request_done)
+ pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
+ __func__, nlmsg_request->nlmsg_seq);
+ kfree(nlmsg_request);
+}
+
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
+{
+ struct iwpm_nlmsg_request *nlmsg_request;
+ struct iwpm_nlmsg_request *found_request = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+ list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list,
+ inprocess_list) {
+ if (nlmsg_request->nlmsg_seq == echo_seq) {
+ found_request = nlmsg_request;
+ kref_get(&nlmsg_request->kref);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+ return found_request;
+}
+
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
+{
+	int ret;
+
+ ret = wait_event_timeout(nlmsg_request->waitq,
+ (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
+ if (!ret) {
+ ret = -EINVAL;
+ pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
+ __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
+ } else {
+ ret = nlmsg_request->err_code;
+ }
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ return ret;
+}
+
+int iwpm_get_nlmsg_seq(void)
+{
+ return atomic_inc_return(&iwpm_admin.nlmsg_seq);
+}
+
+int iwpm_valid_client(u8 nl_client)
+{
+ if (nl_client >= RDMA_NL_NUM_CLIENTS)
+ return 0;
+ return iwpm_admin.client_list[nl_client];
+}
+
+void iwpm_set_valid(u8 nl_client, int valid)
+{
+ if (nl_client >= RDMA_NL_NUM_CLIENTS)
+ return;
+ iwpm_admin.client_list[nl_client] = valid;
+}
+
+/* nl_client must already be validated (see iwpm_valid_client) */
+int iwpm_registered_client(u8 nl_client)
+{
+ return iwpm_admin.reg_list[nl_client];
+}
+
+/* nl_client must already be validated (see iwpm_valid_client) */
+void iwpm_set_registered(u8 nl_client, int reg)
+{
+ iwpm_admin.reg_list[nl_client] = reg;
+}
+
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+ struct sockaddr_storage *b_sockaddr)
+{
+ if (a_sockaddr->ss_family != b_sockaddr->ss_family)
+ return 1;
+ if (a_sockaddr->ss_family == AF_INET) {
+ struct sockaddr_in *a4_sockaddr =
+ (struct sockaddr_in *)a_sockaddr;
+ struct sockaddr_in *b4_sockaddr =
+ (struct sockaddr_in *)b_sockaddr;
+ if (!memcmp(&a4_sockaddr->sin_addr,
+ &b4_sockaddr->sin_addr, sizeof(struct in_addr))
+ && a4_sockaddr->sin_port == b4_sockaddr->sin_port)
+ return 0;
+
+ } else if (a_sockaddr->ss_family == AF_INET6) {
+ struct sockaddr_in6 *a6_sockaddr =
+ (struct sockaddr_in6 *)a_sockaddr;
+ struct sockaddr_in6 *b6_sockaddr =
+ (struct sockaddr_in6 *)b_sockaddr;
+ if (!memcmp(&a6_sockaddr->sin6_addr,
+ &b6_sockaddr->sin6_addr, sizeof(struct in6_addr))
+ && a6_sockaddr->sin6_port == b6_sockaddr->sin6_port)
+ return 0;
+
+ } else {
+ pr_err("%s: Invalid sockaddr family\n", __func__);
+ }
+ return 1;
+}
+
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+ int nl_client)
+{
+ struct sk_buff *skb = NULL;
+
+ skb = dev_alloc_skb(NLMSG_GOODSIZE);
+ if (!skb) {
+ pr_err("%s Unable to allocate skb\n", __func__);
+ goto create_nlmsg_exit;
+ }
+ if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
+ NLM_F_REQUEST))) {
+ pr_warn("%s: Unable to put the nlmsg header\n", __func__);
+ dev_kfree_skb(skb);
+ skb = NULL;
+ }
+create_nlmsg_exit:
+ return skb;
+}
+
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+ const struct nla_policy *nlmsg_policy,
+ struct nlattr *nltb[], const char *msg_type)
+{
+ int nlh_len = 0;
+ int ret;
+ const char *err_str = "";
+
+ ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
+ if (ret) {
+ err_str = "Invalid attribute";
+ goto parse_nlmsg_error;
+ }
+ ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
+ if (ret) {
+ err_str = "Unable to parse the nlmsg";
+ goto parse_nlmsg_error;
+ }
+ ret = iwpm_validate_nlmsg_attr(nltb, policy_max);
+ if (ret) {
+ err_str = "Invalid NULL attribute";
+ goto parse_nlmsg_error;
+ }
+ return 0;
+parse_nlmsg_error:
+ pr_warn("%s: %s (msg type %s ret = %d)\n",
+ __func__, err_str, msg_type, ret);
+ return ret;
+}
+
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
+{
+ struct sockaddr_in6 *sockaddr_v6;
+ struct sockaddr_in *sockaddr_v4;
+
+ switch (sockaddr->ss_family) {
+ case AF_INET:
+ sockaddr_v4 = (struct sockaddr_in *)sockaddr;
+ pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
+ msg, &sockaddr_v4->sin_addr,
+ ntohs(sockaddr_v4->sin_port),
+ ntohs(sockaddr_v4->sin_port));
+ break;
+ case AF_INET6:
+ sockaddr_v6 = (struct sockaddr_in6 *)sockaddr;
+ pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
+ msg, &sockaddr_v6->sin6_addr,
+ ntohs(sockaddr_v6->sin6_port),
+ ntohs(sockaddr_v6->sin6_port));
+ break;
+ default:
+ break;
+ }
+}
+
+static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
+{
+ u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0);
+ u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0);
+ return hash;
+}
+
+static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
+{
+ u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0);
+ u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0);
+ return hash;
+}
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
+ *local_sockaddr,
+ struct sockaddr_storage
+ *mapped_sockaddr)
+{
+ u32 local_hash, mapped_hash, hash;
+
+ if (local_sockaddr->ss_family == AF_INET) {
+ local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
+ mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+
+ } else if (local_sockaddr->ss_family == AF_INET6) {
+ local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
+ mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+ } else {
+ pr_err("%s: Invalid sockaddr family\n", __func__);
+ return NULL;
+ }
+
+	if (local_hash == mapped_hash) /* no port mapper: mapped == local */
+ hash = local_hash;
+ else
+ hash = jhash_2words(local_hash, mapped_hash, 0);
+
+ return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
+}
+
+static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto mapinfo_num_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ msg_seq = 0;
+ err_str = "Unable to put attribute of mapinfo number nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
+ if (ret)
+ goto mapinfo_num_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+ &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
+ if (ret)
+ goto mapinfo_num_error;
+ ret = ibnl_unicast(skb, nlh, iwpm_pid);
+ if (ret) {
+ skb = NULL;
+ err_str = "Unable to send a nlmsg";
+ goto mapinfo_num_error;
+ }
+ pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ return 0;
+mapinfo_num_error:
+ pr_info("%s: %s\n", __func__, err_str);
+ if (skb)
+ dev_kfree_skb(skb);
+ return ret;
+}
+
+static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
+{
+ struct nlmsghdr *nlh = NULL;
+ int ret = 0;
+
+ if (!skb)
+ return ret;
+ if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+ RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+ pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+ return -ENOMEM;
+ }
+ nlh->nlmsg_type = NLMSG_DONE;
+ ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+ if (ret)
+ pr_warn("%s Unable to send a nlmsg\n", __func__);
+ return ret;
+}
+
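+/*
+ * For reference: iwpm_send_mapinfo() below emits ordinary netlink
+ * multipart traffic, so a user-space consumer loops roughly as follows
+ * (buffer size illustrative):
+ *
+ *	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0)
+ *		for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
+ *		     nlh = NLMSG_NEXT(nlh, len))
+ *			if (nlh->nlmsg_type == NLMSG_DONE)
+ *				goto done;
+ */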
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
+{
+ struct iwpm_mapping_info *map_info;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ int skb_num = 0, mapping_num = 0;
+ int i = 0, nlmsg_bytes = 0;
+ unsigned long flags;
+ const char *err_str = "";
+ int ret;
+
+ skb = dev_alloc_skb(NLMSG_GOODSIZE);
+ if (!skb) {
+ ret = -ENOMEM;
+ err_str = "Unable to allocate skb";
+ goto send_mapping_info_exit;
+ }
+ skb_num++;
+ spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
+ hlist_node) {
+ if (map_info->nl_client != nl_client)
+ continue;
+ nlh = NULL;
+ if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+ RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+ ret = -ENOMEM;
+ err_str = "Unable to put the nlmsg header";
+ goto send_mapping_info_unlock;
+ }
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh,
+ sizeof(struct sockaddr_storage),
+ &map_info->local_sockaddr,
+ IWPM_NLA_MAPINFO_LOCAL_ADDR);
+ if (ret)
+ goto send_mapping_info_unlock;
+
+ ret = ibnl_put_attr(skb, nlh,
+ sizeof(struct sockaddr_storage),
+ &map_info->mapped_sockaddr,
+ IWPM_NLA_MAPINFO_MAPPED_ADDR);
+ if (ret)
+ goto send_mapping_info_unlock;
+
+ iwpm_print_sockaddr(&map_info->local_sockaddr,
+ "send_mapping_info: Local sockaddr:");
+ iwpm_print_sockaddr(&map_info->mapped_sockaddr,
+ "send_mapping_info: Mapped local sockaddr:");
+ mapping_num++;
+ nlmsg_bytes += nlh->nlmsg_len;
+
+ /* check if all mappings can fit in one skb */
+ if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
+ /* and leave room for NLMSG_DONE */
+ nlmsg_bytes = 0;
+ skb_num++;
+ spin_unlock_irqrestore(&iwpm_mapinfo_lock,
+ flags);
+ /* send the skb */
+ ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
+ skb = NULL;
+ if (ret) {
+ err_str = "Unable to send map info";
+ goto send_mapping_info_exit;
+ }
+ if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
+ ret = -ENOMEM;
+ err_str = "Insufficient skbs for map info";
+ goto send_mapping_info_exit;
+ }
+ skb = dev_alloc_skb(NLMSG_GOODSIZE);
+ if (!skb) {
+ ret = -ENOMEM;
+ err_str = "Unable to allocate skb";
+ goto send_mapping_info_exit;
+ }
+ spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ }
+ }
+ }
+send_mapping_info_unlock:
+ spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+send_mapping_info_exit:
+ if (ret) {
+ pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
+ if (skb)
+ dev_kfree_skb(skb);
+ return ret;
+ }
+ send_nlmsg_done(skb, nl_client, iwpm_pid);
+ return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
+}
+
+int iwpm_mapinfo_available(void)
+{
+ unsigned long flags;
+ int full_bucket = 0, i = 0;
+
+ spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ if (iwpm_hash_bucket) {
+ for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+ if (!hlist_empty(&iwpm_hash_bucket[i])) {
+ full_bucket = 1;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+ return full_bucket;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
new file mode 100644
index 00000000000..9777c869a14
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWPM_UTIL_H
+#define _IWPM_UTIL_H
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/jhash.h>
+#include <linux/kref.h>
+#include <net/netlink.h>
+#include <linux/errno.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+
+
+#define IWPM_NL_RETRANS 3
+#define IWPM_NL_TIMEOUT (10*HZ)
+#define IWPM_MAPINFO_SKB_COUNT 20
+
+#define IWPM_PID_UNDEFINED -1
+#define IWPM_PID_UNAVAILABLE -2
+
+struct iwpm_nlmsg_request {
+ struct list_head inprocess_list;
+ __u32 nlmsg_seq;
+ void *req_buffer;
+ u8 nl_client;
+ u8 request_done;
+ u16 err_code;
+ wait_queue_head_t waitq;
+ struct kref kref;
+};
+
+struct iwpm_mapping_info {
+ struct hlist_node hlist_node;
+ struct sockaddr_storage local_sockaddr;
+ struct sockaddr_storage mapped_sockaddr;
+ u8 nl_client;
+};
+
+struct iwpm_admin_data {
+ atomic_t refcount;
+ atomic_t nlmsg_seq;
+ int client_list[RDMA_NL_NUM_CLIENTS];
+ int reg_list[RDMA_NL_NUM_CLIENTS];
+};
+
+/**
+ * iwpm_get_nlmsg_request - Allocate and initialize netlink message request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Indicates how the memory for the request should be allocated
+ *
+ * Returns the newly allocated netlink request object if successful,
+ * otherwise returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+ u8 nl_client, gfp_t gfp);
+
+/**
+ * iwpm_free_nlmsg_request - Deallocate netlink message request
+ * @kref: Holds reference of netlink message request
+ */
+void iwpm_free_nlmsg_request(struct kref *kref);
+
+/**
+ * iwpm_find_nlmsg_request - Find netlink message request in the request list
+ * @echo_seq: Sequence number of the netlink request to find
+ *
+ * Returns the matching netlink message request,
+ * or NULL if none is found
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
+
+/**
+ * iwpm_wait_complete_req - Block while servicing the netlink request
+ * @nlmsg_request: Netlink message request to service
+ *
+ * Wakes up after the request completes or times out
+ * Returns 0 if the request completed without error
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
+
+/**
+ * iwpm_get_nlmsg_seq - Get the sequence number for a netlink
+ * message to send to the port mapper
+ *
+ * Returns the sequence number for the netlink message.
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ * a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * If successful, returns the number of sent mapping info records
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info records are available
+ * in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same IP/TCP address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+ struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Holds the address of each netlink message attribute
+ * @nla_count: Number of netlink message attributes
+ *
+ * Returns an error if any of the nla_count attributes is NULL
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+ int nla_count)
+{
+ int i;
+ for (i = 1; i < nla_count; i++) {
+ if (!nltb[i])
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allocated skb, or NULL if the tailroom of the skb
+ * is insufficient to store the message header and payload
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+ int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+ const struct nla_policy *nlmsg_policy,
+ struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif
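
Read together, the request helpers declared above imply a simple lifecycle: allocate a request keyed by a netlink sequence number, send the corresponding message, block until the reply (or timeout) arrives, then drop the kref. A minimal caller sketch, built only on the declarations in this header (example_send_and_wait() and the elided send step are hypothetical, not part of this patch):

	static int example_send_and_wait(u8 nl_client)
	{
		struct iwpm_nlmsg_request *req;
		__u32 seq = iwpm_get_nlmsg_seq();
		int ret;

		req = iwpm_get_nlmsg_request(seq, nl_client, GFP_KERNEL);
		if (!req)
			return -ENOMEM;

		/* ... build and send a netlink message carrying seq ... */

		ret = iwpm_wait_complete_req(req);	/* 0 on clean completion */
		kref_put(&req->kref, iwpm_free_nlmsg_request);
		return ret;
	}
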
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 822cfdcd9f7..ab31f136d04 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -35,6 +35,7 @@
*/
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
@@ -276,6 +277,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error1;
}
+ /* Verify the QP requested is supported. For example, Ethernet devices
+ * will not have QP0 */
+ if (!port_priv->qp_info[qpn].qp) {
+ ret = ERR_PTR(-EPROTONOSUPPORT);
+ goto error1;
+ }
+
/* Allocate structures */
mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
if (!mad_agent_priv) {
@@ -1014,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.mad,
sge[0].length,
DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+ return -ENOMEM;
+
mad_send_wr->header_mapping = sge[0].addr;
sge[1].addr = ib_dma_map_single(mad_agent->device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+ ib_dma_unmap_single(mad_agent->device,
+ mad_send_wr->header_mapping,
+ sge[0].length, DMA_TO_DEVICE);
+ return -ENOMEM;
+ }
mad_send_wr->payload_mapping = sge[1].addr;
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -1589,6 +1606,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
mad->mad_hdr.class_version].class;
if (!class)
goto out;
+ if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+ IB_MGMT_MAX_METHODS)
+ goto out;
method = class->method_table[convert_mgmt_class(
mad->mad_hdr.mgmt_class)];
if (method)
@@ -1831,6 +1851,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
}
}
+static bool generate_unmatched_resp(struct ib_mad_private *recv,
+ struct ib_mad_private *response)
+{
+ if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
+ recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
+ memcpy(response, recv, sizeof *response);
+ response->header.recv_wc.wc = &response->header.wc;
+ response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+ response->header.recv_wc.recv_buf.grh = &response->grh;
+ response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ response->mad.mad.mad_hdr.status =
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+ if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
+
+ return true;
+ } else {
+ return false;
+ }
+}
static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
{
@@ -1840,6 +1880,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
+ int ret = IB_MAD_RESULT_SUCCESS;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
qp_info = mad_list->mad_queue->qp_info;
@@ -1923,8 +1964,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
local:
/* Give driver "right of first refusal" on incoming MAD */
if (port_priv->device->process_mad) {
- int ret;
-
ret = port_priv->device->process_mad(port_priv->device, 0,
port_priv->port_num,
wc, &recv->grh,
@@ -1952,6 +1991,10 @@ local:
* or via recv_handler in ib_mad_complete_recv()
*/
recv = NULL;
+ } else if ((ret & IB_MAD_RESULT_SUCCESS) &&
+ generate_unmatched_resp(recv, response)) {
+ agent_send_response(&response->mad.mad, &recv->grh, wc,
+ port_priv->device, port_num, qp_info->qp->qp_num);
}
out:
@@ -1970,7 +2013,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
unsigned long delay;
if (list_empty(&mad_agent_priv->wait_list)) {
- __cancel_delayed_work(&mad_agent_priv->timed_work);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
} else {
mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
struct ib_mad_send_wr_private,
@@ -1979,13 +2022,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
if (time_after(mad_agent_priv->timeout,
mad_send_wr->timeout)) {
mad_agent_priv->timeout = mad_send_wr->timeout;
- __cancel_delayed_work(&mad_agent_priv->timed_work);
delay = mad_send_wr->timeout - jiffies;
if ((long)delay <= 0)
delay = 1;
- queue_delayed_work(mad_agent_priv->qp_info->
- port_priv->wq,
- &mad_agent_priv->timed_work, delay);
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
}
}
@@ -2018,11 +2059,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
list_add(&mad_send_wr->agent_list, list_item);
/* Reschedule a work item if we have a shorter timeout */
- if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
- __cancel_delayed_work(&mad_agent_priv->timed_work);
- queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
- &mad_agent_priv->timed_work, delay);
- }
+ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
@@ -2560,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof *mad_priv -
sizeof mad_priv->header,
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+ sg_list.addr))) {
+ ret = -ENOMEM;
+ break;
+ }
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -2633,6 +2677,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
int ret, i;
struct ib_qp_attr *attr;
struct ib_qp *qp;
+ u16 pkey_index;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
@@ -2640,6 +2685,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
return -ENOMEM;
}
+ ret = ib_find_pkey(port_priv->device, port_priv->port_num,
+ IB_DEFAULT_PKEY_FULL, &pkey_index);
+ if (ret)
+ pkey_index = 0;
+
for (i = 0; i < IB_MAD_QPS_CORE; i++) {
qp = port_priv->qp_info[i].qp;
if (!qp)
@@ -2650,7 +2700,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
* one is needed for the Reset to Init transition
*/
attr->qp_state = IB_QPS_INIT;
- attr->pkey_index = 0;
+ attr->pkey_index = pkey_index;
attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
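
The send-path hunks above add an ib_dma_mapping_error() check after every ib_dma_map_single(), unwinding any earlier mapping when a later one fails. The general shape of the pattern, as a sketch with hypothetical names:

	addr = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(dev, addr)))
		return -ENOMEM;	/* never post a work request with an unchecked mapping */
	...
	/* if a subsequent step fails, unwind the earlier mapping */
	ib_dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
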
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 68b4162fd9d..d2360a8ef0b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -34,6 +34,7 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
new file mode 100644
index 00000000000..23dd5a5c759
--- /dev/null
+++ b/drivers/infiniband/core/netlink.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2010 Voltaire Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+#include <linux/export.h>
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <rdma/rdma_netlink.h>
+
+struct ibnl_client {
+ struct list_head list;
+ int index;
+ int nops;
+ const struct ibnl_client_cbs *cb_table;
+};
+
+static DEFINE_MUTEX(ibnl_mutex);
+static struct sock *nls;
+static LIST_HEAD(client_list);
+
+int ibnl_add_client(int index, int nops,
+ const struct ibnl_client_cbs cb_table[])
+{
+ struct ibnl_client *cur;
+ struct ibnl_client *nl_client;
+
+ nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
+ if (!nl_client)
+ return -ENOMEM;
+
+ nl_client->index = index;
+ nl_client->nops = nops;
+ nl_client->cb_table = cb_table;
+
+ mutex_lock(&ibnl_mutex);
+
+ list_for_each_entry(cur, &client_list, list) {
+ if (cur->index == index) {
+ pr_warn("Client for %d already exists\n", index);
+ mutex_unlock(&ibnl_mutex);
+ kfree(nl_client);
+ return -EINVAL;
+ }
+ }
+
+ list_add_tail(&nl_client->list, &client_list);
+
+ mutex_unlock(&ibnl_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ibnl_add_client);
+
+int ibnl_remove_client(int index)
+{
+ struct ibnl_client *cur, *next;
+
+ mutex_lock(&ibnl_mutex);
+ list_for_each_entry_safe(cur, next, &client_list, list) {
+ if (cur->index == index) {
+ list_del(&(cur->list));
+ mutex_unlock(&ibnl_mutex);
+ kfree(cur);
+ return 0;
+ }
+ }
+ pr_warn("Can't remove callback for client idx %d. Not found\n", index);
+ mutex_unlock(&ibnl_mutex);
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ibnl_remove_client);
+
+void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
+ int len, int client, int op, int flags)
+{
+ unsigned char *prev_tail;
+
+ prev_tail = skb_tail_pointer(skb);
+ *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
+ len, flags);
+ if (!*nlh)
+ goto out_nlmsg_trim;
+ (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
+ return nlmsg_data(*nlh);
+
+out_nlmsg_trim:
+ nlmsg_trim(skb, prev_tail);
+ return NULL;
+}
+EXPORT_SYMBOL(ibnl_put_msg);
+
+int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ int len, void *data, int type)
+{
+ unsigned char *prev_tail;
+
+ prev_tail = skb_tail_pointer(skb);
+ if (nla_put(skb, type, len, data))
+ goto nla_put_failure;
+ nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ibnl_put_attr);
+
+static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct ibnl_client *client;
+ int type = nlh->nlmsg_type;
+ int index = RDMA_NL_GET_CLIENT(type);
+ int op = RDMA_NL_GET_OP(type);
+
+ list_for_each_entry(client, &client_list, list) {
+ if (client->index == index) {
+ if (op < 0 || op >= client->nops ||
+ !client->cb_table[op].dump)
+ return -EINVAL;
+
+ {
+ struct netlink_dump_control c = {
+ .dump = client->cb_table[op].dump,
+ .module = client->cb_table[op].module,
+ };
+ return netlink_dump_start(nls, skb, nlh, &c);
+ }
+ }
+ }
+
+ pr_info("Index %d wasn't found in client list\n", index);
+ return -EINVAL;
+}
+
+static void ibnl_rcv(struct sk_buff *skb)
+{
+ mutex_lock(&ibnl_mutex);
+ netlink_rcv_skb(skb, &ibnl_rcv_msg);
+ mutex_unlock(&ibnl_mutex);
+}
+
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ __u32 pid)
+{
+ return nlmsg_unicast(nls, skb, pid);
+}
+EXPORT_SYMBOL(ibnl_unicast);
+
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ unsigned int group, gfp_t flags)
+{
+ return nlmsg_multicast(nls, skb, 0, group, flags);
+}
+EXPORT_SYMBOL(ibnl_multicast);
+
+int __init ibnl_init(void)
+{
+ struct netlink_kernel_cfg cfg = {
+ .input = ibnl_rcv,
+ };
+
+ nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
+ if (!nls) {
+ pr_warn("Failed to create netlink socket\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void ibnl_cleanup(void)
+{
+ struct ibnl_client *cur, *next;
+
+ mutex_lock(&ibnl_mutex);
+ list_for_each_entry_safe(cur, next, &client_list, list) {
+ list_del(&(cur->list));
+ kfree(cur);
+ }
+ mutex_unlock(&ibnl_mutex);
+
+ netlink_kernel_release(nls);
+}
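
A client of this interface registers a per-opcode dump table against its RDMA netlink client index and relies on ibnl_rcv_msg() to route requests into netlink_dump_start(). A minimal sketch, where EXAMPLE_NL_CLIENT and example_dump() are hypothetical stand-ins (a real client would use an index assigned in rdma_netlink.h):

	#define EXAMPLE_NL_CLIENT 0	/* hypothetical client index */

	static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
	{
		return 0;	/* fill skb with dump records here */
	}

	static const struct ibnl_client_cbs example_cbs[] = {
		[0] = { .dump = example_dump, .module = THIS_MODULE },
	};

	static int __init example_init(void)
	{
		return ibnl_add_client(EXAMPLE_NL_CLIENT, ARRAY_SIZE(example_cbs),
				       example_cbs);
	}
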
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 019bd4b0863..1b65986c0be 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/export.h>
#include <linux/string.h>
#include <rdma/ib_pack.h>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 91a660310b7..233eaf541f5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -42,7 +42,7 @@
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
-
+#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"
@@ -94,6 +94,12 @@ struct ib_sa_path_query {
struct ib_sa_query sa_query;
};
+struct ib_sa_guidinfo_query {
+ void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
@@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
+#define GUIDINFO_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
+ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .field_name = "sa_guidinfo_rec:" #field
+
+static const struct ib_field guidinfo_rec_table[] = {
+ { GUIDINFO_REC_FIELD(lid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { GUIDINFO_REC_FIELD(block_num),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { GUIDINFO_REC_FIELD(res1),
+ .offset_words = 0,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { GUIDINFO_REC_FIELD(res2),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { GUIDINFO_REC_FIELD(guid_info_list),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 512 },
+};
+
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -425,7 +459,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
- schedule_work(&sa_dev->port[event->element.port_num -
+ queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
}
@@ -522,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
+ if (force_grh) {
+ memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ ah_attr->vlan_id = rec->vlan_id;
+ } else {
+ ah_attr->vlan_id = 0xffff;
+ }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -577,19 +618,21 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
+ bool preload = !!(gfp_mask & __GFP_WAIT);
unsigned long flags;
int ret, id;
-retry:
- if (!idr_pre_get(&query_idr, gfp_mask))
- return -ENOMEM;
+ if (preload)
+ idr_preload(gfp_mask);
spin_lock_irqsave(&idr_lock, flags);
- ret = idr_get_new(&query_idr, query, &id);
+
+ id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);
+
spin_unlock_irqrestore(&idr_lock, flags);
- if (ret == -EAGAIN)
- goto retry;
- if (ret)
- return ret;
+ if (preload)
+ idr_preload_end();
+ if (id < 0)
+ return id;
query->mad_buf->timeout_ms = timeout_ms;
query->mad_buf->context[0] = query;
@@ -616,6 +659,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
}
EXPORT_SYMBOL(ib_sa_unpack_path);
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+{
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_path);
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -628,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
+ rec.vlan_id = 0xffff;
+ memset(rec.dmac, 0, ETH_ALEN);
+ memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
@@ -945,6 +997,105 @@ err1:
return ret;
}
+/* Support GuidInfoRecord */
+static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_guidinfo_query *query =
+ container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_guidinfo_rec rec;
+
+ ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
+}
+
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_guidinfo_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_SA_METHOD_DELETE) {
+ return -EINVAL;
+ }
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
+ query->sa_query.release = ib_sa_guidinfo_rec_release;
+
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
+ mad->data);
+
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
@@ -1079,7 +1230,7 @@ static void ib_sa_remove_one(struct ib_device *device)
ib_unregister_event_handler(&sa_dev->event_handler);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
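
The new GuidInfoRecord support follows the same asynchronous pattern as the existing path-record query: pack the record into an SA MAD, send it, and deliver the unpacked reply through a callback. A hedged caller sketch, assuming a client registered via ib_sa_register_client() and the IB_SA_GUIDINFO_REC_* comp-mask names from ib_sa.h:

	static struct ib_sa_client my_sa_client;	/* ib_sa_register_client()'d */

	static void example_guidinfo_cb(int status, struct ib_sa_guidinfo_rec *resp,
					void *context)
	{
		if (!status && resp)
			pr_info("guidinfo block %u received\n", resp->block_num);
	}

	/* in the caller, for some lid/device/port_num: */
	struct ib_sa_guidinfo_rec rec = { .lid = lid, .block_num = 0 };
	struct ib_sa_query *query;
	int ret;

	ret = ib_sa_guid_info_rec_query(&my_sa_client, device, port_num, &rec,
					IB_SA_GUIDINFO_REC_LID |
					IB_SA_GUIDINFO_REC_BLOCK_NUM,
					IB_MGMT_METHOD_GET, 1000 /* ms */,
					GFP_KERNEL, example_guidinfo_cb,
					NULL, &query);
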
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 9ab5df72df7..cbd0383f622 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -35,6 +35,7 @@
#include "core_priv.h"
#include <linux/slab.h>
+#include <linux/stat.h>
#include <linux/string.h>
#include <rdma/ib_mad.h>
@@ -178,7 +179,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
{
struct ib_port_attr attr;
char *speed = "";
- int rate;
+ int rate; /* in deci-Gb/sec */
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
@@ -186,11 +187,33 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
return ret;
switch (attr.active_speed) {
- case 2: speed = " DDR"; break;
- case 4: speed = " QDR"; break;
+ case IB_SPEED_DDR:
+ speed = " DDR";
+ rate = 50;
+ break;
+ case IB_SPEED_QDR:
+ speed = " QDR";
+ rate = 100;
+ break;
+ case IB_SPEED_FDR10:
+ speed = " FDR10";
+ rate = 100;
+ break;
+ case IB_SPEED_FDR:
+ speed = " FDR";
+ rate = 140;
+ break;
+ case IB_SPEED_EDR:
+ speed = " EDR";
+ rate = 250;
+ break;
+ case IB_SPEED_SDR:
+ default: /* default to SDR for invalid rates */
+ rate = 25;
+ break;
}
- rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+ rate *= ib_width_enum_to_int(attr.active_width);
if (rate < 0)
return -EINVAL;
@@ -406,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)
struct attribute *a;
int i;
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
+ if (p->gid_group.attrs) {
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->gid_group.attrs);
+ kfree(p->gid_group.attrs);
+ }
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
+ if (p->pkey_group.attrs) {
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->pkey_group.attrs);
+ kfree(p->pkey_group.attrs);
+ }
kfree(p);
}
@@ -511,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- kobject_get(device->ports_parent),
+ device->ports_parent,
"%d", port_num);
- if (ret)
- goto err_put;
+ if (ret) {
+ kfree(p);
+ return ret;
+ }
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
@@ -522,8 +551,10 @@ static int add_port(struct ib_device *device, int port_num,
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
- if (!p->gid_group.attrs)
+ if (!p->gid_group.attrs) {
+ ret = -ENOMEM;
goto err_remove_pma;
+ }
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
@@ -532,8 +563,10 @@ static int add_port(struct ib_device *device, int port_num,
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
- if (!p->pkey_group.attrs)
+ if (!p->pkey_group.attrs) {
+ ret = -ENOMEM;
goto err_remove_gid;
+ }
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
@@ -558,6 +591,7 @@ err_free_pkey:
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
+ p->pkey_group.attrs = NULL;
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -567,13 +601,13 @@ err_free_gid:
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
+ p->gid_group.attrs = NULL;
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
- kobject_put(device->ports_parent);
- kfree(p);
+ kobject_put(&p->kobj);
return ret;
}
@@ -585,6 +619,8 @@ static ssize_t show_node_type(struct device *device,
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
+ case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
+ case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
@@ -780,6 +816,22 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
+static void free_port_list_attributes(struct ib_device *device)
+{
+ struct kobject *p, *t;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ struct ib_port *port = container_of(p, struct ib_port, kobj);
+ list_del(&p->entry);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ }
+
+ kobject_put(device->ports_parent);
+}
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -790,7 +842,7 @@ int ib_device_register_sysfs(struct ib_device *device,
class_dev->class = &ib_class;
class_dev->parent = device->dma_device;
- dev_set_name(class_dev, device->name);
+ dev_set_name(class_dev, "%s", device->name);
dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
@@ -806,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,
}
device->ports_parent = kobject_create_and_add("ports",
- kobject_get(&class_dev->kobj));
+ &class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
@@ -833,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,
return 0;
err_put:
- {
- struct kobject *p, *t;
- struct ib_port *port;
-
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
- }
-
- kobject_put(&class_dev->kobj);
+ free_port_list_attributes(device);
err_unregister:
device_unregister(class_dev);
@@ -858,22 +896,18 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
- struct kobject *p, *t;
- struct ib_port *port;
-
/* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
+ struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
+ int i;
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
+ if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+ sysfs_remove_group(kobj_dev, &iw_stats_group);
+
+ free_port_list_attributes(device);
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+ device_remove_file(&device->dev, ib_class_attributes[i]);
- kobject_put(device->ports_parent);
device_unregister(&device->dev);
}
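
The rate_show() rework above tracks the per-lane rate in deci-Gb/sec precisely so that fractional speeds survive integer math: QDR (100) on a 4x link gives 100 * 4 = 400, printed as 40 Gb/sec, and SDR (25) at 1x stays representable as 2.5 Gb/sec. The old formula, 25 * width * active_speed, only worked while the speed enum values happened to equal the SDR multiple (1, 2, 4); FDR-class speeds break that assumption, which is why each speed now selects its per-lane rate explicitly in the switch.
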
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 08f948df8fa..f2f63933e8a 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -106,9 +106,6 @@ enum {
IB_UCM_MAX_DEVICES = 32
};
-/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
-extern struct class cm_class;
-
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
static void ib_ucm_add_one(struct ib_device *device);
@@ -179,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
{
struct ib_ucm_context *ctx;
- int result;
ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
if (!ctx)
@@ -190,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
ctx->file = file;
INIT_LIST_HEAD(&ctx->events);
- do {
- result = idr_pre_get(&ctx_id_table, GFP_KERNEL);
- if (!result)
- goto error;
-
- mutex_lock(&ctx_id_mutex);
- result = idr_get_new(&ctx_id_table, ctx, &ctx->id);
- mutex_unlock(&ctx_id_mutex);
- } while (result == -EAGAIN);
-
- if (result)
+ mutex_lock(&ctx_id_mutex);
+ ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL);
+ mutex_unlock(&ctx_id_mutex);
+ if (ctx->id < 0)
goto error;
list_add_tail(&ctx->file_list, &file->ctxs);
@@ -400,7 +389,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
struct ib_ucm_event_get cmd;
struct ib_ucm_event *uevent;
int result = 0;
- DEFINE_WAIT(wait);
if (out_len < sizeof(struct ib_ucm_event_resp))
return -ENOSPC;
@@ -1122,7 +1110,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+ if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
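
The ib_ucm_ctx_alloc() hunk above is one instance of a tree-wide idr conversion (ucma.c and sa_query.c get the same treatment): the preload-and-retry pair idr_pre_get()/idr_get_new() collapses into a single idr_alloc() call that returns either the new id or a negative errno. The generic shape of the conversion, as a sketch with placeholder names:

	/* before */
	do {
		if (!idr_pre_get(&some_idr, GFP_KERNEL))
			return -ENOMEM;
		mutex_lock(&lock);
		ret = idr_get_new(&some_idr, ptr, &id);
		mutex_unlock(&lock);
	} while (ret == -EAGAIN);

	/* after */
	mutex_lock(&lock);
	id = idr_alloc(&some_idr, ptr, 0, 0, GFP_KERNEL);
	mutex_unlock(&lock);
	if (id < 0)
		return id;
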
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ca12acf3837..56a4b7ca7ee 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -41,11 +41,14 @@
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/module.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -54,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
static unsigned int max_backlog = 1024;
static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
{
.procname = "max_backlog",
.data = &max_backlog,
@@ -65,12 +68,6 @@ static ctl_table ucma_ctl_table[] = {
{ }
};
-static struct ctl_path ucma_ctl_path[] = {
- { .procname = "net" },
- { .procname = "rdma_ucm" },
- { }
-};
-
struct ucma_file {
struct mutex mut;
struct file *filp;
@@ -150,7 +147,6 @@ static void ucma_put_ctx(struct ucma_context *ctx)
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
struct ucma_context *ctx;
- int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -161,17 +157,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_LIST_HEAD(&ctx->mc_list);
ctx->file = file;
- do {
- ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
- if (!ret)
- goto error;
-
- mutex_lock(&mut);
- ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
- mutex_unlock(&mut);
- } while (ret == -EAGAIN);
-
- if (ret)
+ mutex_lock(&mut);
+ ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
+ mutex_unlock(&mut);
+ if (ctx->id < 0)
goto error;
list_add_tail(&ctx->list, &file->ctx_list);
@@ -185,23 +174,15 @@ error:
static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc;
- int ret;
mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return NULL;
- do {
- ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
- if (!ret)
- goto error;
-
- mutex_lock(&mut);
- ret = idr_get_new(&multicast_idr, mc, &mc->id);
- mutex_unlock(&mut);
- } while (ret == -EAGAIN);
-
- if (ret)
+ mutex_lock(&mut);
+ mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
+ mutex_unlock(&mut);
+ if (mc->id < 0)
goto error;
mc->ctx = ctx;
@@ -272,17 +253,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
if (!uevent)
return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ mutex_lock(&ctx->file->mut);
uevent->cm_id = cm_id;
ucma_set_event_context(ctx, event, uevent);
uevent->resp.event = event->event;
uevent->resp.status = event->status;
- if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+ if (cm_id->qp_type == IB_QPT_UD)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
- mutex_lock(&ctx->file->mut);
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
if (!ctx->backlog) {
ret = -ENOMEM;
@@ -290,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
goto out;
}
ctx->backlog--;
- } else if (!ctx->uid) {
+ } else if (!ctx->uid || ctx->cm_id != cm_id) {
/*
* We ignore events for new connections until userspace has set
* their context. This can only happen if an error occurs on a
@@ -315,7 +296,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
int ret = 0;
- DEFINE_WAIT(wait);
if (out_len < sizeof uevent->resp)
return -ENOSPC;
@@ -367,13 +347,31 @@ done:
return ret;
}
-static ssize_t ucma_create_id(struct ucma_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
+static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
+{
+ switch (cmd->ps) {
+ case RDMA_PS_TCP:
+ *qp_type = IB_QPT_RC;
+ return 0;
+ case RDMA_PS_UDP:
+ case RDMA_PS_IPOIB:
+ *qp_type = IB_QPT_UD;
+ return 0;
+ case RDMA_PS_IB:
+ *qp_type = cmd->qp_type;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
{
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
+ enum ib_qp_type qp_type;
int ret;
if (out_len < sizeof(resp))
@@ -382,6 +380,10 @@ static ssize_t ucma_create_id(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ ret = ucma_get_qp_type(&cmd, &qp_type);
+ if (ret)
+ return ret;
+
mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
mutex_unlock(&file->mut);
@@ -389,7 +391,7 @@ static ssize_t ucma_create_id(struct ucma_file *file,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
+ ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
@@ -426,24 +428,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
mutex_unlock(&mut);
}
-static void ucma_cleanup_events(struct ucma_context *ctx)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx != ctx)
- continue;
-
- list_del(&uevent->list);
-
- /* clear incoming connections. */
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
-
- kfree(uevent);
- }
-}
-
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
@@ -457,9 +441,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
}
}
+/*
+ * We cannot hold file->mut when calling rdma_destroy_id() or we can
+ * deadlock. We also acquire file->mut in ucma_event_handler(), and
+ * rdma_destroy_id() will wait until all callbacks have completed.
+ */
static int ucma_free_ctx(struct ucma_context *ctx)
{
int events_reported;
+ struct ucma_event *uevent, *tmp;
+ LIST_HEAD(list);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
@@ -468,10 +459,20 @@ static int ucma_free_ctx(struct ucma_context *ctx)
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
- ucma_cleanup_events(ctx);
+ list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &list);
+ }
list_del(&ctx->list);
mutex_unlock(&ctx->file->mut);
+ list_for_each_entry_safe(uevent, tmp, &list, list) {
+ list_del(&uevent->list);
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ rdma_destroy_id(uevent->cm_id);
+ kfree(uevent);
+ }
+
events_reported = ctx->events_reported;
kfree(ctx);
return events_reported;
@@ -511,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
return ret;
}
-static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
+static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_bind_addr cmd;
+ struct rdma_ucm_bind_ip cmd;
struct ucma_context *ctx;
int ret;
@@ -530,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
return ret;
}
+static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_bind cmd;
+ struct sockaddr *addr;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ addr = (struct sockaddr *) &cmd.addr;
+ if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
+ return -EINVAL;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_bind_addr(ctx->cm_id, addr);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_ip(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_resolve_ip cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
+ (struct sockaddr *) &cmd.dst_addr,
+ cmd.timeout_ms);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
static ssize_t ucma_resolve_addr(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_resolve_addr cmd;
+ struct sockaddr *src, *dst;
struct ucma_context *ctx;
int ret;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ src = (struct sockaddr *) &cmd.src_addr;
+ dst = (struct sockaddr *) &cmd.dst_addr;
+ if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
+ !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
- (struct sockaddr *) &cmd.dst_addr,
- cmd.timeout_ms);
+ ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
ucma_put_ctx(ctx);
return ret;
}
@@ -603,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- struct rdma_dev_addr *dev_addr;
- struct net_device *dev;
- u16 vid = 0;
resp->num_paths = route->num_paths;
switch (route->num_paths) {
case 0:
- dev_addr = &route->addr.dev_addr;
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (dev) {
- vid = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
- }
-
- iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
- dev_addr->dst_dev_addr, vid);
- iboe_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
+ (union ib_gid *)&resp->ib_route[0].dgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
+ (union ib_gid *)&resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(0xffff);
break;
case 2:
@@ -636,11 +678,21 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
}
}
+static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
+ struct rdma_route *route)
+{
+ struct rdma_dev_addr *dev_addr;
+
+ dev_addr = &route->addr.dev_addr;
+ rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
+}
+
static ssize_t ucma_query_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_query_route cmd;
+ struct rdma_ucm_query cmd;
struct rdma_ucm_query_route_resp resp;
struct ucma_context *ctx;
struct sockaddr *addr;
@@ -670,8 +722,10 @@ static ssize_t ucma_query_route(struct ucma_file *file,
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
resp.port_num = ctx->cm_id->port_num;
- if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
- switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ switch (rdma_port_get_link_layer(ctx->cm_id->device,
+ ctx->cm_id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ucma_copy_ib_route(&resp, &ctx->cm_id->route);
break;
@@ -681,6 +735,12 @@ static ssize_t ucma_query_route(struct ucma_file *file,
default:
break;
}
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ucma_copy_iw_route(&resp, &ctx->cm_id->route);
+ break;
+ default:
+ break;
}
out:
@@ -692,7 +752,162 @@ out:
return ret;
}
-static void ucma_copy_conn_param(struct rdma_conn_param *dst,
+static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
+ struct rdma_ucm_query_addr_resp *resp)
+{
+ if (!cm_id->device)
+ return;
+
+ resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->port_num = cm_id->port_num;
+ resp->pkey = (__force __u16) cpu_to_be16(
+ ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
+}
+
+static ssize_t ucma_query_addr(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_addr_resp resp;
+ struct sockaddr *addr;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ memset(&resp, 0, sizeof resp);
+
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
+ resp.src_size = rdma_addr_size(addr);
+ memcpy(&resp.src_addr, addr, resp.src_size);
+
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
+ resp.dst_size = rdma_addr_size(addr);
+ memcpy(&resp.dst_addr, addr, resp.dst_size);
+
+ ucma_query_device_addr(ctx->cm_id, &resp);
+
+ if (copy_to_user(response, &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_query_path(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_path_resp *resp;
+ int i, ret = 0;
+
+ if (out_len < sizeof(*resp))
+ return -ENOSPC;
+
+ resp = kzalloc(out_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->num_paths = ctx->cm_id->route.num_paths;
+ for (i = 0, out_len -= sizeof(*resp);
+ i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
+ i++, out_len -= sizeof(struct ib_path_rec_data)) {
+
+ resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
+ IB_PATH_BIDIRECTIONAL;
+ ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
+ &resp->path_data[i].path_rec);
+ }
+
+ if (copy_to_user(response, resp,
+ sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ ret = -EFAULT;
+
+ kfree(resp);
+ return ret;
+}
+
+static ssize_t ucma_query_gid(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_addr_resp resp;
+ struct sockaddr_ib *addr;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ memset(&resp, 0, sizeof resp);
+
+ ucma_query_device_addr(ctx->cm_id, &resp);
+
+ addr = (struct sockaddr_ib *) &resp.src_addr;
+ resp.src_size = sizeof(*addr);
+ if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
+ memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
+ } else {
+ addr->sib_family = AF_IB;
+ addr->sib_pkey = (__force __be16) resp.pkey;
+ rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
+ (union ib_gid *) &addr->sib_addr);
+ addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+ &ctx->cm_id->route.addr.src_addr);
+ }
+
+ addr = (struct sockaddr_ib *) &resp.dst_addr;
+ resp.dst_size = sizeof(*addr);
+ if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
+ memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
+ } else {
+ addr->sib_family = AF_IB;
+ addr->sib_pkey = (__force __be16) resp.pkey;
+ rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
+ (union ib_gid *) &addr->sib_addr);
+ addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+ &ctx->cm_id->route.addr.dst_addr);
+ }
+
+ if (copy_to_user(response, &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_query(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_query cmd;
+ struct ucma_context *ctx;
+ void __user *response;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ response = (void __user *)(unsigned long) cmd.response;
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ switch (cmd.option) {
+ case RDMA_USER_CM_QUERY_ADDR:
+ ret = ucma_query_addr(ctx, response, out_len);
+ break;
+ case RDMA_USER_CM_QUERY_PATH:
+ ret = ucma_query_path(ctx, response, out_len);
+ break;
+ case RDMA_USER_CM_QUERY_GID:
+ ret = ucma_query_gid(ctx, response, out_len);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static void ucma_copy_conn_param(struct rdma_cm_id *id,
+ struct rdma_conn_param *dst,
struct rdma_ucm_conn_param *src)
{
dst->private_data = src->private_data;
@@ -704,6 +919,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst,
dst->rnr_retry_count = src->rnr_retry_count;
dst->srq = src->srq;
dst->qp_num = src->qp_num;
+ dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -724,7 +940,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
ret = rdma_connect(ctx->cm_id, &conn_param);
ucma_put_ctx(ctx);
return ret;
@@ -767,9 +983,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
if (cmd.conn_param.valid) {
- ctx->uid = cmd.uid;
- ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
+ mutex_lock(&file->mut);
ret = rdma_accept(ctx->cm_id, &conn_param);
+ if (!ret)
+ ctx->uid = cmd.uid;
+ mutex_unlock(&file->mut);
} else
ret = rdma_accept(ctx->cm_id, NULL);
@@ -865,6 +1084,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
break;
+ case RDMA_OPTION_ID_REUSEADDR:
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
+ case RDMA_OPTION_ID_AFONLY:
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
default:
ret = -ENOSYS;
}
@@ -951,23 +1184,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- optval = kmalloc(cmd.optlen, GFP_KERNEL);
- if (!optval) {
- ret = -ENOMEM;
- goto out1;
- }
-
- if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
- cmd.optlen)) {
- ret = -EFAULT;
- goto out2;
+ optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ cmd.optlen);
+ if (IS_ERR(optval)) {
+ ret = PTR_ERR(optval);
+ goto out;
}
ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
cmd.optlen);
-out2:
kfree(optval);
-out1:
+
+out:
ucma_put_ctx(ctx);
return ret;
}
@@ -991,23 +1219,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
return ret;
}
-static ssize_t ucma_join_multicast(struct ucma_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
+static ssize_t ucma_process_join(struct ucma_file *file,
+ struct rdma_ucm_join_mcast *cmd, int out_len)
{
- struct rdma_ucm_join_mcast cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
struct ucma_multicast *mc;
+ struct sockaddr *addr;
int ret;
if (out_len < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
+ addr = (struct sockaddr *) &cmd->addr;
+ if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx(file, cmd->id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1018,14 +1246,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
goto err1;
}
- mc->uid = cmd.uid;
- memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+ mc->uid = cmd->uid;
+ memcpy(&mc->addr, addr, cmd->addr_size);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
if (ret)
goto err2;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user((void __user *)(unsigned long) cmd->response,
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
@@ -1050,6 +1278,38 @@ err1:
return ret;
}
+static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_ip_mcast cmd;
+ struct rdma_ucm_join_mcast join_cmd;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ join_cmd.response = cmd.response;
+ join_cmd.uid = cmd.uid;
+ join_cmd.id = cmd.id;
+ join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+ join_cmd.reserved = 0;
+ memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
+
+ return ucma_process_join(file, &join_cmd, out_len);
+}
+
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_mcast cmd;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ return ucma_process_join(file, &cmd, out_len);
+}
+
static ssize_t ucma_leave_multicast(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -1138,7 +1398,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
struct ucma_context *ctx;
- struct file *filp;
+ struct fd f;
struct ucma_file *cur_file;
int ret = 0;
@@ -1146,12 +1406,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
- filp = fget(cmd.fd);
- if (!filp)
+ f = fdget(cmd.fd);
+ if (!f.file)
return -ENOENT;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(filp->private_data, cmd.id);
+ ctx = ucma_get_ctx(f.file->private_data, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
@@ -1185,32 +1445,36 @@ response:
ucma_put_ctx(ctx);
file_put:
- fput(filp);
+ fdput(f);
return ret;
}
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
- [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
- [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
- [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr,
- [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
- [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
- [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
- [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
- [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
- [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
- [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
- [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
- [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
- [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
- [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
- [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
- [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
- [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
- [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
- [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id
+ [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
+ [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
+ [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
+ [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
+ [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
+ [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
+ [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
+ [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
+ [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
+ [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
+ [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
+ [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
+ [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
+ [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
+ [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
+ [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
+ [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
+ [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
+ [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
+ [RDMA_USER_CM_CMD_QUERY] = ucma_query,
+ [RDMA_USER_CM_CMD_BIND] = ucma_bind,
+ [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -1226,7 +1490,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
+ if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
@@ -1313,9 +1577,11 @@ static const struct file_operations ucma_fops = {
};
static struct miscdevice ucma_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "rdma_cm",
- .fops = &ucma_fops,
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "rdma_cm",
+ .nodename = "infiniband/rdma_cm",
+ .mode = 0666,
+ .fops = &ucma_fops,
};
static ssize_t show_abi_version(struct device *dev,
@@ -1340,7 +1606,7 @@ static int __init ucma_init(void)
goto err1;
}
- ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table);
+ ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
if (!ucma_ctl_table_hdr) {
printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
ret = -ENOMEM;
@@ -1356,7 +1622,7 @@ err1:
static void __exit ucma_cleanup(void)
{
- unregister_sysctl_table(ucma_ctl_table_hdr);
+ unregister_net_sysctl_table(ucma_ctl_table_hdr);
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc);
idr_destroy(&ctx_idr);
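One more conversion in this file worth calling out: the module's sysctl (ucma's max_backlog knob) now registers through register_net_sysctl() with a plain path string, replacing the removed ctl_path arrays. A minimal sketch of the pattern, with an illustrative one-entry table:

	#include <linux/sysctl.h>
	#include <net/net_namespace.h>

	static int max_backlog = 1024;		/* illustrative knob */

	static struct ctl_table example_table[] = {
		{
			.procname	= "max_backlog",
			.data		= &max_backlog,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }				/* zeroed sentinel ends the table */
	};

	static struct ctl_table_header *example_hdr;

	static int example_register(void)
	{
		/* appears as /proc/sys/net/rdma_ucm/max_backlog in init_net */
		example_hdr = register_net_sysctl(&init_net, "net/rdma_ucm",
						  example_table);
		return example_hdr ? 0 : -ENOMEM;
	}

	static void example_unregister(void)
	{
		unregister_net_sysctl_table(example_hdr);
	}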
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index bb7e1928082..72feee620eb 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/export.h>
#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
@@ -278,36 +279,6 @@ void ib_ud_header_init(int payload_bytes,
EXPORT_SYMBOL(ib_ud_header_init);
/**
- * ib_lrh_header_pack - Pack LRH header struct into wire format
- * @lrh:unpacked LRH header struct
- * @buf:Buffer to pack into
- *
- * ib_lrh_header_pack() packs the LRH header structure @lrh into
- * wire format in the buffer @buf.
- */
-int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf)
-{
- ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf);
- return 0;
-}
-EXPORT_SYMBOL(ib_lrh_header_pack);
-
-/**
- * ib_lrh_header_unpack - Unpack LRH structure from wire format
- * @lrh:unpacked LRH header struct
- * @buf:Buffer to pack into
- *
- * ib_lrh_header_unpack() unpacks the LRH header structure from
- * wire format (in buf) into @lrh.
- */
-int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh)
-{
- ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh);
- return 0;
-}
-EXPORT_SYMBOL(ib_lrh_header_unpack);
-
-/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
* @buf:Buffer to pack into
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 415e186eee3..a3a2e9c1639 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -35,35 +35,36 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
+#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include <linux/slab.h>
#include "uverbs.h"
-#define IB_UMEM_MAX_PAGE_CHUNK \
- ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
- ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
- (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- struct ib_umem_chunk *chunk, *tmp;
+ struct scatterlist *sg;
+ struct page *page;
int i;
- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL);
- for (i = 0; i < chunk->nents; ++i) {
- struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->nmap > 0)
+ ib_dma_unmap_sg(dev, umem->sg_head.sgl,
+ umem->nmap,
+ DMA_BIDIRECTIONAL);
- if (umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
- kfree(chunk);
+ page = sg_page(sg);
+ if (umem->writable && dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ sg_free_table(&umem->sg_head);
}
/**
@@ -80,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
- struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
- int off;
int i;
DEFINE_DMA_ATTRS(attrs);
+ struct scatterlist *sg, *sg_list_start;
+ int need_release = 0;
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
@@ -96,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
@@ -116,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
- INIT_LIST_HEAD(&umem->chunk_list);
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
@@ -136,7 +135,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
down_write(&current->mm->mmap_sem);
- locked = npages + current->mm->locked_vm;
+ locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -146,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = 0;
+ if (npages == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ need_release = 1;
+ sg_list_start = umem->sg_head.sgl;
+
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
@@ -156,57 +166,41 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret < 0)
goto out;
+ umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;
- off = 0;
-
- while (ret) {
- chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
- min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
- goto out;
- }
-
- chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
- sg_init_table(chunk->page_list, chunk->nents);
- for (i = 0; i < chunk->nents; ++i) {
- if (vma_list &&
- !is_vm_hugetlb_page(vma_list[i + off]))
- umem->hugetlb = 0;
- sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
- }
-
- chunk->nmap = ib_dma_map_sg_attrs(context->device,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL,
- &attrs);
- if (chunk->nmap <= 0) {
- for (i = 0; i < chunk->nents; ++i)
- put_page(sg_page(&chunk->page_list[i]));
- kfree(chunk);
-
- ret = -ENOMEM;
- goto out;
- }
-
- ret -= chunk->nents;
- off += chunk->nents;
- list_add_tail(&chunk->list, &umem->chunk_list);
+ for_each_sg(sg_list_start, sg, ret, i) {
+ if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
+ umem->hugetlb = 0;
+
+ sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
- ret = 0;
+ /* prepare for the next loop iteration */
+ sg_list_start = sg;
}
+ umem->nmap = ib_dma_map_sg_attrs(context->device,
+ umem->sg_head.sgl,
+ umem->npages,
+ DMA_BIDIRECTIONAL,
+ &attrs);
+
+ if (umem->nmap <= 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = 0;
+
out:
if (ret < 0) {
- __ib_umem_release(context->device, umem, 0);
+ if (need_release)
+ __ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
- current->mm->locked_vm = locked;
+ current->mm->pinned_vm = locked;
up_write(&current->mm->mmap_sem);
if (vma_list)
@@ -222,7 +216,7 @@ static void ib_umem_account(struct work_struct *work)
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem);
- umem->mm->locked_vm -= umem->diff;
+ umem->mm->pinned_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
@@ -262,13 +256,13 @@ void ib_umem_release(struct ib_umem *umem)
umem->mm = mm;
umem->diff = diff;
- schedule_work(&umem->work);
+ queue_work(ib_wq, &umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
- current->mm->locked_vm -= diff;
+ current->mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
kfree(umem);
@@ -277,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
+ struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i)
- n += sg_dma_len(&chunk->page_list[i]) >> shift;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
+ n += sg_dma_len(sg) >> shift;
return n;
}
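With chunk_list gone, every consumer of an ib_umem walks one scatter/gather table, exactly as ib_umem_page_count() now does. A hedged driver-side sketch of the same walk collecting DMA addresses; collect_dma_pages() is a hypothetical helper, not a kernel API:

	#include <linux/scatterlist.h>
	#include <rdma/ib_umem.h>

	static int collect_dma_pages(struct ib_umem *umem, u64 *pas, int max)
	{
		struct scatterlist *sg;
		int shift = ilog2(umem->page_size);
		u64 addr, end;
		int i, n = 0;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
			/* one mapped entry may cover several HCA pages */
			end = sg_dma_address(sg) + sg_dma_len(sg);
			for (addr = sg_dma_address(sg); addr < end;
			     addr += 1ULL << shift) {
				if (n == max)
					return -ENOSPC;
				pas[n++] = addr;
			}
		}
		return n;
	}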
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index cd1996d0ad0..1acb9910055 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -98,7 +98,7 @@ struct ib_umad_port {
struct ib_umad_device {
int start_port, end_port;
- struct kref ref;
+ struct kobject kobj;
struct ib_umad_port port[0];
};
@@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
-static void ib_umad_release_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
{
struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
+ container_of(kobj, struct ib_umad_device, kobj);
kfree(dev);
}
+static struct kobj_type ib_umad_dev_ktype = {
+ .release = ib_umad_release_dev,
+};
+
static int hdr_size(struct ib_umad_file *file)
{
return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
@@ -458,8 +462,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
- if (packet->mad.hdr.id < 0 ||
- packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
+ if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
ret = -EINVAL;
goto err;
}
@@ -703,7 +706,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
@@ -781,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret;
+ int ret = -ENXIO;
port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
mutex_lock(&port->file_mutex);
- if (!port->ib_dev) {
- ret = -ENXIO;
+ if (!port->ib_dev)
goto out;
- }
+ ret = -ENOMEM;
file = kzalloc(sizeof *file, GFP_KERNEL);
- if (!file) {
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
- ret = -ENOMEM;
+ if (!file)
goto out;
- }
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
@@ -815,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
+ if (ret) {
+ list_del(&file->port_list);
+ kfree(file);
+ goto out;
+ }
+
+ kobject_get(&port->umad_dev->kobj);
out:
mutex_unlock(&port->file_mutex);
@@ -853,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kref_put(&dev->ref, ib_umad_release_dev);
+ kobject_put(&dev->kobj);
return 0;
}
@@ -881,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
int ret;
port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
if (filp->f_flags & O_NONBLOCK) {
if (down_trylock(&port->sm_sem)) {
@@ -899,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- if (ret) {
- up(&port->sm_sem);
- goto fail;
- }
+ if (ret)
+ goto err_up_sem;
filp->private_data = port;
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret)
+ goto err_clr_sm_cap;
+
+ kobject_get(&port->umad_dev->kobj);
+
+ return 0;
+
+err_clr_sm_cap:
+ swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+ ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+ up(&port->sm_sem);
fail:
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
return ret;
}
@@ -928,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&port->umad_dev->kobj);
return ret;
}
@@ -996,6 +1004,7 @@ static int find_overflow_devnum(void)
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
@@ -1028,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
+ port->cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
@@ -1046,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
+ port->sm_cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
@@ -1139,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)
if (!umad_dev)
return;
- kref_init(&umad_dev->ref);
+ kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
umad_dev->start_port = s;
umad_dev->end_port = e;
@@ -1147,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+ if (ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->port[i - s]))
goto err;
}
@@ -1159,7 +1171,7 @@ err:
while (--i >= s)
ib_umad_kill_port(&umad_dev->port[i - s]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
}
static void ib_umad_remove_one(struct ib_device *device)
@@ -1173,7 +1185,12 @@ static void ib_umad_remove_one(struct ib_device *device)
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
ib_umad_kill_port(&umad_dev->port[i]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
+}
+
+static char *umad_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
static int __init ib_umad_init(void)
@@ -1194,6 +1211,8 @@ static int __init ib_umad_init(void)
goto out_chrdev;
}
+ umad_class->devnode = umad_devnode;
+
ret = class_create_file(umad_class, &class_attr_abi_version.attr);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
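The thread running through this file: struct ib_umad_device trades its bare kref for a kobject so the two character devices can name it as kobj.parent, and each open file pins it with kobject_get(). A minimal sketch of that lifetime pattern, with hypothetical example_* names:

	#include <linux/kobject.h>
	#include <linux/slab.h>

	struct example_dev {
		struct kobject kobj;
		/* ports, state, ... */
	};

	static void example_release(struct kobject *kobj)
	{
		kfree(container_of(kobj, struct example_dev, kobj));
	}

	static struct kobj_type example_ktype = {
		.release = example_release,
	};

	static struct example_dev *example_create(void)
	{
		struct example_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

		if (dev)
			kobject_init(&dev->kobj, &example_ktype);  /* refcount = 1 */
		return dev;
	}

	static void example_open(struct example_dev *dev)
	{
		kobject_get(&dev->kobj);	/* an open file pins the device */
	}

	static void example_close(struct example_dev *dev)
	{
		kobject_put(&dev->kobj);	/* final put runs example_release() */
	}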
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a078e5624d2..a283274a5a0 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,22 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (const void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \
+ (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
/*
* Our lifetime rules for these structs are the following:
*
@@ -76,6 +92,8 @@ struct ib_uverbs_device {
struct ib_device *ib_dev;
int devnum;
struct cdev cdev;
+ struct rb_root xrcd_tree;
+ struct mutex xrcd_tree_mutex;
};
struct ib_uverbs_event_file {
@@ -120,9 +138,20 @@ struct ib_uevent_object {
u32 events_reported;
};
+struct ib_uxrcd_object {
+ struct ib_uobject uobject;
+ atomic_t refcnt;
+};
+
+struct ib_usrq_object {
+ struct ib_uevent_object uevent;
+ struct ib_uxrcd_object *uxrcd;
+};
+
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
+ struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
@@ -142,6 +171,8 @@ extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
+extern struct idr ib_uverbs_xrcd_idr;
+extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
@@ -161,6 +192,23 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+
+struct ib_uverbs_flow_spec {
+ union {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_eth eth;
+ struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+ };
+};
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
@@ -174,6 +222,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(alloc_mw);
+IB_UVERBS_DECLARE_CMD(dealloc_mw);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
IB_UVERBS_DECLARE_CMD(resize_cq);
@@ -181,6 +231,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(open_qp);
IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
@@ -195,5 +246,16 @@ IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
+IB_UVERBS_DECLARE_CMD(create_xsrq);
+IB_UVERBS_DECLARE_CMD(open_xrcd);
+IB_UVERBS_DECLARE_CMD(close_xrcd);
+
+#define IB_UVERBS_DECLARE_EX_CMD(name) \
+ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
#endif /* UVERBS_H */
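The INIT_UDATA macro hoisted into this header is used the same way throughout uverbs_cmd.c: the fixed command struct is consumed first, and whatever trails it (and trails the fixed response) becomes driver-private data. A sketch of the canonical handler shape, reusing reg_mr's structs purely for illustration:

	static ssize_t example_cmd(struct ib_uverbs_file *file,
				   const char __user *buf, int in_len, int out_len)
	{
		struct ib_uverbs_reg_mr cmd;		/* any fixed-size command */
		struct ib_uverbs_reg_mr_resp resp;
		struct ib_udata udata;

		if (out_len < sizeof(resp))
			return -ENOSPC;
		if (copy_from_user(&cmd, buf, sizeof(cmd)))
			return -EFAULT;

		/* bytes past the fixed structs are the provider's private in/out */
		INIT_UDATA(&udata, buf + sizeof(cmd),
			   (unsigned long) cmd.response + sizeof(resp),
			   in_len - sizeof(cmd), out_len - sizeof(resp));

		/* ... hand &udata to the driver's verb ... */
		return in_len;
	}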
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b342248aec0..ea6203ee7bc 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,21 +40,22 @@
#include <asm/uaccess.h>
#include "uverbs.h"
-
-static struct lock_class_key pd_lock_key;
-static struct lock_class_key mr_lock_key;
-static struct lock_class_key cq_lock_key;
-static struct lock_class_key qp_lock_key;
-static struct lock_class_key ah_lock_key;
-static struct lock_class_key srq_lock_key;
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (void __user *) (ibuf); \
- (udata)->outbuf = (void __user *) (obuf); \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
+#include "core_priv.h"
+
+struct uverbs_lock_class {
+ struct lock_class_key key;
+ char name[16];
+};
+
+static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
+static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
+static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
+static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
+static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
+static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
+static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
+static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
/*
* The ib_uobject locking scheme is as follows:
@@ -82,13 +83,13 @@ static struct lock_class_key srq_lock_key;
*/
static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct lock_class_key *key)
+ struct ib_ucontext *context, struct uverbs_lock_class *c)
{
uobj->user_handle = user_handle;
uobj->context = context;
kref_init(&uobj->ref);
init_rwsem(&uobj->mutex);
- lockdep_set_class(&uobj->mutex, key);
+ lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
uobj->live = 0;
}
@@ -118,18 +119,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
{
int ret;
-retry:
- if (!idr_pre_get(idr, GFP_KERNEL))
- return -ENOMEM;
-
+ idr_preload(GFP_KERNEL);
spin_lock(&ib_uverbs_idr_lock);
- ret = idr_get_new(idr, uobj, &uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
- if (ret == -EAGAIN)
- goto retry;
+ ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
- return ret;
+ spin_unlock(&ib_uverbs_idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
}
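The retry loop around idr_pre_get() is replaced by the current preload pattern: preallocate in sleeping context, then allocate under the spinlock with GFP_NOWAIT. The generic shape:

	#include <linux/idr.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);
	static DEFINE_IDR(example_idr);

	static int example_assign_id(void *obj)
	{
		int id;

		idr_preload(GFP_KERNEL);	/* may sleep; fills per-cpu caches */
		spin_lock(&example_lock);
		id = idr_alloc(&example_idr, obj, 0, 0, GFP_NOWAIT);
		spin_unlock(&example_lock);
		idr_preload_end();

		return id;			/* >= 0 on success, -errno otherwise */
	}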
void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
@@ -240,11 +240,24 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
+static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj;
+
+ uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
+ return uobj ? uobj->object : NULL;
+}
+
static void put_qp_read(struct ib_qp *qp)
{
put_uobj_read(qp->uobject);
}
+static void put_qp_write(struct ib_qp *qp)
+{
+ put_uobj_write(qp->uobject);
+}
+
static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
@@ -255,6 +268,18 @@ static void put_srq_read(struct ib_srq *srq)
put_uobj_read(srq->uobject);
}
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
+ struct ib_uobject **uobj)
+{
+ *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+ return *uobj ? (*uobj)->object : NULL;
+}
+
+static void put_xrcd_read(struct ib_uobject *uobj)
+{
+ put_uobj_read(uobj);
+}
+
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -298,11 +323,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
+ INIT_LIST_HEAD(&ucontext->xrcd_list);
+ INIT_LIST_HEAD(&ucontext->rule_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
- ret = get_unused_fd();
+ ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
goto err_free;
resp.async_fd = ret;
@@ -495,7 +522,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
down_write(&uobj->mutex);
pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
@@ -579,6 +606,305 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
return in_len;
}
+struct xrcd_table_entry {
+ struct rb_node node;
+ struct ib_xrcd *xrcd;
+ struct inode *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+ struct inode *inode,
+ struct ib_xrcd *xrcd)
+{
+ struct xrcd_table_entry *entry, *scan;
+ struct rb_node **p = &dev->xrcd_tree.rb_node;
+ struct rb_node *parent = NULL;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->xrcd = xrcd;
+ entry->inode = inode;
+
+ while (*p) {
+ parent = *p;
+ scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+ if (inode < scan->inode) {
+ p = &(*p)->rb_left;
+ } else if (inode > scan->inode) {
+ p = &(*p)->rb_right;
+ } else {
+ kfree(entry);
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &dev->xrcd_tree);
+ igrab(inode);
+ return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+ struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+ struct rb_node *p = dev->xrcd_tree.rb_node;
+
+ while (p) {
+ entry = rb_entry(p, struct xrcd_table_entry, node);
+
+ if (inode < entry->inode)
+ p = p->rb_left;
+ else if (inode > entry->inode)
+ p = p->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+
+ entry = xrcd_table_search(dev, inode);
+ if (!entry)
+ return NULL;
+
+ return entry->xrcd;
+}
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+ struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+
+ entry = xrcd_table_search(dev, inode);
+ if (entry) {
+ iput(inode);
+ rb_erase(&entry->node, &dev->xrcd_tree);
+ kfree(entry);
+ }
+}
+
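The rbtree above keys shared XRC domains by the inode behind a user-supplied fd, so unrelated processes that open the same file share a single XRCD. ib_uverbs_open_xrcd() below is the real consumer; a condensed sketch of the lookup-or-create flow it implements (ucontext/udata passed as NULL for brevity, caller assumed to hold dev->xrcd_tree_mutex):

	static struct ib_xrcd *get_or_create_xrcd(struct ib_uverbs_device *dev,
						  struct inode *inode)
	{
		struct ib_xrcd *xrcd = find_xrcd(dev, inode);
		int ret;

		if (xrcd)
			return xrcd;		/* already shared via this inode */

		xrcd = dev->ib_dev->alloc_xrcd(dev->ib_dev, NULL, NULL);
		if (IS_ERR(xrcd))
			return xrcd;

		xrcd->inode = inode;
		ret = xrcd_table_insert(dev, inode, xrcd);  /* takes igrab(inode) */
		if (ret) {
			ib_dealloc_xrcd(xrcd);
			return ERR_PTR(ret);
		}
		return xrcd;
	}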
+ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_open_xrcd cmd;
+ struct ib_uverbs_open_xrcd_resp resp;
+ struct ib_udata udata;
+ struct ib_uxrcd_object *obj;
+ struct ib_xrcd *xrcd = NULL;
+ struct fd f = {NULL, 0};
+ struct inode *inode = NULL;
+ int ret = 0;
+ int new_xrcd = 0;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ mutex_lock(&file->device->xrcd_tree_mutex);
+
+ if (cmd.fd != -1) {
+ /* search for file descriptor */
+ f = fdget(cmd.fd);
+ if (!f.file) {
+ ret = -EBADF;
+ goto err_tree_mutex_unlock;
+ }
+
+ inode = file_inode(f.file);
+ xrcd = find_xrcd(file->device, inode);
+ if (!xrcd && !(cmd.oflags & O_CREAT)) {
+ /* no XRCD for this inode yet; O_CREAT is required */
+ ret = -EAGAIN;
+ goto err_tree_mutex_unlock;
+ }
+
+ if (xrcd && cmd.oflags & O_EXCL) {
+ ret = -EINVAL;
+ goto err_tree_mutex_unlock;
+ }
+ }
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj) {
+ ret = -ENOMEM;
+ goto err_tree_mutex_unlock;
+ }
+
+ init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
+
+ down_write(&obj->uobject.mutex);
+
+ if (!xrcd) {
+ xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(xrcd)) {
+ ret = PTR_ERR(xrcd);
+ goto err;
+ }
+
+ xrcd->inode = inode;
+ xrcd->device = file->device->ib_dev;
+ atomic_set(&xrcd->usecnt, 0);
+ mutex_init(&xrcd->tgt_qp_mutex);
+ INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+ new_xrcd = 1;
+ }
+
+ atomic_set(&obj->refcnt, 0);
+ obj->uobject.object = xrcd;
+ ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+ if (ret)
+ goto err_idr;
+
+ memset(&resp, 0, sizeof resp);
+ resp.xrcd_handle = obj->uobject.id;
+
+ if (inode) {
+ if (new_xrcd) {
+ /* create new inode/xrcd table entry */
+ ret = xrcd_table_insert(file->device, inode, xrcd);
+ if (ret)
+ goto err_insert_xrcd;
+ }
+ atomic_inc(&xrcd->usecnt);
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ if (f.file)
+ fdput(f);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uobject.live = 1;
+ up_write(&obj->uobject.mutex);
+
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return in_len;
+
+err_copy:
+ if (inode) {
+ if (new_xrcd)
+ xrcd_table_delete(file->device, inode);
+ atomic_dec(&xrcd->usecnt);
+ }
+
+err_insert_xrcd:
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+
+err_idr:
+ ib_dealloc_xrcd(xrcd);
+
+err:
+ put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+ if (f.file)
+ fdput(f);
+
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_close_xrcd cmd;
+ struct ib_uobject *uobj;
+ struct ib_xrcd *xrcd = NULL;
+ struct inode *inode = NULL;
+ struct ib_uxrcd_object *obj;
+ int live;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ mutex_lock(&file->device->xrcd_tree_mutex);
+ uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
+ if (!uobj) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xrcd = uobj->object;
+ inode = xrcd->inode;
+ obj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ if (atomic_read(&obj->refcnt)) {
+ put_uobj_write(uobj);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+ ret = ib_dealloc_xrcd(uobj->object);
+ if (!ret)
+ uobj->live = 0;
+ }
+
+ live = uobj->live;
+ if (inode && ret)
+ atomic_inc(&xrcd->usecnt);
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ goto out;
+
+ if (inode && !live)
+ xrcd_table_delete(file->device, inode);
+
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+ ret = in_len;
+
+out:
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd)
+{
+ struct inode *inode;
+
+ inode = xrcd->inode;
+ if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+ return;
+
+ ib_dealloc_xrcd(xrcd);
+
+ if (inode)
+ xrcd_table_delete(dev, inode);
+}
+
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -604,19 +930,15 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- /*
- * Local write permission is required if remote write or
- * remote atomic permission is also requested.
- */
- if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
- !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
- return -EINVAL;
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ return ret;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -718,6 +1040,126 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
return in_len;
}
+ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_alloc_mw cmd;
+ struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_mw *mw;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ mw = pd->device->alloc_mw(pd, cmd.mw_type);
+ if (IS_ERR(mw)) {
+ ret = PTR_ERR(mw);
+ goto err_put;
+ }
+
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ uobj->object = mw;
+ ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
+ if (ret)
+ goto err_unalloc;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.rkey = mw->rkey;
+ resp.mw_handle = uobj->id;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mw_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+
+err_unalloc:
+ ib_dealloc_mw(mw);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dealloc_mw cmd;
+ struct ib_mw *mw;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ mw = uobj->object;
+
+ ret = ib_dealloc_mw(mw);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -733,7 +1175,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd();
+ ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
return ret;
resp.fd = ret;
@@ -784,7 +1226,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class);
down_write(&obj->uobject.mutex);
if (cmd.comp_channel >= 0) {
@@ -893,68 +1335,81 @@ out:
return ret ? ret : in_len;
}
+static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+{
+ struct ib_uverbs_wc tmp;
+
+ tmp.wr_id = wc->wr_id;
+ tmp.status = wc->status;
+ tmp.opcode = wc->opcode;
+ tmp.vendor_err = wc->vendor_err;
+ tmp.byte_len = wc->byte_len;
+ tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.qp_num = wc->qp->qp_num;
+ tmp.src_qp = wc->src_qp;
+ tmp.wc_flags = wc->wc_flags;
+ tmp.pkey_index = wc->pkey_index;
+ tmp.slid = wc->slid;
+ tmp.sl = wc->sl;
+ tmp.dlid_path_bits = wc->dlid_path_bits;
+ tmp.port_num = wc->port_num;
+ tmp.reserved = 0;
+
+ if (copy_to_user(dest, &tmp, sizeof tmp))
+ return -EFAULT;
+
+ return 0;
+}
+
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_poll_cq cmd;
- struct ib_uverbs_poll_cq_resp *resp;
+ struct ib_uverbs_poll_cq_resp resp;
+ u8 __user *header_ptr;
+ u8 __user *data_ptr;
struct ib_cq *cq;
- struct ib_wc *wc;
- int ret = 0;
- int i;
- int rsize;
+ struct ib_wc wc;
+ int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
- if (!wc)
- return -ENOMEM;
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq)
+ return -EINVAL;
- rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
- resp = kmalloc(rsize, GFP_KERNEL);
- if (!resp) {
- ret = -ENOMEM;
- goto out_wc;
- }
+ /* the response is a struct ib_uverbs_poll_cq_resp header followed by one ib_uverbs_wc per completion */
+ header_ptr = (void __user *)(unsigned long) cmd.response;
+ data_ptr = header_ptr + sizeof resp;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq) {
- ret = -EINVAL;
- goto out;
- }
+ memset(&resp, 0, sizeof resp);
+ while (resp.count < cmd.ne) {
+ ret = ib_poll_cq(cq, 1, &wc);
+ if (ret < 0)
+ goto out_put;
+ if (!ret)
+ break;
- resp->count = ib_poll_cq(cq, cmd.ne, wc);
+ ret = copy_wc_to_user(data_ptr, &wc);
+ if (ret)
+ goto out_put;
- put_cq_read(cq);
+ data_ptr += sizeof(struct ib_uverbs_wc);
+ ++resp.count;
+ }
- for (i = 0; i < resp->count; i++) {
- resp->wc[i].wr_id = wc[i].wr_id;
- resp->wc[i].status = wc[i].status;
- resp->wc[i].opcode = wc[i].opcode;
- resp->wc[i].vendor_err = wc[i].vendor_err;
- resp->wc[i].byte_len = wc[i].byte_len;
- resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
- resp->wc[i].qp_num = wc[i].qp->qp_num;
- resp->wc[i].src_qp = wc[i].src_qp;
- resp->wc[i].wc_flags = wc[i].wc_flags;
- resp->wc[i].pkey_index = wc[i].pkey_index;
- resp->wc[i].slid = wc[i].slid;
- resp->wc[i].sl = wc[i].sl;
- resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
- resp->wc[i].port_num = wc[i].port_num;
- }
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+ if (copy_to_user(header_ptr, &resp, sizeof resp)) {
ret = -EFAULT;
+ goto out_put;
+ }
-out:
- kfree(resp);
+ ret = in_len;
-out_wc:
- kfree(wc);
- return ret ? ret : in_len;
+out_put:
+ put_cq_read(cq);
+ return ret;
}
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
@@ -1039,9 +1494,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
struct ib_uverbs_create_qp_resp resp;
struct ib_udata udata;
struct ib_uqp_object *obj;
- struct ib_pd *pd;
- struct ib_cq *scq, *rcq;
- struct ib_srq *srq;
+ struct ib_device *device;
+ struct ib_pd *pd = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_cq *scq = NULL, *rcq = NULL;
+ struct ib_srq *srq = NULL;
struct ib_qp *qp;
struct ib_qp_init_attr attr;
int ret;
@@ -1052,26 +1510,57 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+ return -EPERM;
+
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+ init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
- srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
- rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
- scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+ if (cmd.qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ device = xrcd->device;
+ } else {
+ if (cmd.qp_type == IB_QPT_XRC_INI) {
+ cmd.max_recv_wr = cmd.max_recv_sge = 0;
+ } else {
+ if (cmd.is_srq) {
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
+
+ if (cmd.recv_cq_handle != cmd.send_cq_handle) {
+ rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+ if (!rcq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
+ }
- if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
- ret = -EINVAL;
- goto err_put;
+ scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+ rcq = rcq ?: scq;
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd || !scq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ device = pd->device;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -1079,6 +1568,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
+ attr.xrcd = xrcd;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
attr.create_flags = 0;
@@ -1093,26 +1583,35 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- qp = pd->device->create_qp(pd, &attr, &udata);
+ if (cmd.qp_type == IB_QPT_XRC_TGT)
+ qp = ib_create_qp(pd, &attr);
+ else
+ qp = device->create_qp(pd, &attr, &udata);
+
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
- qp->device = pd->device;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->uobject = &obj->uevent.uobject;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- atomic_inc(&pd->usecnt);
- atomic_inc(&attr.send_cq->usecnt);
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
+ if (cmd.qp_type != IB_QPT_XRC_TGT) {
+ qp->real_qp = qp;
+ qp->device = device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ atomic_set(&qp->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ if (attr.recv_cq)
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+ }
+ qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -1134,9 +1633,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
- put_cq_read(scq);
- if (rcq != scq)
+ if (xrcd) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ put_xrcd_read(xrcd_uobj);
+ }
+
+ if (pd)
+ put_pd_read(pd);
+ if (scq)
+ put_cq_read(scq);
+ if (rcq && rcq != scq)
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
@@ -1158,6 +1666,8 @@ err_destroy:
ib_destroy_qp(qp);
err_put:
+ if (xrcd)
+ put_xrcd_read(xrcd_uobj);
if (pd)
put_pd_read(pd);
if (scq)
@@ -1171,6 +1681,100 @@ err_put:
return ret;
}
+ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_open_qp cmd;
+ struct ib_uverbs_create_qp_resp resp;
+ struct ib_udata udata;
+ struct ib_uqp_object *obj;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_qp *qp;
+ struct ib_qp_open_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
+
+ xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.qp_num = cmd.qpn;
+ attr.qp_type = cmd.qp_type;
+
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+
+ qp = ib_open_qp(xrcd, &attr);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+
+ qp->uobject = &obj->uevent.uobject;
+
+ obj->uevent.uobject.object = qp;
+ ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+ if (ret)
+ goto err_destroy;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_remove;
+ }
+
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ put_xrcd_read(xrcd_uobj);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uevent.uobject.live = 1;
+
+ up_write(&obj->uevent.uobject.mutex);
+
+ return in_len;
+
+err_remove:
+ idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+ ib_destroy_qp(qp);
+
+err_put:
+ put_xrcd_read(xrcd_uobj);
+ put_uobj_write(&obj->uevent.uobject);
+ return ret;
+}
+
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -1271,6 +1875,20 @@ out:
return ret ? ret : in_len;
}
+/* Remove ignored fields set in the attribute mask */
+static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
+{
+ switch (qp_type) {
+ case IB_QPT_XRC_INI:
+ return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
+ case IB_QPT_XRC_TGT:
+ return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY);
+ default:
+ return mask;
+ }
+}
+
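modify_qp_mask() silently drops attribute bits that do not apply to the half of an XRC connection being modified, rather than failing the whole call: an initiator has no responder resources, a target no requester ones. Illustration:

	static void example_mask_filtering(void)
	{
		int mask = IB_QP_STATE | IB_QP_MIN_RNR_TIMER |
			   IB_QP_MAX_DEST_RD_ATOMIC;

		/* responder-only attributes are meaningless on an XRC initiator */
		mask = modify_qp_mask(IB_QPT_XRC_INI, mask);
		/* mask is now just IB_QP_STATE */
	}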
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -1343,7 +1961,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
- ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
+ if (qp->real_qp == qp) {
+ ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+ if (ret)
+ goto out;
+ ret = qp->device->modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+ } else {
+ ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+ }
put_qp_read(qp);
@@ -1394,6 +2020,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (obj->uxrcd)
+ atomic_dec(&obj->uxrcd->refcnt);
+
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
mutex_lock(&file->mutex);
@@ -1489,6 +2118,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ if (next->opcode == IB_WR_SEND_WITH_IMM)
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -1540,7 +2172,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
resp.bad_wr = 0;
- ret = qp->device->post_send(qp, wr, &bad_wr);
+ ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -1678,7 +2310,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
goto out;
resp.bad_wr = 0;
- ret = qp->device->post_recv(qp, wr, &bad_wr);
+ ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
put_qp_read(qp);
@@ -1774,7 +2406,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
+ init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -1891,7 +2523,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_write_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -1920,7 +2552,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_read(qp);
+ put_qp_write(qp);
return ret ? ret : in_len;
}
@@ -1938,7 +2570,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_write_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -1957,100 +2589,367 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
}
out_put:
- put_qp_read(qp);
+ put_qp_write(qp);
return ret ? ret : in_len;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ ib_spec->type = kern_spec->type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ if (ib_spec->eth.size != kern_spec->eth.size)
+ return -EINVAL;
+ memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+ sizeof(struct ib_flow_eth_filter));
+ memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+ sizeof(struct ib_flow_eth_filter));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+ sizeof(struct ib_flow_ipv4_filter));
+ memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+ sizeof(struct ib_flow_ipv4_filter));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ return -EINVAL;
+ memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_uverbs_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
+ int i;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (ucore->outlen < sizeof(resp))
+ return -ENOSPC;
+
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+ !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ return -EPERM;
+
+ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
+
+ if (cmd.flow_attr.size > ucore->inlen ||
+ cmd.flow_attr.size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
+ return -EINVAL;
+
+ if (cmd.flow_attr.reserved[0] ||
+ cmd.flow_attr.reserved[1])
+ return -EINVAL;
+
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+ GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+ err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ cmd.flow_attr.size);
+ if (err)
+ goto err_free_attr;
+ } else {
+ kern_flow_attr = &cmd.flow_attr;
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto err_free_attr;
+ }
+ init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ down_write(&uobj->mutex);
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
+ if (!flow_attr) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ flow_attr->type = kern_flow_attr->type;
+ flow_attr->priority = kern_flow_attr->priority;
+ flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+ flow_attr->port = kern_flow_attr->port;
+ flow_attr->flags = kern_flow_attr->flags;
+ flow_attr->size = sizeof(*flow_attr);
+
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
+ for (i = 0; i < flow_attr->num_of_specs &&
+ cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+ cmd.flow_attr.size >=
+ ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *) ib_spec)->size;
+ cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+ kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *) ib_spec)->size;
+ }
+ if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+ pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ i, cmd.flow_attr.size);
+ err = -EINVAL;
+ goto err_free;
+ }
+ flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+ if (IS_ERR(flow_id)) {
+ err = PTR_ERR(flow_id);
+ goto err_free;
+ }
+ flow_id->qp = qp;
+ flow_id->uobject = uobj;
+ uobj->object = flow_id;
+
+ err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ if (err)
+ goto destroy_flow;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.flow_handle = uobj->id;
+
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
+ goto err_copy;
+
+ put_qp_read(qp);
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rule_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+ kfree(flow_attr);
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return 0;
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+destroy_flow:
+ ib_destroy_flow(flow_id);
+err_free:
+ kfree(flow_attr);
+err_put:
+ put_qp_read(qp);
+err_uobj:
+ put_uobj_write(uobj);
+err_free_attr:
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return err;
+}
+
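On the wire (after the extended-command header), create_flow is the fixed struct ib_uverbs_create_flow followed by num_of_specs variable-sized specs, each led by its own {type, size} header, which is exactly what the parsing loop above walks. A hedged sketch of building a single-spec request; fields are set by name so layout details don't matter, and the IB_FLOW_* constants are the kernel-side ones from rdma/ib_verbs.h (a real userspace client would use the libibverbs equivalents):

	#include <string.h>
	#include <rdma/ib_user_verbs.h>

	struct eth_flow_req {
		struct ib_uverbs_create_flow	cmd;
		struct ib_uverbs_flow_spec_eth	eth;
	};

	static size_t build_eth_flow(struct eth_flow_req *req, __u32 qp_handle)
	{
		memset(req, 0, sizeof(*req));
		req->cmd.qp_handle		= qp_handle;
		req->cmd.flow_attr.type		= IB_FLOW_ATTR_NORMAL;
		req->cmd.flow_attr.num_of_specs	= 1;
		req->cmd.flow_attr.size		= sizeof(req->eth); /* spec bytes only */
		req->cmd.flow_attr.port		= 1;

		req->eth.type = IB_FLOW_SPEC_ETH;
		req->eth.size = sizeof(req->eth);
		/* val/mask left zeroed: a match-all spec for the sketch */

		return sizeof(*req);
	}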
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_destroy_flow cmd;
+ struct ib_flow *flow_id;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ flow_id = uobj->object;
+
+ ret = ib_destroy_flow(flow_id);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return ret;
+}
+
+static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+ struct ib_uverbs_create_xsrq *cmd,
+ struct ib_udata *udata)
{
- struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
- struct ib_uevent_object *obj;
+ struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
- down_write(&obj->uobject.mutex);
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
+
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err;
+ }
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+
+ attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ if (!attr.ext.xrc.cq) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+ }
+
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
- goto err;
+ goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
attr.srq_context = file;
- attr.attr.max_wr = cmd.max_wr;
- attr.attr.max_sge = cmd.max_sge;
- attr.attr.srq_limit = cmd.srq_limit;
+ attr.srq_type = cmd->srq_type;
+ attr.attr.max_wr = cmd->max_wr;
+ attr.attr.max_sge = cmd->max_sge;
+ attr.attr.srq_limit = cmd->srq_limit;
- obj->events_reported = 0;
- INIT_LIST_HEAD(&obj->event_list);
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
- srq = pd->device->create_srq(pd, &attr, &udata);
+ srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto err_put;
}
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = &obj->uobject;
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->srq_type = cmd->srq_type;
+ srq->uobject = &obj->uevent.uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
+
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.cq = attr.ext.xrc.cq;
+ srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
+ atomic_inc(&attr.ext.xrc.cq->usecnt);
+ atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ }
+
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
- obj->uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ obj->uevent.uobject.object = srq;
+ ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
if (ret)
goto err_destroy;
memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uobject.id;
+ resp.srq_handle = obj->uevent.uobject.id;
resp.max_wr = attr.attr.max_wr;
resp.max_sge = attr.attr.max_sge;
+ if (cmd->srq_type == IB_SRQT_XRC)
+ resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ if (copy_to_user((void __user *) (unsigned long) cmd->response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ put_uobj_read(xrcd_uobj);
+ put_cq_read(attr.ext.xrc.cq);
+ }
put_pd_read(pd);
mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
mutex_unlock(&file->mutex);
- obj->uobject.live = 1;
+ obj->uevent.uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ up_write(&obj->uevent.uobject.mutex);
- return in_len;
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
err_destroy:
ib_destroy_srq(srq);
@@ -2058,11 +2957,81 @@ err_destroy:
err_put:
put_pd_read(pd);
+err_put_cq:
+ if (cmd->srq_type == IB_SRQT_XRC)
+ put_cq_read(attr.ext.xrc.cq);
+
+err_put_xrcd:
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ atomic_dec(&obj->uxrcd->refcnt);
+ put_uobj_read(xrcd_uobj);
+ }
+
err:
- put_uobj_write(&obj->uobject);
+ put_uobj_write(&obj->uevent.uobject);
return ret;
}
+ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_srq cmd;
+ struct ib_uverbs_create_xsrq xcmd;
+ struct ib_uverbs_create_srq_resp resp;
+ struct ib_udata udata;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xcmd.response = cmd.response;
+ xcmd.user_handle = cmd.user_handle;
+ xcmd.srq_type = IB_SRQT_BASIC;
+ xcmd.pd_handle = cmd.pd_handle;
+ xcmd.max_wr = cmd.max_wr;
+ xcmd.max_sge = cmd.max_sge;
+ xcmd.srq_limit = cmd.srq_limit;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ ret = __uverbs_create_xsrq(file, &xcmd, &udata);
+ if (ret)
+ return ret;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_create_xsrq cmd;
+ struct ib_uverbs_create_srq_resp resp;
+ struct ib_udata udata;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ ret = __uverbs_create_xsrq(file, &cmd, &udata);
+ if (ret)
+ return ret;
+
+ return in_len;
+}
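
Note: both wrappers above still use the legacy write() framing, where hdr.in_words counts 4-byte units and includes the command header itself, and a successful call returns in_len. A minimal userspace sketch, assuming the request/response layouts in <rdma/ib_user_verbs.h>; cmd_fd and pd_handle are placeholders:

#include <stdint.h>
#include <unistd.h>
#include <rdma/ib_user_verbs.h>	/* uapi layouts assumed */

static int create_srq(int cmd_fd, uint32_t pd_handle)
{
	struct ib_uverbs_create_srq_resp resp;
	struct {
		struct ib_uverbs_cmd_hdr    hdr;
		struct ib_uverbs_create_srq cmd;
	} req = {
		.hdr = {
			.command   = IB_USER_VERBS_CMD_CREATE_SRQ,
			.in_words  = sizeof(req) / 4,	/* 4-byte units, header included */
			.out_words = sizeof(resp) / 4,
		},
		.cmd = {
			.response  = (uintptr_t) &resp,
			.pd_handle = pd_handle,
			.max_wr    = 128,
			.max_sge   = 1,
		},
	};

	/* On success write() returns in_words * 4, i.e. the full request size. */
	if (write(cmd_fd, &req, sizeof(req)) != sizeof(req))
		return -1;
	return resp.srq_handle;
}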
+
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -2143,6 +3112,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
+ struct ib_usrq_object *us;
+ enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2152,6 +3123,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
return -EINVAL;
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
+ srq_type = srq->srq_type;
ret = ib_destroy_srq(srq);
if (!ret)
@@ -2162,6 +3134,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (srq_type == IB_SRQT_XRC) {
+ us = container_of(obj, struct ib_usrq_object, uevent);
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
mutex_lock(&file->mutex);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ec83e9fe387..08219fb3338 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -72,6 +72,8 @@ DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
+DEFINE_IDR(ib_uverbs_xrcd_idr);
+DEFINE_IDR(ib_uverbs_rule_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -86,6 +88,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
+ [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
[IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
@@ -107,6 +111,17 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
+ [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
+ [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
+ [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw) = {
+ [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
+ [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -196,14 +211,35 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uobj);
}
+ /* Remove MWs before QPs, in order to support type 2A MWs. */
+ list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
+ struct ib_mw *mw = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+ ib_dealloc_mw(mw);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
+ struct ib_flow *flow_id = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+ ib_destroy_flow(flow_id);
+ kfree(uobj);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
+ if (qp != qp->real_qp) {
+ ib_close_qp(qp);
+ } else {
+ ib_uverbs_detach_umcast(qp, uqp);
+ ib_destroy_qp(qp);
+ }
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
@@ -231,8 +267,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uevent);
}
- /* XXX Free MWs */
-
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
@@ -241,6 +275,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uobj);
}
+ mutex_lock(&file->device->xrcd_tree_mutex);
+ list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
+ struct ib_xrcd *xrcd = uobj->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobj, struct ib_uxrcd_object, uobject);
+
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+ ib_uverbs_dealloc_xrcd(file->device, xrcd);
+ kfree(uxrcd);
+ }
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
struct ib_pd *pd = uobj->object;
@@ -520,16 +566,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
struct ib_uverbs_event_file *ev_file = NULL;
- struct file *filp;
+ struct fd f = fdget(fd);
- filp = fget(fd);
- if (!filp)
+ if (!f.file)
return NULL;
- if (filp->f_op != &uverbs_event_fops)
+ if (f.file->f_op != &uverbs_event_fops)
goto out;
- ev_file = filp->private_data;
+ ev_file = f.file->private_data;
if (ev_file->is_async) {
ev_file = NULL;
goto out;
@@ -538,7 +583,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
kref_get(&ev_file->ref);
out:
- fput(filp);
+ fdput(f);
return ev_file;
}
@@ -547,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_cmd_hdr hdr;
+ __u32 flags;
if (count < sizeof hdr)
return -EINVAL;
@@ -554,23 +600,110 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- if (hdr.in_words * 4 != count)
- return -EINVAL;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
- if (hdr.command < 0 ||
- hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command])
- return -EINVAL;
+ if (!flags) {
+ __u32 command;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
- return -EINVAL;
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[command])
+ return -EINVAL;
+
+ if (!file->ucontext &&
+ command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ return -EINVAL;
+
+ if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ return uverbs_cmd_table[command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
- if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
- return -ENOSYS;
+ } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ __u32 command;
- return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
- hdr.in_words * 4, hdr.out_words * 4);
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ int err;
+ size_t written_count = count;
+
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+ !uverbs_ex_cmd_table[command])
+ return -ENOSYS;
+
+ if (!file->ucontext)
+ return -EINVAL;
+
+ if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+
+ count -= sizeof(hdr) + sizeof(ex_hdr);
+ buf += sizeof(hdr) + sizeof(ex_hdr);
+
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr.cmd_hdr_reserved)
+ return -EINVAL;
+
+ if (ex_hdr.response) {
+ if (!hdr.out_words && !ex_hdr.provider_out_words)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (unsigned long) ex_hdr.response,
+ (hdr.out_words + ex_hdr.provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr.out_words || ex_hdr.provider_out_words)
+ return -EINVAL;
+ }
+
+ INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
+ hdr.in_words * 8, hdr.out_words * 8);
+
+ INIT_UDATA_BUF_OR_NULL(&uhw,
+ buf + ucore.inlen,
+ (unsigned long) ex_hdr.response + ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
+ err = uverbs_ex_cmd_table[command](file,
+ &ucore,
+ &uhw);
+
+ if (err)
+ return err;
+
+ return written_count;
+ }
+
+ return -ENOSYS;
}
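
Note: extended commands set IB_USER_VERBS_CMD_FLAG_EXTENDED in the high byte of hdr.command, carry a second header, and count payload words in 8-byte units that exclude both headers, unlike the legacy path. A minimal userspace sketch of the framing these checks enforce, again assuming the uapi layouts in <rdma/ib_user_verbs.h>; cmd_fd and flow_handle are placeholders:

#include <stdint.h>
#include <unistd.h>
#include <rdma/ib_user_verbs.h>	/* uapi layouts assumed */

static int destroy_flow(int cmd_fd, uint32_t flow_handle)
{
	struct {
		struct ib_uverbs_cmd_hdr      hdr;
		struct ib_uverbs_ex_cmd_hdr   ex_hdr;
		struct ib_uverbs_destroy_flow cmd;
	} req = {
		.hdr = {
			.command  = IB_USER_VERBS_EX_CMD_DESTROY_FLOW |
				    (uint32_t) IB_USER_VERBS_CMD_FLAG_EXTENDED <<
				    IB_USER_VERBS_CMD_FLAGS_SHIFT,
			.in_words = sizeof(req.cmd) / 8, /* 8-byte units, headers excluded */
		},
		.ex_hdr = { .response = 0 },	/* no response, so out_words stays 0 */
		.cmd    = { .flow_handle = flow_handle },
	};

	/* On success the kernel returns the full byte count (written_count). */
	return write(cmd_fd, &req, sizeof(req)) == sizeof(req) ? 0 : -1;
}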
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -741,6 +874,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
kref_init(&uverbs_dev->ref);
init_completion(&uverbs_dev->comp);
+ uverbs_dev->xrcd_tree = RB_ROOT;
+ mutex_init(&uverbs_dev->xrcd_tree_mutex);
spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -824,6 +959,13 @@ static void ib_uverbs_remove_one(struct ib_device *device)
kfree(uverbs_dev);
}
+static char *uverbs_devnode(struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
static int __init ib_uverbs_init(void)
{
int ret;
@@ -842,6 +984,8 @@ static int __init ib_uverbs_init(void)
goto out_chrdev;
}
+ uverbs_class->devnode = uverbs_devnode;
+
ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 5440da0e59b..e7bee46868d 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/export.h>
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
@@ -40,18 +41,21 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
dst->grh.sgid_index = src->grh.sgid_index;
dst->grh.hop_limit = src->grh.hop_limit;
dst->grh.traffic_class = src->grh.traffic_class;
+ memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
dst->dlid = src->dlid;
dst->sl = src->sl;
dst->src_path_bits = src->src_path_bits;
dst->static_rate = src->static_rate;
dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
dst->port_num = src->port_num;
+ dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
+ dst->qp_state = src->qp_state;
dst->cur_qp_state = src->cur_qp_state;
dst->path_mtu = src->path_mtu;
dst->path_mig_state = src->path_mig_state;
@@ -83,6 +87,7 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->rnr_retry = src->rnr_retry;
dst->alt_port_num = src->alt_port_num;
dst->alt_timeout = src->alt_timeout;
+ memset(dst->reserved, 0, sizeof(dst->reserved));
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index af7a8b08b2e..c2b89cc5dbc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -38,12 +38,17 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
-int ib_rate_to_mult(enum ib_rate rate)
+#include "core_priv.h"
+
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 1;
@@ -60,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)
}
EXPORT_SYMBOL(ib_rate_to_mult);
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
case 1: return IB_RATE_2_5_GBPS;
@@ -77,7 +82,32 @@ enum ib_rate mult_to_ib_rate(int mult)
}
EXPORT_SYMBOL(mult_to_ib_rate);
-enum rdma_transport_type
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
+{
+ switch (rate) {
+ case IB_RATE_2_5_GBPS: return 2500;
+ case IB_RATE_5_GBPS: return 5000;
+ case IB_RATE_10_GBPS: return 10000;
+ case IB_RATE_20_GBPS: return 20000;
+ case IB_RATE_30_GBPS: return 30000;
+ case IB_RATE_40_GBPS: return 40000;
+ case IB_RATE_60_GBPS: return 60000;
+ case IB_RATE_80_GBPS: return 80000;
+ case IB_RATE_120_GBPS: return 120000;
+ case IB_RATE_14_GBPS: return 14062;
+ case IB_RATE_56_GBPS: return 56250;
+ case IB_RATE_112_GBPS: return 112500;
+ case IB_RATE_168_GBPS: return 168750;
+ case IB_RATE_25_GBPS: return 25781;
+ case IB_RATE_100_GBPS: return 103125;
+ case IB_RATE_200_GBPS: return 206250;
+ case IB_RATE_300_GBPS: return 309375;
+ default: return -1;
+ }
+}
+EXPORT_SYMBOL(ib_rate_to_mbps);
+
+__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
switch (node_type) {
@@ -87,6 +117,10 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
+ case RDMA_NODE_USNIC:
+ return RDMA_TRANSPORT_USNIC;
+ case RDMA_NODE_USNIC_UDP:
+ return RDMA_TRANSPORT_USNIC_UDP;
default:
BUG();
return 0;
@@ -103,6 +137,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
case RDMA_TRANSPORT_IB:
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_USNIC:
+ case RDMA_TRANSPORT_USNIC_UDP:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
@@ -162,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
u32 flow_class;
u16 gid_index;
int ret;
+ int is_eth = (rdma_port_get_link_layer(device, port_num) ==
+ IB_LINK_LAYER_ETHERNET);
memset(ah_attr, 0, sizeof *ah_attr);
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH))
+ return -EPROTOTYPE;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC &&
+ wc->wc_flags & IB_WC_WITH_VLAN) {
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ah_attr->vlan_id = wc->vlan_id;
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
+ ah_attr->dmac, &ah_attr->vlan_id);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ah_attr->vlan_id = 0xffff;
+ }
+
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
@@ -250,6 +306,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
+ srq->srq_type = srq_init_attr->srq_type;
+ if (srq->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+ srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
+ atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+ atomic_inc(&srq->ext.xrc.cq->usecnt);
+ }
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@@ -279,16 +342,29 @@ EXPORT_SYMBOL(ib_query_srq);
int ib_destroy_srq(struct ib_srq *srq)
{
struct ib_pd *pd;
+ enum ib_srq_type srq_type;
+ struct ib_xrcd *uninitialized_var(xrcd);
+ struct ib_cq *uninitialized_var(cq);
int ret;
if (atomic_read(&srq->usecnt))
return -EBUSY;
pd = srq->pd;
+ srq_type = srq->srq_type;
+ if (srq_type == IB_SRQT_XRC) {
+ xrcd = srq->ext.xrc.xrcd;
+ cq = srq->ext.xrc.cq;
+ }
ret = srq->device->destroy_srq(srq);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (srq_type == IB_SRQT_XRC) {
+ atomic_dec(&xrcd->usecnt);
+ atomic_dec(&cq->usecnt);
+ }
+ }
return ret;
}
@@ -296,28 +372,127 @@ EXPORT_SYMBOL(ib_destroy_srq);
/* Queue pairs */
+static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
+{
+ struct ib_qp *qp = context;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+ list_for_each_entry(event->element.qp, &qp->open_list, open_list)
+ if (event->element.qp->event_handler)
+ event->element.qp->event_handler(event, event->element.qp->qp_context);
+ spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
+}
+
+static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
+{
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+}
+
+static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
+ void (*event_handler)(struct ib_event *, void *),
+ void *qp_context)
+{
+ struct ib_qp *qp;
+ unsigned long flags;
+
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->real_qp = real_qp;
+ atomic_inc(&real_qp->usecnt);
+ qp->device = real_qp->device;
+ qp->event_handler = event_handler;
+ qp->qp_context = qp_context;
+ qp->qp_num = real_qp->qp_num;
+ qp->qp_type = real_qp->qp_type;
+
+ spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ list_add(&qp->open_list, &real_qp->open_list);
+ spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+ return qp;
+}
+
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+ struct ib_qp_open_attr *qp_open_attr)
+{
+ struct ib_qp *qp, *real_qp;
+
+ if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
+ return ERR_PTR(-EINVAL);
+
+ qp = ERR_PTR(-EINVAL);
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
+ if (real_qp->qp_num == qp_open_attr->qp_num) {
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ break;
+ }
+ }
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+ return qp;
+}
+EXPORT_SYMBOL(ib_open_qp);
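
Note: ib_open_qp() returns a lightweight per-consumer handle onto an existing XRC TGT QP; __ib_shared_qp_event_handler() above fans events out to every handle on the open_list. A minimal in-kernel sketch, where xrcd, tgt_qpn, my_handler and my_ctx are placeholders:

struct ib_qp_open_attr open_attr = {
	.event_handler = my_handler,	/* per-handle event callback */
	.qp_context    = my_ctx,
	.qp_num        = tgt_qpn,	/* must match a QP on xrcd->tgt_qp_list */
	.qp_type       = IB_QPT_XRC_TGT,
};
struct ib_qp *qp = ib_open_qp(xrcd, &open_attr);

if (IS_ERR(qp))
	return PTR_ERR(qp);
/* ... use the handle ... */
ib_close_qp(qp);	/* drops this handle only; the shared QP lives on */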
+
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
- struct ib_qp *qp;
+ struct ib_qp *qp, *real_qp;
+ struct ib_device *device;
- qp = pd->device->create_qp(pd, qp_init_attr, NULL);
+ device = pd ? pd->device : qp_init_attr->xrcd->device;
+ qp = device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
- qp->device = pd->device;
- qp->pd = pd;
- qp->send_cq = qp_init_attr->send_cq;
- qp->recv_cq = qp_init_attr->recv_cq;
- qp->srq = qp_init_attr->srq;
- qp->uobject = NULL;
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- qp->qp_type = qp_init_attr->qp_type;
- atomic_inc(&pd->usecnt);
- atomic_inc(&qp_init_attr->send_cq->usecnt);
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- if (qp_init_attr->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
+ qp->device = device;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = qp_init_attr->qp_type;
+
+ atomic_set(&qp->usecnt, 0);
+ if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
+ qp->event_handler = __ib_shared_qp_event_handler;
+ qp->qp_context = qp;
+ qp->pd = NULL;
+ qp->send_cq = qp->recv_cq = NULL;
+ qp->srq = NULL;
+ qp->xrcd = qp_init_attr->xrcd;
+ atomic_inc(&qp_init_attr->xrcd->usecnt);
+ INIT_LIST_HEAD(&qp->open_list);
+
+ real_qp = qp;
+ qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+ qp_init_attr->qp_context);
+ if (!IS_ERR(qp))
+ __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ else
+ real_qp->device->destroy_qp(real_qp);
+ } else {
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
+ if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+ qp->recv_cq = NULL;
+ qp->srq = NULL;
+ } else {
+ qp->recv_cq = qp_init_attr->recv_cq;
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ qp->srq = qp_init_attr->srq;
+ if (qp->srq)
+ atomic_inc(&qp_init_attr->srq->usecnt);
+ }
+
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->xrcd = NULL;
+
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
+ }
}
return qp;
@@ -326,8 +501,10 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
- enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1];
- enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETHERTYPE + 1];
+ enum ib_qp_attr_mask req_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
+ enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -337,12 +514,19 @@ static const struct {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_PORT,
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -365,6 +549,12 @@ static const struct {
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -384,6 +574,22 @@ static const struct {
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC_INI] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN),
+ [IB_QPT_XRC_TGT] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ },
+ .req_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_SMAC),
+ [IB_QPT_UC] = (IB_QP_SMAC),
+ [IB_QPT_XRC_INI] = (IB_QP_SMAC),
+ [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
},
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
@@ -394,11 +600,31 @@ static const struct {
[IB_QPT_RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
+ [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
- }
+ },
+ .opt_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_UC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID)
+ }
}
},
[IB_QPS_RTR] = {
@@ -414,6 +640,13 @@ static const struct {
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT |
+ IB_QP_SQ_PSN),
[IB_QPT_SMI] = IB_QP_SQ_PSN,
[IB_QPT_GSI] = IB_QP_SQ_PSN,
},
@@ -429,6 +662,15 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -453,6 +695,15 @@ static const struct {
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -465,6 +716,8 @@ static const struct {
[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
}
@@ -487,6 +740,15 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -515,6 +777,25 @@ static const struct {
IB_QP_PKEY_INDEX |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_INI] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -546,7 +827,8 @@ static const struct {
};
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask)
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
@@ -565,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ req_param |= qp_state_table[cur_state][next_state].
+ req_param_add_eth[type];
+ opt_param |= qp_state_table[cur_state][next_state].
+ opt_param_add_eth[type];
+ }
+
if ((mask & req_param) != req_param)
return 0;
@@ -575,11 +864,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ int ret = 0;
+ union ib_gid sgid;
+
+ if ((*qp_attr_mask & IB_QP_AV) &&
+ (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) {
+ ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
+ qp_attr->ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+ if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
+ rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
+ qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
+ if (ret)
+ goto out;
+ ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
+ if (ret)
+ goto out;
+ }
+ *qp_attr_mask |= IB_QP_SMAC;
+ if (qp_attr->vlan_id < 0xFFFF)
+ *qp_attr_mask |= IB_QP_VID;
+ }
+out:
+ return ret;
+}
+EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+
+
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
+ int ret;
+
+ ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
+
+ return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
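
Note: ib_modify_qp() now resolves Ethernet L2 attributes up front and always acts on qp->real_qp, so shared XRC handles behave like the underlying QP. Sketch of an RC INIT->RTR transition on a RoCE port, where the caller supplies only the GRH-based AV and the core fills dmac/smac (plus IB_QP_VID when a VLAN is resolved); remote_qpn, remote_psn and av are placeholders:

struct ib_qp_attr attr = {
	.qp_state           = IB_QPS_RTR,
	.path_mtu           = IB_MTU_1024,
	.dest_qp_num        = remote_qpn,
	.rq_psn             = remote_psn,
	.max_dest_rd_atomic = 1,
	.min_rnr_timer      = 12,
	.ah_attr            = av,	/* IB_AH_GRH set; dmac left for the core */
};

ret = ib_modify_qp(qp, &attr,
		   IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
		   IB_QP_DEST_QPN | IB_QP_RQ_PSN |
		   IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);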
@@ -589,11 +919,59 @@ int ib_query_qp(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
{
return qp->device->query_qp ?
- qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+ qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);
+int ib_close_qp(struct ib_qp *qp)
+{
+ struct ib_qp *real_qp;
+ unsigned long flags;
+
+ real_qp = qp->real_qp;
+ if (real_qp == qp)
+ return -EINVAL;
+
+ spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ list_del(&qp->open_list);
+ spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+ atomic_dec(&real_qp->usecnt);
+ kfree(qp);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_close_qp);
+
+static int __ib_destroy_shared_qp(struct ib_qp *qp)
+{
+ struct ib_xrcd *xrcd;
+ struct ib_qp *real_qp;
+ int ret;
+
+ real_qp = qp->real_qp;
+ xrcd = real_qp->xrcd;
+
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ ib_close_qp(qp);
+ if (atomic_read(&real_qp->usecnt) == 0)
+ list_del(&real_qp->xrcd_list);
+ else
+ real_qp = NULL;
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+
+ if (real_qp) {
+ ret = ib_destroy_qp(real_qp);
+ if (!ret)
+ atomic_dec(&xrcd->usecnt);
+ else
+ __ib_insert_xrcd_qp(xrcd, real_qp);
+ }
+
+ return 0;
+}
+
int ib_destroy_qp(struct ib_qp *qp)
{
struct ib_pd *pd;
@@ -601,16 +979,25 @@ int ib_destroy_qp(struct ib_qp *qp)
struct ib_srq *srq;
int ret;
- pd = qp->pd;
- scq = qp->send_cq;
- rcq = qp->recv_cq;
- srq = qp->srq;
+ if (atomic_read(&qp->usecnt))
+ return -EBUSY;
+
+ if (qp->real_qp != qp)
+ return __ib_destroy_shared_qp(qp);
+
+ pd = qp->pd;
+ scq = qp->send_cq;
+ rcq = qp->recv_cq;
+ srq = qp->srq;
ret = qp->device->destroy_qp(qp);
if (!ret) {
- atomic_dec(&pd->usecnt);
- atomic_dec(&scq->usecnt);
- atomic_dec(&rcq->usecnt);
+ if (pd)
+ atomic_dec(&pd->usecnt);
+ if (scq)
+ atomic_dec(&scq->usecnt);
+ if (rcq)
+ atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
}
@@ -671,6 +1058,11 @@ EXPORT_SYMBOL(ib_resize_cq);
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
mr = pd->device->get_dma_mr(pd, mr_access_flags);
@@ -693,6 +1085,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
u64 *iova_start)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
@@ -723,6 +1120,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
struct ib_pd *old_pd;
int ret;
+ ret = ib_check_mr_access(mr_access_flags);
+ if (ret)
+ return ret;
+
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
@@ -768,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct ib_mr *mr;
+
+ if (!pd->device->create_mr)
+ return ERR_PTR(-ENOSYS);
+
+ mr = pd->device->create_mr(pd, mr_init_attr);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_create_mr);
+
+int ib_destroy_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ pd = mr->pd;
+ ret = mr->device->destroy_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_mr);
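
Note: ib_create_mr() is the generic allocation path behind the new ->create_mr hook; with IB_MR_SIGNATURE_EN it yields a signature-enabled MR. A sketch, assuming the ib_mr_init_attr layout from the matching header change:

struct ib_mr_init_attr mr_attr = {
	.max_reg_descriptors = 2,	/* e.g. data + protection regions */
	.flags               = IB_MR_SIGNATURE_EN,
};
struct ib_mr *sig_mr = ib_create_mr(pd, &mr_attr);

if (IS_ERR(sig_mr))
	return PTR_ERR(sig_mr);
/* ... register via a signature work request, run I/O ... */
ib_destroy_mr(sig_mr);		/* -EBUSY while mr->usecnt is held */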
+
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
@@ -816,18 +1256,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list);
/* Memory windows */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *mw;
if (!pd->device->alloc_mw)
return ERR_PTR(-ENOSYS);
- mw = pd->device->alloc_mw(pd);
+ mw = pd->device->alloc_mw(pd, type);
if (!IS_ERR(mw)) {
mw->device = pd->device;
mw->pd = pd;
mw->uobject = NULL;
+ mw->type = type;
atomic_inc(&pd->usecnt);
}
@@ -901,22 +1342,106 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
+ int ret;
+
if (!qp->device->attach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
return -EINVAL;
- return qp->device->attach_mcast(qp, gid, lid);
+ ret = qp->device->attach_mcast(qp, gid, lid);
+ if (!ret)
+ atomic_inc(&qp->usecnt);
+ return ret;
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
+ int ret;
+
if (!qp->device->detach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
return -EINVAL;
- return qp->device->detach_mcast(qp, gid, lid);
+ ret = qp->device->detach_mcast(qp, gid, lid);
+ if (!ret)
+ atomic_dec(&qp->usecnt);
+ return ret;
}
EXPORT_SYMBOL(ib_detach_mcast);
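
Note: attach/detach now pin the QP through qp->usecnt, so a QP can no longer be destroyed while still attached to a multicast group. Sketch, with mgid and mlid as placeholders:

ib_attach_mcast(qp, &mgid, mlid);	/* qp->usecnt++ */
ret = ib_destroy_qp(qp);		/* fails with -EBUSY while attached */
ib_detach_mcast(qp, &mgid, mlid);	/* qp->usecnt-- */
ret = ib_destroy_qp(qp);		/* now proceeds */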
+
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+{
+ struct ib_xrcd *xrcd;
+
+ if (!device->alloc_xrcd)
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = device->alloc_xrcd(device, NULL, NULL);
+ if (!IS_ERR(xrcd)) {
+ xrcd->device = device;
+ xrcd->inode = NULL;
+ atomic_set(&xrcd->usecnt, 0);
+ mutex_init(&xrcd->tgt_qp_mutex);
+ INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+ }
+
+ return xrcd;
+}
+EXPORT_SYMBOL(ib_alloc_xrcd);
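
Note: an XRC SRQ ties an xrcd and a CQ together through the new srq_type/ext.xrc plumbing shown in ib_create_srq() above. A minimal in-kernel sketch, where device, pd, cq and my_srq_handler are placeholders:

struct ib_xrcd *xrcd = ib_alloc_xrcd(device);

if (IS_ERR(xrcd))
	return PTR_ERR(xrcd);

struct ib_srq_init_attr sattr = {
	.event_handler = my_srq_handler,
	.srq_type      = IB_SRQT_XRC,
	.attr          = { .max_wr = 128, .max_sge = 1 },
	.ext.xrc       = { .xrcd = xrcd, .cq = cq },
};
struct ib_srq *srq = ib_create_srq(pd, &sattr);	/* bumps xrcd and cq usecnt */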
+
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ struct ib_qp *qp;
+ int ret;
+
+ if (atomic_read(&xrcd->usecnt))
+ return -EBUSY;
+
+ while (!list_empty(&xrcd->tgt_qp_list)) {
+ qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
+ ret = ib_destroy_qp(qp);
+ if (ret)
+ return ret;
+ }
+
+ return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct ib_flow *flow_id;
+ if (!qp->device->create_flow)
+ return ERR_PTR(-ENOSYS);
+
+ flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ if (!IS_ERR(flow_id))
+ atomic_inc(&qp->usecnt);
+ return flow_id;
+}
+EXPORT_SYMBOL(ib_create_flow);
+
+int ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err;
+ struct ib_qp *qp = flow_id->qp;
+
+ err = qp->device->destroy_flow(flow_id);
+ if (!err)
+ atomic_dec(&qp->usecnt);
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_flow);
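
Note: in-kernel flow steering mirrors the uverbs path above: one ib_flow_attr immediately followed in memory by its specs, sized as header plus specs. A sketch that steers one destination MAC to a QP, with layouts assumed from this series' ib_verbs.h additions:

struct {
	struct ib_flow_attr	attr;
	struct ib_flow_spec_eth	eth;
} fr = {
	.attr = {
		.type         = IB_FLOW_ATTR_NORMAL,
		.size         = sizeof(fr.attr) + sizeof(fr.eth),
		.num_of_specs = 1,
		.port         = 1,
	},
	.eth = {
		.type         = IB_FLOW_SPEC_ETH,
		.size         = sizeof(fr.eth),
		.val.dst_mac  = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
		.mask.dst_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
	},
};
struct ib_flow *flow = ib_create_flow(qp, &fr.attr, IB_FLOW_DOMAIN_USER);

if (!IS_ERR(flow))
	ib_destroy_flow(flow);	/* releases the qp->usecnt reference */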
+
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status)
+{
+ return mr->device->check_mr_status ?
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_check_mr_status);
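
Note: ib_check_mr_status() is the query side of the signature MR support added above; after signature offload completes, the caller polls the MR for protection errors. Sketch, assuming the ib_mr_status/ib_sig_err layout from the matching header change:

struct ib_mr_status st;
int ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &st);

if (!ret && (st.fail_status & IB_MR_CHECK_SIG_STATUS))
	pr_err("signature error %d at offset %llu\n",
	       st.sig_err.err_type,
	       (unsigned long long) st.sig_err.sig_err_offset);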